gcc/ada/scng.adb

   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                                 S C N G                                  --
   6 --                                                                          --
   7 --                                 B o d y                                  --
   8 --                                                                          --
   9 --          Copyright (C) 1992-2010, Free Software Foundation, Inc.         --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
  17 -- for  more details.  You should have  received  a copy of the GNU General --
  18 -- Public License  distributed with GNAT; see file COPYING3.  If not, go to --
  19 -- http://www.gnu.org/licenses for a complete copy of the license.          --
  20 --                                                                          --
  21 -- GNAT was originally developed  by the GNAT team at  New York University. --
  22 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  23 --                                                                          --
  24 ------------------------------------------------------------------------------
  25
  26 with Csets;    use Csets;
  27 with Err_Vars; use Err_Vars;
  28 with Hostparm; use Hostparm;
  29 with Namet;    use Namet;
  30 with Opt;      use Opt;
  31 with Scans;    use Scans;
  32 with Sinput;   use Sinput;
  33 with Snames;   use Snames;
  34 with Stringt;  use Stringt;
  35 with Stylesw;  use Stylesw;
  36 with Uintp;    use Uintp;
  37 with Urealp;   use Urealp;
  38 with Widechar; use Widechar;
  39
  40 pragma Warnings (Off);
  41 --  This package is used also by gnatcoll
  42 with System.CRC32;
  43 with System.UTF_32;  use System.UTF_32;
  44 with System.WCh_Con; use System.WCh_Con;
  45 pragma Warnings (On);
  46
  47 package body Scng is
  48
  49    use ASCII;
  50    --  Make control characters visible
  51
  52    Special_Characters : array (Character) of Boolean := (others => False);
  53    --  For characters that are Special token, the value is True
  54
  55    Comment_Is_Token : Boolean := False;
  56    --  True if comments are tokens
  57
  58    End_Of_Line_Is_Token : Boolean := False;
  59    --  True if End_Of_Line is a token
  60
  61    -----------------------
  62    -- Local Subprograms --
  63    -----------------------
  64
  65    procedure Accumulate_Token_Checksum;
  66    pragma Inline (Accumulate_Token_Checksum);
  67
  68    procedure Accumulate_Checksum (C : Character);
  69    pragma Inline (Accumulate_Checksum);
  70    --  This routine accumulates the checksum given character C. During the
  71    --  scanning of a source file, this routine is called with every character
  72    --  in the source, excluding blanks, and all control characters (except
  73    --  that ESC is included in the checksum). Upper case letters not in string
  74    --  literals are folded by the caller. See Sinput spec for the documentation
  75    --  of the checksum algorithm. Note: checksum values are only used if we
  76    --  generate code, so it is not necessary to worry about making the right
  77    --  sequence of calls in any error situation.
  78
  79    procedure Accumulate_Checksum (C : Char_Code);
  80    pragma Inline (Accumulate_Checksum);
  81    --  This version is identical, except that the argument, C, is a character
  82    --  code value instead of a character. This is used when wide characters
  83    --  are scanned. We use the character code rather than the ASCII characters
  84    --  so that the checksum is independent of wide character encoding method.
  85
  86    procedure Initialize_Checksum;
  87    pragma Inline (Initialize_Checksum);
  88    --  Initialize checksum value
  89
  90    -------------------------
  91    -- Accumulate_Checksum --
  92    -------------------------
  93
  94    procedure Accumulate_Checksum (C : Character) is
  95    begin
  96       System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
  97    end Accumulate_Checksum;
  98
  99    procedure Accumulate_Checksum (C : Char_Code) is
 100    begin
 101       if C > 16#FFFF# then
 102          Accumulate_Checksum (Character'Val (C / 2 ** 24));
 103          Accumulate_Checksum (Character'Val ((C / 2 ** 16) mod 256));
 104          Accumulate_Checksum (Character'Val ((C / 256) mod 256));
 105       else
 106          Accumulate_Checksum (Character'Val (C / 256));
 107       end if;
 108
 109       Accumulate_Checksum (Character'Val (C mod 256));
 110    end Accumulate_Checksum;
 111
 112    -------------------------------
 113    -- Accumulate_Token_Checksum --
 114    -------------------------------
 115
 116    procedure Accumulate_Token_Checksum is
 117    begin
 118       System.CRC32.Update
 119         (System.CRC32.CRC32 (Checksum),
 120          Character'Val (Token_Type'Pos (Token)));
 121    end Accumulate_Token_Checksum;
 122
 123    ----------------------------
 124    -- Determine_Token_Casing --
 125    ----------------------------
 126
 127    function Determine_Token_Casing return Casing_Type is
 128    begin
 129       return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
 130    end Determine_Token_Casing;
 131
 132    -------------------------
 133    -- Initialize_Checksum --
 134    -------------------------
 135
 136    procedure Initialize_Checksum is
 137    begin
 138       System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
 139    end Initialize_Checksum;
 140
 141    ------------------------
 142    -- Initialize_Scanner --
 143    ------------------------
 144
 145    procedure Initialize_Scanner (Index : Source_File_Index) is
 146    begin
 147       --  Establish reserved words
 148
 149       Scans.Initialize_Ada_Keywords;
 150
 151       --  Initialize scan control variables
 152
 153       Current_Source_File       := Index;
 154       Source                    := Source_Text (Current_Source_File);
 155       Scan_Ptr                  := Source_First (Current_Source_File);
 156       Token                     := No_Token;
 157       Token_Ptr                 := Scan_Ptr;
 158       Current_Line_Start        := Scan_Ptr;
 159       Token_Node                := Empty;
 160       Token_Name                := No_Name;
 161       Start_Column              := Set_Start_Column;
 162       First_Non_Blank_Location  := Scan_Ptr;
 163
 164       Initialize_Checksum;
 165       Wide_Char_Byte_Count := 0;
 166
 167       --  Do not call Scan, otherwise the License stuff does not work in Scn
 168
 169    end Initialize_Scanner;
 170
 171    ------------------------------
 172    -- Reset_Special_Characters --
 173    ------------------------------
 174
 175    procedure Reset_Special_Characters is
 176    begin
 177       Special_Characters := (others => False);
 178    end Reset_Special_Characters;
 179
 180    ----------
 181    -- Scan --
 182    ----------
 183
 184    procedure Scan is
 185
 186       Start_Of_Comment : Source_Ptr;
 187       --  Record start of comment position
 188
 189       Underline_Found : Boolean;
 190       --  During scanning of an identifier, set to True if last character
 191       --  scanned was an underline or other punctuation character. This
 192       --  is used to flag the error of two underlines/punctuations in a
 193       --  row or ending an identifier with a underline/punctuation. Here
 194       --  punctuation means any UTF_32 character in the Unicode category
 195       --  Punctuation,Connector.
 196
 197       Wptr : Source_Ptr;
 198       --  Used to remember start of last wide character scanned
 199
 200       procedure Check_End_Of_Line;
 201       --  Called when end of line encountered. Checks that line is not too
 202       --  long, and that other style checks for the end of line are met.
 203
 204       function Double_Char_Token (C : Character) return Boolean;
 205       --  This function is used for double character tokens like := or <>. It
 206       --  checks if the character following Source (Scan_Ptr) is C, and if so
 207       --  bumps Scan_Ptr past the pair of characters and returns True. A space
 208       --  between the two characters is also recognized with an appropriate
 209       --  error message being issued. If C is not present, False is returned.
 210       --  Note that Double_Char_Token can only be used for tokens defined in
 211       --  the Ada syntax (it's use for error cases like && is not appropriate
 212       --  since we do not want a junk message for a case like &-space-&).
 213
 214       procedure Error_Illegal_Character;
 215       --  Give illegal character error, Scan_Ptr points to character. On
 216       --  return, Scan_Ptr is bumped past the illegal character.
 217
 218       procedure Error_Illegal_Wide_Character;
 219       --  Give illegal wide character message. On return, Scan_Ptr is bumped
 220       --  past the illegal character, which may still leave us pointing to
 221       --  junk, not much we can do if the escape sequence is messed up!
 222
 223       procedure Error_Long_Line;
 224       --  Signal error of excessively long line
 225
 226       procedure Error_No_Double_Underline;
 227       --  Signal error of two underline or punctuation characters in a row.
 228       --  Called with Scan_Ptr pointing to second underline/punctuation char.
 229
 230       procedure Nlit;
 231       --  This is the procedure for scanning out numeric literals. On entry,
 232       --  Scan_Ptr points to the digit that starts the numeric literal (the
 233       --  checksum for this character has not been accumulated yet). On return
 234       --  Scan_Ptr points past the last character of the numeric literal, Token
 235       --  and Token_Node are set appropriately, and the checksum is updated.
 236
 237       procedure Slit;
 238       --  This is the procedure for scanning out string literals. On entry,
 239       --  Scan_Ptr points to the opening string quote (the checksum for this
 240       --  character has not been accumulated yet). On return Scan_Ptr points
 241       --  past the closing quote of the string literal, Token and Token_Node
 242       --  are set appropriately, and the checksum is updated.
 243
 244       procedure Skip_Other_Format_Characters;
 245       --  Skips past any "other format" category characters at the current
 246       --  cursor location (does not skip past spaces or any other characters).
 247
 248       function Start_Of_Wide_Character return Boolean;
 249       --  Returns True if the scan pointer is pointing to the start of a wide
 250       --  character sequence, does not modify the scan pointer in any case.
 251
 252       -----------------------
 253       -- Check_End_Of_Line --
 254       -----------------------
 255
 256       procedure Check_End_Of_Line is
 257          Len : constant Int :=
 258                  Int (Scan_Ptr) -
 259                  Int (Current_Line_Start) -
 260                  Wide_Char_Byte_Count;
 261
 262       begin
 263          if Style_Check then
 264             Style.Check_Line_Terminator (Len);
 265          end if;
 266
 267          --  Deal with checking maximum line length
 268
 269          if Style_Check and Style_Check_Max_Line_Length then
 270             Style.Check_Line_Max_Length (Len);
 271
 272          --  If style checking is inactive, check maximum line length against
 273          --  standard value.
 274
 275          elsif Len > Max_Line_Length then
 276             Error_Long_Line;
 277          end if;
 278
 279          --  Now one more checking circuit. Normally we are only enforcing a
 280          --  limit of physical characters, with tabs counting as one character.
 281          --  But if after tab expansion we would have a total line length that
 282          --  exceeded 32766, that would really cause trouble, because column
 283          --  positions would exceed the maximum we allow for a column count.
 284          --  Note: the limit is 32766 rather than 32767, since we use a value
 285          --  of 32767 for special purposes (see Sinput). Now we really do not
 286          --  want to go messing with tabs in the normal case, so what we do is
 287          --  to check for a line that has more than 4096 physical characters.
 288          --  Any shorter line could not be a problem, even if it was all tabs.
 289
 290          if Len >= 4096 then
 291             declare
 292                Col : Natural;
 293                Ptr : Source_Ptr;
 294
 295             begin
 296                Col := 1;
 297                Ptr := Current_Line_Start;
 298                loop
 299                   exit when Ptr = Scan_Ptr;
 300
 301                   if Source (Ptr) = ASCII.HT then
 302                      Col := (Col - 1 + 8) / 8 * 8 + 1;
 303                   else
 304                      Col := Col + 1;
 305                   end if;
 306
 307                   if Col > 32766 then
 308                      Error_Msg
 309                        ("this line is longer than 32766 characters",
 310                         Current_Line_Start);
 311                      raise Unrecoverable_Error;
 312                   end if;
 313
 314                   Ptr := Ptr + 1;
 315                end loop;
 316             end;
 317          end if;
 318
 319          --  Reset wide character byte count for next line
 320
 321          Wide_Char_Byte_Count := 0;
 322       end Check_End_Of_Line;
 323
 324       -----------------------
 325       -- Double_Char_Token --
 326       -----------------------
 327
 328       function Double_Char_Token (C : Character) return Boolean is
 329       begin
 330          if Source (Scan_Ptr + 1) = C then
 331             Accumulate_Checksum (C);
 332             Scan_Ptr := Scan_Ptr + 2;
 333             return True;
 334
 335          elsif Source (Scan_Ptr + 1) = ' '
 336            and then Source (Scan_Ptr + 2) = C
 337          then
 338             Scan_Ptr := Scan_Ptr + 1;
 339             Error_Msg_S -- CODEFIX
 340               ("no space allowed here");
 341             Scan_Ptr := Scan_Ptr + 2;
 342             return True;
 343
 344          else
 345             return False;
 346          end if;
 347       end Double_Char_Token;
 348
 349       -----------------------------
 350       -- Error_Illegal_Character --
 351       -----------------------------
 352
 353       procedure Error_Illegal_Character is
 354       begin
 355          Error_Msg_S ("illegal character");
 356          Scan_Ptr := Scan_Ptr + 1;
 357       end Error_Illegal_Character;
 358
 359       ----------------------------------
 360       -- Error_Illegal_Wide_Character --
 361       ----------------------------------
 362
 363       procedure Error_Illegal_Wide_Character is
 364       begin
 365          Scan_Ptr := Scan_Ptr + 1;
 366          Error_Msg ("illegal wide character", Wptr);
 367       end Error_Illegal_Wide_Character;
 368
 369       ---------------------
 370       -- Error_Long_Line --
 371       ---------------------
 372
 373       procedure Error_Long_Line is
 374       begin
 375          Error_Msg
 376            ("this line is too long",
 377             Current_Line_Start + Source_Ptr (Max_Line_Length));
 378       end Error_Long_Line;
 379
 380       -------------------------------
 381       -- Error_No_Double_Underline --
 382       -------------------------------
 383
 384       procedure Error_No_Double_Underline is
 385       begin
 386          Underline_Found := False;
 387
 388          --  There are four cases, and we special case the messages
 389
 390          if Source (Scan_Ptr) = '_' then
 391             if Source (Scan_Ptr - 1) = '_' then
 392                Error_Msg_S -- CODEFIX
 393                  ("two consecutive underlines not permitted");
 394             else
 395                Error_Msg_S ("underline cannot follow punctuation character");
 396             end if;
 397
 398          else
 399             if Source (Scan_Ptr - 1) = '_' then
 400                Error_Msg_S ("punctuation character cannot follow underline");
 401             else
 402                Error_Msg_S
 403                  ("two consecutive punctuation characters not permitted");
 404             end if;
 405          end if;
 406       end Error_No_Double_Underline;
 407
 408       ----------
 409       -- Nlit --
 410       ----------
 411
 412       procedure Nlit is
 413
 414          C : Character;
 415          --  Current source program character
 416
 417          Base_Char : Character;
 418          --  Either # or : (character at start of based number)
 419
 420          Base : Int;
 421          --  Value of base
 422
 423          UI_Base : Uint;
 424          --  Value of base in Uint format
 425
 426          UI_Int_Value : Uint;
 427          --  Value of integer scanned by Scan_Integer in Uint format
 428
 429          UI_Num_Value : Uint;
 430          --  Value of integer in numeric value being scanned
 431
 432          Scale : Int;
 433          --  Scale value for real literal
 434
 435          UI_Scale : Uint;
 436          --  Scale in Uint format
 437
 438          Exponent_Is_Negative : Boolean;
 439          --  Set true for negative exponent
 440
 441          Extended_Digit_Value : Int;
 442          --  Extended digit value
 443
 444          Point_Scanned : Boolean;
 445          --  Flag for decimal point scanned in numeric literal
 446
 447          -----------------------
 448          -- Local Subprograms --
 449          -----------------------
 450
 451          procedure Error_Digit_Expected;
 452          --  Signal error of bad digit, Scan_Ptr points to the location at
 453          --  which the digit was expected on input, and is unchanged on return.
 454
 455          procedure Scan_Integer;
 456          --  Procedure to scan integer literal. On entry, Scan_Ptr points to a
 457          --  digit, on exit Scan_Ptr points past the last character of the
 458          --  integer.
 459          --
 460          --  For each digit encountered, UI_Int_Value is multiplied by 10, and
 461          --  the value of the digit added to the result. In addition, the
 462          --  value in Scale is decremented by one for each actual digit
 463          --  scanned.
 464
 465          --------------------------
 466          -- Error_Digit_Expected --
 467          --------------------------
 468
 469          procedure Error_Digit_Expected is
 470          begin
 471             Error_Msg_S ("digit expected");
 472          end Error_Digit_Expected;
 473
 474          ------------------
 475          -- Scan_Integer --
 476          ------------------
 477
 478          procedure Scan_Integer is
 479             C : Character;
 480             --  Next character scanned
 481
 482          begin
 483             C := Source (Scan_Ptr);
 484
 485             --  Loop through digits (allowing underlines)
 486
 487             loop
 488                Accumulate_Checksum (C);
 489                UI_Int_Value :=
 490                  UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0'));
 491                Scan_Ptr := Scan_Ptr + 1;
 492                Scale := Scale - 1;
 493                C := Source (Scan_Ptr);
 494
 495                --  Case of underline encountered
 496
 497                if C = '_' then
 498
 499                   --  We do not accumulate the '_' in the checksum, so that
 500                   --  1_234 is equivalent to 1234, and does not trigger
 501                   --  compilation for "minimal recompilation" (gnatmake -m).
 502
 503                   loop
 504                      Scan_Ptr := Scan_Ptr + 1;
 505                      C := Source (Scan_Ptr);
 506                      exit when C /= '_';
 507                      Error_No_Double_Underline;
 508                   end loop;
 509
 510                   if C not in '0' .. '9' then
 511                      Error_Digit_Expected;
 512                      exit;
 513                   end if;
 514
 515                else
 516                   exit when C not in '0' .. '9';
 517                end if;
 518             end loop;
 519          end Scan_Integer;
 520
 521       --  Start of Processing for Nlit
 522
 523       begin
 524          Base := 10;
 525          UI_Base := Uint_10;
 526          UI_Int_Value := Uint_0;
 527          Based_Literal_Uses_Colon := False;
 528          Scale := 0;
 529          Scan_Integer;
 530          Point_Scanned := False;
 531          UI_Num_Value := UI_Int_Value;
 532
 533          --  Various possibilities now for continuing the literal are period,
 534          --  E/e (for exponent), or :/# (for based literal).
 535
 536          Scale := 0;
 537          C := Source (Scan_Ptr);
 538
 539          if C = '.' then
 540
 541             --  Scan out point, but do not scan past .. which is a range
 542             --  sequence, and must not be eaten up scanning a numeric literal.
 543
 544             while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop
 545                Accumulate_Checksum ('.');
 546
 547                if Point_Scanned then
 548                   Error_Msg_S ("duplicate point ignored");
 549                end if;
 550
 551                Point_Scanned := True;
 552                Scan_Ptr := Scan_Ptr + 1;
 553                C := Source (Scan_Ptr);
 554
 555                if C not in '0' .. '9' then
 556                   Error_Msg
 557                     ("real literal cannot end with point", Scan_Ptr - 1);
 558                else
 559                   Scan_Integer;
 560                   UI_Num_Value := UI_Int_Value;
 561                end if;
 562             end loop;
 563
 564          --  Based literal case. The base is the value we already scanned.
 565          --  In the case of colon, we insist that the following character
 566          --  is indeed an extended digit or a period. This catches a number
 567          --  of common errors, as well as catching the well known tricky
 568          --  bug otherwise arising from "x : integer range 1 .. 10:= 6;"
 569
 570          elsif C = '#'
 571            or else (C = ':' and then
 572                       (Source (Scan_Ptr + 1) = '.'
 573                          or else
 574                        Source (Scan_Ptr + 1) in '0' .. '9'
 575                          or else
 576                        Source (Scan_Ptr + 1) in 'A' .. 'Z'
 577                          or else
 578                        Source (Scan_Ptr + 1) in 'a' .. 'z'))
 579          then
 580             Accumulate_Checksum (C);
 581             Base_Char := C;
 582             UI_Base := UI_Int_Value;
 583
 584             if Base_Char = ':' then
 585                Based_Literal_Uses_Colon := True;
 586             end if;
 587
 588             if UI_Base < 2 or else UI_Base > 16 then
 589                Error_Msg_SC ("base not 2-16");
 590                UI_Base := Uint_16;
 591             end if;
 592
 593             Base := UI_To_Int (UI_Base);
 594             Scan_Ptr := Scan_Ptr + 1;
 595
 596             --  Scan out extended integer [. integer]
 597
 598             C := Source (Scan_Ptr);
 599             UI_Int_Value := Uint_0;
 600             Scale := 0;
 601
 602             loop
 603                if C in '0' .. '9' then
 604                   Accumulate_Checksum (C);
 605                   Extended_Digit_Value :=
 606                     Int'(Character'Pos (C)) - Int'(Character'Pos ('0'));
 607
 608                elsif C in 'A' .. 'F' then
 609                   Accumulate_Checksum (Character'Val (Character'Pos (C) + 32));
 610                   Extended_Digit_Value :=
 611                     Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;
 612
 613                elsif C in 'a' .. 'f' then
 614                   Accumulate_Checksum (C);
 615                   Extended_Digit_Value :=
 616                     Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;
 617
 618                else
 619                   Error_Msg_S ("extended digit expected");
 620                   exit;
 621                end if;
 622
 623                if Extended_Digit_Value >= Base then
 624                   Error_Msg_S ("digit '>= base");
 625                end if;
 626
 627                UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value;
 628                Scale := Scale - 1;
 629                Scan_Ptr := Scan_Ptr + 1;
 630                C := Source (Scan_Ptr);
 631
 632                if C = '_' then
 633                   loop
 634                      Accumulate_Checksum ('_');
 635                      Scan_Ptr := Scan_Ptr + 1;
 636                      C := Source (Scan_Ptr);
 637                      exit when C /= '_';
 638                      Error_No_Double_Underline;
 639                   end loop;
 640
 641                elsif C = '.' then
 642                   Accumulate_Checksum ('.');
 643
 644                   if Point_Scanned then
 645                      Error_Msg_S ("duplicate point ignored");
 646                   end if;
 647
 648                   Scan_Ptr := Scan_Ptr + 1;
 649                   C := Source (Scan_Ptr);
 650                   Point_Scanned := True;
 651                   Scale := 0;
 652
 653                elsif C = Base_Char then
 654                   Accumulate_Checksum (C);
 655                   Scan_Ptr := Scan_Ptr + 1;
 656                   exit;
 657
 658                elsif C = '#' or else C = ':' then
 659                   Error_Msg_S ("based number delimiters must match");
 660                   Scan_Ptr := Scan_Ptr + 1;
 661                   exit;
 662
 663                elsif not Identifier_Char (C) then
 664                   if Base_Char = '#' then
 665                      Error_Msg_S -- CODEFIX
 666                        ("missing '#");
 667                   else
 668                      Error_Msg_S -- CODEFIX
 669                        ("missing ':");
 670                   end if;
 671
 672                   exit;
 673                end if;
 674
 675             end loop;
 676
 677             UI_Num_Value := UI_Int_Value;
 678          end if;
 679
 680          --  Scan out exponent
 681
 682          if not Point_Scanned then
 683             Scale := 0;
 684             UI_Scale := Uint_0;
 685          else
 686             UI_Scale := UI_From_Int (Scale);
 687          end if;
 688
 689          if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then
 690             Accumulate_Checksum ('e');
 691             Scan_Ptr := Scan_Ptr + 1;
 692             Exponent_Is_Negative := False;
 693
 694             if Source (Scan_Ptr) = '+' then
 695                Accumulate_Checksum ('+');
 696                Scan_Ptr := Scan_Ptr + 1;
 697
 698             elsif Source (Scan_Ptr) = '-' then
 699                Accumulate_Checksum ('-');
 700
 701                if not Point_Scanned then
 702                   Error_Msg_S
 703                     ("negative exponent not allowed for integer literal");
 704                else
 705                   Exponent_Is_Negative := True;
 706                end if;
 707
 708                Scan_Ptr := Scan_Ptr + 1;
 709             end if;
 710
 711             UI_Int_Value := Uint_0;
 712
 713             if Source (Scan_Ptr) in '0' .. '9' then
 714                Scan_Integer;
 715             else
 716                Error_Digit_Expected;
 717             end if;
 718
 719             if Exponent_Is_Negative then
 720                UI_Scale := UI_Scale - UI_Int_Value;
 721             else
 722                UI_Scale := UI_Scale + UI_Int_Value;
 723             end if;
 724          end if;
 725
 726          --  Case of real literal to be returned
 727
 728          if Point_Scanned then
 729             Token := Tok_Real_Literal;
 730             Real_Literal_Value :=
 731               UR_From_Components (
 732                                   Num   => UI_Num_Value,
 733                                   Den   => -UI_Scale,
 734                                   Rbase => Base);
 735
 736          --  Case of integer literal to be returned
 737
 738          else
 739             Token := Tok_Integer_Literal;
 740
 741             if UI_Scale = 0 then
 742                Int_Literal_Value := UI_Num_Value;
 743
 744             --  Avoid doing possibly expensive calculations in cases like
 745             --  parsing 163E800_000# when semantics will not be done anyway.
 746             --  This is especially useful when parsing garbled input.
 747
 748             elsif Operating_Mode /= Check_Syntax
 749               and then (Serious_Errors_Detected = 0 or else Try_Semantics)
 750             then
 751                Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale;
 752
 753             else
 754                Int_Literal_Value := No_Uint;
 755             end if;
 756          end if;
 757
 758          Accumulate_Token_Checksum;
 759          return;
 760       end Nlit;
 761
 762       ----------
 763       -- Slit --
 764       ----------
 765
 766       procedure Slit is
 767
 768          Delimiter : Character;
 769          --  Delimiter (first character of string)
 770
 771          C : Character;
 772          --  Current source program character
 773
 774          Code : Char_Code;
 775          --  Current character code value
 776
 777          Err : Boolean;
 778          --  Error flag for Scan_Wide call
 779
 780          procedure Error_Bad_String_Char;
 781          --  Signal bad character in string/character literal. On entry
 782          --  Scan_Ptr points to the improper character encountered during the
 783          --  scan. Scan_Ptr is not modified, so it still points to the bad
 784          --  character on return.
 785
 786          procedure Error_Unterminated_String;
 787          --  Procedure called if a line terminator character is encountered
 788          --  during scanning a string, meaning that the string is not properly
 789          --  terminated.
 790
 791          procedure Set_String;
 792          --  Procedure used to distinguish between string and operator symbol.
 793          --  On entry the string has been scanned out, and its characters start
 794          --  at Token_Ptr and end one character before Scan_Ptr. On exit Token
 795          --  is set to Tok_String_Literal/Tok_Operator_Symbol as appropriate,
 796          --  and Token_Node is appropriately initialized. In addition, in the
 797          --  operator symbol case, Token_Name is appropriately set, and the
 798          --  flags [Wide_]Wide_Character_Found are set appropriately.
 799
 800          ---------------------------
 801          -- Error_Bad_String_Char --
 802          ---------------------------
 803
 804          procedure Error_Bad_String_Char is
 805             C : constant Character := Source (Scan_Ptr);
 806
 807          begin
 808             if C = HT then
 809                Error_Msg_S ("horizontal tab not allowed in string");
 810
 811             elsif C = VT or else C = FF then
 812                Error_Msg_S ("format effector not allowed in string");
 813
 814             elsif C in Upper_Half_Character then
 815                Error_Msg_S ("(Ada 83) upper half character not allowed");
 816
 817             else
 818                Error_Msg_S ("control character not allowed in string");
 819             end if;
 820          end Error_Bad_String_Char;
 821
 822          -------------------------------
 823          -- Error_Unterminated_String --
 824          -------------------------------
 825
 826          procedure Error_Unterminated_String is
 827          begin
 828             --  An interesting little refinement. Consider the following
 829             --  examples:
 830
 831             --     A := "this is an unterminated string;
 832             --     A := "this is an unterminated string &
 833             --     P(A, "this is a parameter that didn't get terminated);
 834
 835             --  We fiddle a little to do slightly better placement in these
 836             --  cases also if there is white space at the end of the line we
 837             --  place the flag at the start of this white space, not at the
 838             --  end. Note that we only have to test for blanks, since tabs
 839             --  aren't allowed in strings in the first place and would have
 840             --  caused an error message.
 841
 842             --  Two more cases that we treat specially are:
 843
 844             --     A := "this string uses the wrong terminator'
 845             --     A := "this string uses the wrong terminator' &
 846
 847             --  In these cases we give a different error message as well
 848
 849             --  We actually reposition the scan pointer to the point where we
 850             --  place the flag in these cases, since it seems a better bet on
 851             --  the original intention.
 852
 853             while Source (Scan_Ptr - 1) = ' '
 854               or else Source (Scan_Ptr - 1) = '&'
 855             loop
 856                Scan_Ptr := Scan_Ptr - 1;
 857                Unstore_String_Char;
 858             end loop;
 859
 860             --  Check for case of incorrect string terminator, but single quote
 861             --  is not considered incorrect if the opening terminator misused
 862             --  a single quote (error message already given).
 863
 864             if Delimiter /= '''
 865               and then Source (Scan_Ptr - 1) = '''
 866             then
 867                Unstore_String_Char;
 868                Error_Msg
 869                  ("incorrect string terminator character", Scan_Ptr - 1);
 870                return;
 871             end if;
 872
 873             if Source (Scan_Ptr - 1) = ';' then
 874                Scan_Ptr := Scan_Ptr - 1;
 875                Unstore_String_Char;
 876
 877                if Source (Scan_Ptr - 1) = ')' then
 878                   Scan_Ptr := Scan_Ptr - 1;
 879                   Unstore_String_Char;
 880                end if;
 881             end if;
 882
 883             Error_Msg_S -- CODEFIX
 884               ("missing string quote");
 885          end Error_Unterminated_String;
 886
 887          ----------------
 888          -- Set_String --
 889          ----------------
 890
 891          procedure Set_String is
 892             Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
 893             C1   : Character;
 894             C2   : Character;
 895             C3   : Character;
 896
 897          begin
 898             --  Token_Name is currently set to Error_Name. The following
 899             --  section of code resets Token_Name to the proper Name_Op_xx
 900             --  value if the string is a valid operator symbol, otherwise it is
 901             --  left set to Error_Name.
 902
 903             if Slen = 1 then
 904                C1 := Source (Token_Ptr + 1);
 905
 906                case C1 is
 907                   when '=' =>
 908                      Token_Name := Name_Op_Eq;
 909
 910                   when '>' =>
 911                      Token_Name := Name_Op_Gt;
 912
 913                   when '<' =>
 914                      Token_Name := Name_Op_Lt;
 915
 916                   when '+' =>
 917                      Token_Name := Name_Op_Add;
 918
 919                   when '-' =>
 920                      Token_Name := Name_Op_Subtract;
 921
 922                   when '&' =>
 923                      Token_Name := Name_Op_Concat;
 924
 925                   when '*' =>
 926                      Token_Name := Name_Op_Multiply;
 927
 928                   when '/' =>
 929                      Token_Name := Name_Op_Divide;
 930
 931                   when others =>
 932                      null;
 933                end case;
 934
 935             elsif Slen = 2 then
 936                C1 := Source (Token_Ptr + 1);
 937                C2 := Source (Token_Ptr + 2);
 938
 939                if C1 = '*' and then C2 = '*' then
 940                   Token_Name := Name_Op_Expon;
 941
 942                elsif C2 = '=' then
 943
 944                   if C1 = '/' then
 945                      Token_Name := Name_Op_Ne;
 946                   elsif C1 = '<' then
 947                      Token_Name := Name_Op_Le;
 948                   elsif C1 = '>' then
 949                      Token_Name := Name_Op_Ge;
 950                   end if;
 951
 952                elsif (C1 = 'O' or else C1 = 'o') and then    -- OR
 953                  (C2 = 'R' or else C2 = 'r')
 954                then
 955                   Token_Name := Name_Op_Or;
 956                end if;
 957
 958             elsif Slen = 3 then
 959                C1 := Source (Token_Ptr + 1);
 960                C2 := Source (Token_Ptr + 2);
 961                C3 := Source (Token_Ptr + 3);
 962
 963                if (C1 = 'A' or else C1 = 'a') and then       -- AND
 964                  (C2 = 'N' or else C2 = 'n') and then
 965                  (C3 = 'D' or else C3 = 'd')
 966                then
 967                   Token_Name := Name_Op_And;
 968
 969                elsif (C1 = 'A' or else C1 = 'a') and then    -- ABS
 970                  (C2 = 'B' or else C2 = 'b') and then
 971                  (C3 = 'S' or else C3 = 's')
 972                then
 973                   Token_Name := Name_Op_Abs;
 974
 975                elsif (C1 = 'M' or else C1 = 'm') and then    -- MOD
 976                  (C2 = 'O' or else C2 = 'o') and then
 977                  (C3 = 'D' or else C3 = 'd')
 978                then
 979                   Token_Name := Name_Op_Mod;
 980
 981                elsif (C1 = 'N' or else C1 = 'n') and then    -- NOT
 982                  (C2 = 'O' or else C2 = 'o') and then
 983                  (C3 = 'T' or else C3 = 't')
 984                then
 985                   Token_Name := Name_Op_Not;
 986
 987                elsif (C1 = 'R' or else C1 = 'r') and then    -- REM
 988                  (C2 = 'E' or else C2 = 'e') and then
 989                  (C3 = 'M' or else C3 = 'm')
 990                then
 991                   Token_Name := Name_Op_Rem;
 992
 993                elsif (C1 = 'X' or else C1 = 'x') and then    -- XOR
 994                  (C2 = 'O' or else C2 = 'o') and then
 995                  (C3 = 'R' or else C3 = 'r')
 996                then
 997                   Token_Name := Name_Op_Xor;
 998                end if;
 999
1000             end if;
1001
1002             --  If it is an operator symbol, then Token_Name is set. If it is
1003             --  some other string value, then Token_Name still contains
1004             --  Error_Name.
1005
1006             if Token_Name = Error_Name then
1007                Token := Tok_String_Literal;
1008
1009             else
1010                Token := Tok_Operator_Symbol;
1011             end if;
1012          end Set_String;
1013
1014       --  Start of processing for Slit
1015
1016       begin
1017          --  On entry, Scan_Ptr points to the opening character of the string
1018          --  which is either a percent, double quote, or apostrophe (single
1019          --  quote). The latter case is an error detected by the character
1020          --  literal circuit.
1021
1022          Delimiter := Source (Scan_Ptr);
1023          Accumulate_Checksum (Delimiter);
1024
1025          Start_String;
1026          Wide_Character_Found      := False;
1027          Wide_Wide_Character_Found := False;
1028          Scan_Ptr := Scan_Ptr + 1;
1029
1030          --  Loop to scan out characters of string literal
1031
1032          loop
1033             C := Source (Scan_Ptr);
1034
1035             if C = Delimiter then
1036                Accumulate_Checksum (C);
1037                Scan_Ptr := Scan_Ptr + 1;
1038                exit when Source (Scan_Ptr) /= Delimiter;
1039                Code := Get_Char_Code (C);
1040                Accumulate_Checksum (C);
1041                Scan_Ptr := Scan_Ptr + 1;
1042
1043             else
1044                if C = '"' and then Delimiter = '%' then
1045                   Error_Msg_S
1046                     ("quote not allowed in percent delimited string");
1047                   Code := Get_Char_Code (C);
1048                   Scan_Ptr := Scan_Ptr + 1;
1049
1050                elsif Start_Of_Wide_Character then
1051                   Wptr := Scan_Ptr;
1052                   Scan_Wide (Source, Scan_Ptr, Code, Err);
1053
1054                   if Err then
1055                      Error_Illegal_Wide_Character;
1056                      Code := Get_Char_Code (' ');
1057                   end if;
1058
1059                   Accumulate_Checksum (Code);
1060
1061                   --  In Ada 95 mode we allow any wide characters in a string
1062                   --  but in Ada 2005, the set of characters allowed has been
1063                   --  restricted to graphic characters.
1064
1065                   if Ada_Version >= Ada_2005
1066                     and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
1067                   then
1068                      Error_Msg
1069                        ("(Ada 2005) non-graphic character not permitted " &
1070                         "in string literal", Wptr);
1071                   end if;
1072
1073                else
1074                   Accumulate_Checksum (C);
1075
1076                   if C not in Graphic_Character then
1077                      if C in Line_Terminator then
1078                         Error_Unterminated_String;
1079                         exit;
1080
1081                      elsif C in Upper_Half_Character then
1082                         if Ada_Version = Ada_83 then
1083                            Error_Bad_String_Char;
1084                         end if;
1085
1086                      else
1087                         Error_Bad_String_Char;
1088                      end if;
1089                   end if;
1090
1091                   Code := Get_Char_Code (C);
1092                   Scan_Ptr := Scan_Ptr + 1;
1093                end if;
1094             end if;
1095
1096             Store_String_Char (Code);
1097
1098             if not In_Character_Range (Code) then
1099                if In_Wide_Character_Range (Code) then
1100                   Wide_Character_Found := True;
1101                else
1102                   Wide_Wide_Character_Found := True;
1103                end if;
1104             end if;
1105          end loop;
1106
1107          String_Literal_Id := End_String;
1108          Set_String;
1109          return;
1110       end Slit;
1111
1112       ----------------------------------
1113       -- Skip_Other_Format_Characters --
1114       ----------------------------------
1115
1116       procedure Skip_Other_Format_Characters is
1117          P    : Source_Ptr;
1118          Code : Char_Code;
1119          Err  : Boolean;
1120
1121       begin
1122          while Start_Of_Wide_Character loop
1123             P := Scan_Ptr;
1124             Scan_Wide (Source, Scan_Ptr, Code, Err);
1125
1126             if not Is_UTF_32_Other (UTF_32 (Code)) then
1127                Scan_Ptr := P;
1128                return;
1129             end if;
1130          end loop;
1131       end Skip_Other_Format_Characters;
1132
1133       -----------------------------
1134       -- Start_Of_Wide_Character --
1135       -----------------------------
1136
1137       function Start_Of_Wide_Character return Boolean is
1138          C : constant Character := Source (Scan_Ptr);
1139
1140       begin
1141          --  ESC encoding method with ESC present
1142
1143          if C = ESC
1144            and then Wide_Character_Encoding_Method in WC_ESC_Encoding_Method
1145          then
1146             return True;
1147
1148          --  Upper half character with upper half encoding
1149
1150          elsif C in Upper_Half_Character and then Upper_Half_Encoding then
1151             return True;
1152
1153          --  Brackets encoding
1154
1155          elsif C = '['
1156            and then Source (Scan_Ptr + 1) = '"'
1157            and then Identifier_Char (Source (Scan_Ptr + 2))
1158          then
1159             return True;
1160
1161          --  Not the start of a wide character
1162
1163          else
1164             return False;
1165          end if;
1166       end Start_Of_Wide_Character;
1167
1168    --  Start of processing for Scan
1169
1170    begin
1171       Prev_Token := Token;
1172       Prev_Token_Ptr := Token_Ptr;
1173       Token_Name := Error_Name;
1174
1175       --  The following loop runs more than once only if a format effector
1176       --  (tab, vertical tab, form  feed, line feed, carriage return) is
1177       --  encountered and skipped, or some error situation, such as an
1178       --  illegal character, is encountered.
1179
1180       <<Scan_Next_Character>>
1181
1182       loop
1183          --  Skip past blanks, loop is opened up for speed
1184
1185          while Source (Scan_Ptr) = ' ' loop
1186             if Source (Scan_Ptr + 1) /= ' ' then
1187                Scan_Ptr := Scan_Ptr + 1;
1188                exit;
1189             end if;
1190
1191             if Source (Scan_Ptr + 2) /= ' ' then
1192                Scan_Ptr := Scan_Ptr + 2;
1193                exit;
1194             end if;
1195
1196             if Source (Scan_Ptr + 3) /= ' ' then
1197                Scan_Ptr := Scan_Ptr + 3;
1198                exit;
1199             end if;
1200
1201             if Source (Scan_Ptr + 4) /= ' ' then
1202                Scan_Ptr := Scan_Ptr + 4;
1203                exit;
1204             end if;
1205
1206             if Source (Scan_Ptr + 5) /= ' ' then
1207                Scan_Ptr := Scan_Ptr + 5;
1208                exit;
1209             end if;
1210
1211             if Source (Scan_Ptr + 6) /= ' ' then
1212                Scan_Ptr := Scan_Ptr + 6;
1213                exit;
1214             end if;
1215
1216             if Source (Scan_Ptr + 7) /= ' ' then
1217                Scan_Ptr := Scan_Ptr + 7;
1218                exit;
1219             end if;
1220
1221             Scan_Ptr := Scan_Ptr + 8;
1222          end loop;
1223
1224          --  We are now at a non-blank character, which is the first character
1225          --  of the token we will scan, and hence the value of Token_Ptr.
1226
1227          Token_Ptr := Scan_Ptr;
1228
1229          --  Here begins the main case statement which transfers control on the
1230          --  basis of the non-blank character we have encountered.
1231
1232          case Source (Scan_Ptr) is
1233
1234          --  Line terminator characters
1235
1236          when CR | LF | FF | VT =>
1237             goto Scan_Line_Terminator;
1238
1239          --  Horizontal tab, just skip past it
1240
1241          when HT =>
1242             if Style_Check then
1243                Style.Check_HT;
1244             end if;
1245
1246             Scan_Ptr := Scan_Ptr + 1;
1247
1248          --  End of file character, treated as an end of file only if it is
1249          --  the last character in the buffer, otherwise it is ignored.
1250
1251          when EOF =>
1252             if Scan_Ptr = Source_Last (Current_Source_File) then
1253                Check_End_Of_Line;
1254
1255                if Style_Check then
1256                   Style.Check_EOF;
1257                end if;
1258
1259                Token := Tok_EOF;
1260                return;
1261             else
1262                Scan_Ptr := Scan_Ptr + 1;
1263             end if;
1264
1265          --  Ampersand
1266
1267          when '&' =>
1268             Accumulate_Checksum ('&');
1269
1270             if Source (Scan_Ptr + 1) = '&' then
1271                Error_Msg_S -- CODEFIX
1272                  ("'&'& should be `AND THEN`");
1273                Scan_Ptr := Scan_Ptr + 2;
1274                Token := Tok_And;
1275                return;
1276
1277             else
1278                Scan_Ptr := Scan_Ptr + 1;
1279                Token := Tok_Ampersand;
1280                return;
1281             end if;
1282
1283          --  Asterisk (can be multiplication operator or double asterisk which
1284          --  is the exponentiation compound delimiter).
1285
1286          when '*' =>
1287             Accumulate_Checksum ('*');
1288
1289             if Source (Scan_Ptr + 1) = '*' then
1290                Accumulate_Checksum ('*');
1291                Scan_Ptr := Scan_Ptr + 2;
1292                Token := Tok_Double_Asterisk;
1293                return;
1294
1295             else
1296                Scan_Ptr := Scan_Ptr + 1;
1297                Token := Tok_Asterisk;
1298                return;
1299             end if;
1300
1301          --  Colon, which can either be an isolated colon, or part of an
1302          --  assignment compound delimiter.
1303
1304          when ':' =>
1305             Accumulate_Checksum (':');
1306
1307             if Double_Char_Token ('=') then
1308                Token := Tok_Colon_Equal;
1309
1310                if Style_Check then
1311                   Style.Check_Colon_Equal;
1312                end if;
1313
1314                return;
1315
1316             elsif Source (Scan_Ptr + 1) = '-'
1317               and then Source (Scan_Ptr + 2) /= '-'
1318             then
1319                Token := Tok_Colon_Equal;
1320                Error_Msg -- CODEFIX
1321                  (":- should be :=", Scan_Ptr);
1322                Scan_Ptr := Scan_Ptr + 2;
1323                return;
1324
1325             else
1326                Scan_Ptr := Scan_Ptr + 1;
1327                Token := Tok_Colon;
1328
1329                if Style_Check then
1330                   Style.Check_Colon;
1331                end if;
1332
1333                return;
1334             end if;
1335
1336          --  Left parenthesis
1337
1338          when '(' =>
1339             Accumulate_Checksum ('(');
1340             Scan_Ptr := Scan_Ptr + 1;
1341             Token := Tok_Left_Paren;
1342
1343             if Style_Check then
1344                Style.Check_Left_Paren;
1345             end if;
1346
1347             return;
1348
1349          --  Left bracket
1350
1351          when '[' =>
1352             if Source (Scan_Ptr + 1) = '"' then
1353                goto Scan_Wide_Character;
1354
1355             else
1356                Error_Msg_S ("illegal character, replaced by ""(""");
1357                Scan_Ptr := Scan_Ptr + 1;
1358                Token := Tok_Left_Paren;
1359                return;
1360             end if;
1361
1362          --  Left brace
1363
1364          when '{' =>
1365             Error_Msg_S ("illegal character, replaced by ""(""");
1366             Scan_Ptr := Scan_Ptr + 1;
1367             Token := Tok_Left_Paren;
1368             return;
1369
1370          --  Comma
1371
1372          when ',' =>
1373             Accumulate_Checksum (',');
1374             Scan_Ptr := Scan_Ptr + 1;
1375             Token := Tok_Comma;
1376
1377             if Style_Check then
1378                Style.Check_Comma;
1379             end if;
1380
1381             return;
1382
1383          --  Dot, which is either an isolated period, or part of a double dot
1384          --  compound delimiter sequence. We also check for the case of a
1385          --  digit following the period, to give a better error message.
1386
1387          when '.' =>
1388             Accumulate_Checksum ('.');
1389
1390             if Double_Char_Token ('.') then
1391                Token := Tok_Dot_Dot;
1392
1393                if Style_Check then
1394                   Style.Check_Dot_Dot;
1395                end if;
1396
1397                return;
1398
1399             elsif Source (Scan_Ptr + 1) in '0' .. '9' then
1400                Error_Msg_S ("numeric literal cannot start with point");
1401                Scan_Ptr := Scan_Ptr + 1;
1402
1403             else
1404                Scan_Ptr := Scan_Ptr + 1;
1405                Token := Tok_Dot;
1406                return;
1407             end if;
1408
1409          --  Equal, which can either be an equality operator, or part of the
1410          --  arrow (=>) compound delimiter.
1411
1412          when '=' =>
1413             Accumulate_Checksum ('=');
1414
1415             if Double_Char_Token ('>') then
1416                Token := Tok_Arrow;
1417
1418                if Style_Check then
1419                   Style.Check_Arrow;
1420                end if;
1421
1422                return;
1423
1424             elsif Source (Scan_Ptr + 1) = '=' then
1425                Error_Msg_S -- CODEFIX
1426                  ("== should be =");
1427                Scan_Ptr := Scan_Ptr + 1;
1428             end if;
1429
1430             Scan_Ptr := Scan_Ptr + 1;
1431             Token := Tok_Equal;
1432             return;
1433
1434          --  Greater than, which can be a greater than operator, greater than
1435          --  or equal operator, or first character of a right label bracket.
1436
1437          when '>' =>
1438             Accumulate_Checksum ('>');
1439
1440             if Double_Char_Token ('=') then
1441                Token := Tok_Greater_Equal;
1442                return;
1443
1444             elsif Double_Char_Token ('>') then
1445                Token := Tok_Greater_Greater;
1446                return;
1447
1448             else
1449                Scan_Ptr := Scan_Ptr + 1;
1450                Token := Tok_Greater;
1451                return;
1452             end if;
1453
1454          --  Less than, which can be a less than operator, less than or equal
1455          --  operator, or the first character of a left label bracket, or the
1456          --  first character of a box (<>) compound delimiter.
1457
1458          when '<' =>
1459             Accumulate_Checksum ('<');
1460
1461             if Double_Char_Token ('=') then
1462                Token := Tok_Less_Equal;
1463                return;
1464
1465             elsif Double_Char_Token ('>') then
1466                Token := Tok_Box;
1467
1468                if Style_Check then
1469                   Style.Check_Box;
1470                end if;
1471
1472                return;
1473
1474             elsif Double_Char_Token ('<') then
1475                Token := Tok_Less_Less;
1476                return;
1477
1478             else
1479                Scan_Ptr := Scan_Ptr + 1;
1480                Token := Tok_Less;
1481                return;
1482             end if;
1483
1484          --  Minus, which is either a subtraction operator, or the first
1485          --  character of double minus starting a comment
1486
1487          when '-' => Minus_Case : begin
1488             if Source (Scan_Ptr + 1) = '>' then
1489                Error_Msg_S ("invalid token");
1490                Scan_Ptr := Scan_Ptr + 2;
1491                Token := Tok_Arrow;
1492                return;
1493
1494             elsif Source (Scan_Ptr + 1) /= '-' then
1495                Accumulate_Checksum ('-');
1496                Scan_Ptr := Scan_Ptr + 1;
1497                Token := Tok_Minus;
1498                return;
1499
1500             --  Comment
1501
1502             else -- Source (Scan_Ptr + 1) = '-' then
1503                if Style_Check then
1504                   Style.Check_Comment;
1505                end if;
1506
1507                Scan_Ptr := Scan_Ptr + 2;
1508
1509                --  If we are in preprocessor mode with Replace_In_Comments set,
1510                --  then we return the "--" as a token on its own.
1511
1512                if Replace_In_Comments then
1513                   Token := Tok_Comment;
1514                   return;
1515                end if;
1516
1517                --  Otherwise scan out the comment
1518
1519                Start_Of_Comment := Scan_Ptr;
1520
1521                --  Loop to scan comment (this loop runs more than once only if
1522                --  a horizontal tab or other non-graphic character is scanned)
1523
1524                loop
1525                   --  Scan to non graphic character (opened up for speed)
1526
1527                   --  Note that we just eat left brackets, which means that
1528                   --  bracket notation cannot be used for end of line
1529                   --  characters in comments. This seems a reasonable choice,
1530                   --  since no one would ever use brackets notation in a real
1531                   --  program in this situation, and if we allow brackets
1532                   --  notation, we forbid some valid comments which contain a
1533                   --  brackets sequence that happens to match an end of line
1534                   --  character.
1535
1536                   loop
1537                      exit when Source (Scan_Ptr) not in Graphic_Character;
1538                      Scan_Ptr := Scan_Ptr + 1;
1539                      exit when Source (Scan_Ptr) not in Graphic_Character;
1540                      Scan_Ptr := Scan_Ptr + 1;
1541                      exit when Source (Scan_Ptr) not in Graphic_Character;
1542                      Scan_Ptr := Scan_Ptr + 1;
1543                      exit when Source (Scan_Ptr) not in Graphic_Character;
1544                      Scan_Ptr := Scan_Ptr + 1;
1545                      exit when Source (Scan_Ptr) not in Graphic_Character;
1546                      Scan_Ptr := Scan_Ptr + 1;
1547                   end loop;
1548
1549                   --  Keep going if horizontal tab
1550
1551                   if Source (Scan_Ptr) = HT then
1552                      if Style_Check then
1553                         Style.Check_HT;
1554                      end if;
1555
1556                      Scan_Ptr := Scan_Ptr + 1;
1557
1558                   --  Terminate scan of comment if line terminator
1559
1560                   elsif Source (Scan_Ptr) in Line_Terminator then
1561                      exit;
1562
1563                   --  Terminate scan of comment if end of file encountered
1564                   --  (embedded EOF character or real last character in file)
1565
1566                   elsif Source (Scan_Ptr) = EOF then
1567                      exit;
1568
1569                   --  If we have a wide character, we have to scan it out,
1570                   --  because it might be a legitimate line terminator
1571
1572                   elsif Start_Of_Wide_Character then
1573                      declare
1574                         Wptr : constant Source_Ptr := Scan_Ptr;
1575                         Code : Char_Code;
1576                         Err  : Boolean;
1577
1578                      begin
1579                         Scan_Wide (Source, Scan_Ptr, Code, Err);
1580
1581                         --  If not well formed wide character, then just skip
1582                         --  past it and ignore it.
1583
1584                         if Err then
1585                            Scan_Ptr := Wptr + 1;
1586
1587                         --  If UTF_32 terminator, terminate comment scan
1588
1589                         elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
1590                            Scan_Ptr := Wptr;
1591                            exit;
1592                         end if;
1593                      end;
1594
1595                   --  Keep going if character in 80-FF range, or is ESC. These
1596                   --  characters are allowed in comments by RM-2.1(1), 2.7(2).
1597                   --  They are allowed even in Ada 83 mode according to the
1598                   --  approved AI. ESC was added to the AI in June 93.
1599
1600                   elsif Source (Scan_Ptr) in Upper_Half_Character
1601                      or else Source (Scan_Ptr) = ESC
1602                   then
1603                      Scan_Ptr := Scan_Ptr + 1;
1604
1605                   --  Otherwise we have an illegal comment character
1606
1607                   else
1608                      Error_Illegal_Character;
1609                   end if;
1610                end loop;
1611
1612                --  Note that, except when comments are tokens, we do NOT
1613                --  execute a return here, instead we fall through to reexecute
1614                --  the scan loop to look for a token.
1615
1616                if Comment_Is_Token then
1617                   Name_Len := Integer (Scan_Ptr - Start_Of_Comment);
1618                   Name_Buffer (1 .. Name_Len) :=
1619                     String (Source (Start_Of_Comment .. Scan_Ptr - 1));
1620                   Comment_Id := Name_Find;
1621                   Token := Tok_Comment;
1622                   return;
1623                end if;
1624             end if;
1625          end Minus_Case;
1626
1627          --  Double quote or percent starting a string literal
1628
1629          when '"' | '%' =>
1630             Slit;
1631             Post_Scan;
1632             return;
1633
1634          --  Apostrophe. This can either be the start of a character literal,
1635          --  or an isolated apostrophe used in a qualified expression or an
1636          --  attribute. We treat it as a character literal if it does not
1637          --  follow a right parenthesis, identifier, the keyword ALL or
1638          --  a literal. This means that we correctly treat constructs like:
1639
1640          --    A := CHARACTER'('A');
1641
1642          --  Note that RM-2.2(7) does not require a separator between
1643          --  "CHARACTER" and "'" in the above.
1644
1645          when ''' => Char_Literal_Case : declare
1646             Code : Char_Code;
1647             Err  : Boolean;
1648
1649          begin
1650             Accumulate_Checksum (''');
1651             Scan_Ptr := Scan_Ptr + 1;
1652
1653             --  Here is where we make the test to distinguish the cases. Treat
1654             --  as apostrophe if previous token is an identifier, right paren
1655             --  or the reserved word "all" (latter case as in A.all'Address)
1656             --  (or the reserved word "project" in project files). Also treat
1657             --  it as apostrophe after a literal (this catches some legitimate
1658             --  cases, like A."abs"'Address, and also gives better error
1659             --  behavior for impossible cases like 123'xxx).
1660
1661             if Prev_Token = Tok_Identifier
1662                or else Prev_Token = Tok_Right_Paren
1663                or else Prev_Token = Tok_All
1664                or else Prev_Token = Tok_Project
1665                or else Prev_Token in Token_Class_Literal
1666             then
1667                Token := Tok_Apostrophe;
1668
1669                if Style_Check then
1670                   Style.Check_Apostrophe;
1671                end if;
1672
1673                return;
1674
1675             --  Otherwise the apostrophe starts a character literal
1676
1677             else
1678                --  Case of wide character literal
1679
1680                if Start_Of_Wide_Character then
1681                   Wptr := Scan_Ptr;
1682                   Scan_Wide (Source, Scan_Ptr, Code, Err);
1683                   Accumulate_Checksum (Code);
1684
1685                   if Err then
1686                      Error_Illegal_Wide_Character;
1687                      Code := Character'Pos (' ');
1688
1689                   --  In Ada 95 mode we allow any wide character in a character
1690                   --  literal, but in Ada 2005, the set of characters allowed
1691                   --  is restricted to graphic characters.
1692
1693                   elsif Ada_Version >= Ada_2005
1694                     and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
1695                   then
1696                      Error_Msg -- CODEFIX????
1697                        ("(Ada 2005) non-graphic character not permitted " &
1698                         "in character literal", Wptr);
1699                   end if;
1700
1701                   if Source (Scan_Ptr) /= ''' then
1702                         Error_Msg_S ("missing apostrophe");
1703                   else
1704                      Scan_Ptr := Scan_Ptr + 1;
1705                   end if;
1706
1707                --  If we do not find a closing quote in the expected place then
1708                --  assume that we have a misguided attempt at a string literal.
1709
1710                --  However, if previous token is RANGE, then we return an
1711                --  apostrophe instead since this gives better error recovery
1712
1713                elsif Source (Scan_Ptr + 1) /= ''' then
1714                   if Prev_Token = Tok_Range then
1715                      Token := Tok_Apostrophe;
1716                      return;
1717
1718                   else
1719                      Scan_Ptr := Scan_Ptr - 1;
1720                      Error_Msg_S
1721                        ("strings are delimited by double quote character");
1722                      Slit;
1723                      Post_Scan;
1724                      return;
1725                   end if;
1726
1727                --  Otherwise we have a (non-wide) character literal
1728
1729                else
1730                   Accumulate_Checksum (Source (Scan_Ptr));
1731
1732                   if Source (Scan_Ptr) not in Graphic_Character then
1733                      if Source (Scan_Ptr) in Upper_Half_Character then
1734                         if Ada_Version = Ada_83 then
1735                            Error_Illegal_Character;
1736                         end if;
1737
1738                      else
1739                         Error_Illegal_Character;
1740                      end if;
1741                   end if;
1742
1743                   Code := Get_Char_Code (Source (Scan_Ptr));
1744                   Scan_Ptr := Scan_Ptr + 2;
1745                end if;
1746
1747                --  Fall through here with Scan_Ptr updated past the closing
1748                --  quote, and Code set to the Char_Code value for the literal
1749
1750                Accumulate_Checksum (''');
1751                Token := Tok_Char_Literal;
1752                Set_Character_Literal_Name (Code);
1753                Token_Name := Name_Find;
1754                Character_Code := Code;
1755                Post_Scan;
1756                return;
1757             end if;
1758          end Char_Literal_Case;
1759
1760          --  Right parenthesis
1761
1762          when ')' =>
1763             Accumulate_Checksum (')');
1764             Scan_Ptr := Scan_Ptr + 1;
1765             Token := Tok_Right_Paren;
1766
1767             if Style_Check then
1768                Style.Check_Right_Paren;
1769             end if;
1770
1771             return;
1772
1773          --  Right bracket or right brace, treated as right paren
1774
1775          when ']' | '}' =>
1776             Error_Msg_S ("illegal character, replaced by "")""");
1777             Scan_Ptr := Scan_Ptr + 1;
1778             Token := Tok_Right_Paren;
1779             return;
1780
1781          --  Slash (can be division operator or first character of not equal)
1782
1783          when '/' =>
1784             Accumulate_Checksum ('/');
1785
1786             if Double_Char_Token ('=') then
1787                Token := Tok_Not_Equal;
1788                return;
1789             else
1790                Scan_Ptr := Scan_Ptr + 1;
1791                Token := Tok_Slash;
1792                return;
1793             end if;
1794
1795          --  Semicolon
1796
1797          when ';' =>
1798             Accumulate_Checksum (';');
1799             Scan_Ptr := Scan_Ptr + 1;
1800             Token := Tok_Semicolon;
1801
1802             if Style_Check then
1803                Style.Check_Semicolon;
1804             end if;
1805
1806             return;
1807
1808          --  Vertical bar
1809
1810          when '|' => Vertical_Bar_Case : begin
1811             Accumulate_Checksum ('|');
1812
1813             --  Special check for || to give nice message
1814
1815             if Source (Scan_Ptr + 1) = '|' then
1816                Error_Msg_S -- CODEFIX
1817                  ("""'|'|"" should be `OR ELSE`");
1818                Scan_Ptr := Scan_Ptr + 2;
1819                Token := Tok_Or;
1820                return;
1821
1822             else
1823                Scan_Ptr := Scan_Ptr + 1;
1824                Token := Tok_Vertical_Bar;
1825
1826                if Style_Check then
1827                   Style.Check_Vertical_Bar;
1828                end if;
1829
1830                Post_Scan;
1831                return;
1832             end if;
1833          end Vertical_Bar_Case;
1834
1835          --  Exclamation, replacement character for vertical bar
1836
1837          when '!' => Exclamation_Case : begin
1838             Accumulate_Checksum ('!');
1839
1840             if Source (Scan_Ptr + 1) = '=' then
1841                Error_Msg_S -- CODEFIX
1842                  ("'!= should be /=");
1843                Scan_Ptr := Scan_Ptr + 2;
1844                Token := Tok_Not_Equal;
1845                return;
1846
1847             else
1848                Scan_Ptr := Scan_Ptr + 1;
1849                Token := Tok_Vertical_Bar;
1850                Post_Scan;
1851                return;
1852             end if;
1853          end Exclamation_Case;
1854
1855          --  Plus
1856
1857          when '+' => Plus_Case : begin
1858             Accumulate_Checksum ('+');
1859             Scan_Ptr := Scan_Ptr + 1;
1860             Token := Tok_Plus;
1861             return;
1862          end Plus_Case;
1863
1864          --  Digits starting a numeric literal
1865
1866          when '0' .. '9' =>
1867
1868             --  First a bit of a scan ahead to see if we have a case of an
1869             --  identifier starting with a digit (remembering exponent case).
1870
1871             declare
1872                C : constant Character := Source (Scan_Ptr + 1);
1873
1874             begin
1875                --  OK literal if digit followed by digit or underscore
1876
1877                if C in '0' .. '9' or else C = '_' then
1878                   null;
1879
1880                --  OK literal if digit not followed by identifier char
1881
1882                elsif not Identifier_Char (C) then
1883                   null;
1884
1885                --  OK literal if digit followed by e/E followed by digit/sign.
1886                --  We also allow underscore after the E, which is an error, but
1887                --  better handled by Nlit than deciding this is an identifier.
1888
1889                elsif (C = 'e' or else C = 'E')
1890                  and then (Source (Scan_Ptr + 2) in '0' .. '9'
1891                              or else Source (Scan_Ptr + 2) = '+'
1892                              or else Source (Scan_Ptr + 2) = '-'
1893                              or else Source (Scan_Ptr + 2) = '_')
1894                then
1895                   null;
1896
1897                --  Here we have what really looks like an identifier that
1898                --  starts with a digit, so give error msg.
1899
1900                else
1901                   Error_Msg_S ("identifier may not start with digit");
1902                   Name_Len := 1;
1903                   Underline_Found := False;
1904                   Name_Buffer (1) := Source (Scan_Ptr);
1905                   Accumulate_Checksum (Name_Buffer (1));
1906                   Scan_Ptr := Scan_Ptr + 1;
1907                   goto Scan_Identifier;
1908                end if;
1909             end;
1910
1911             --  Here we have an OK integer literal
1912
1913             Nlit;
1914
1915             --  Check for proper delimiter, ignoring other format characters
1916
1917             Skip_Other_Format_Characters;
1918
1919             if Identifier_Char (Source (Scan_Ptr)) then
1920                Error_Msg_S
1921                  ("delimiter required between literal and identifier");
1922             end if;
1923
1924             Post_Scan;
1925             return;
1926
1927          --  Lower case letters
1928
1929          when 'a' .. 'z' =>
1930             Name_Len := 1;
1931             Underline_Found := False;
1932             Name_Buffer (1) := Source (Scan_Ptr);
1933             Accumulate_Checksum (Name_Buffer (1));
1934             Scan_Ptr := Scan_Ptr + 1;
1935             goto Scan_Identifier;
1936
1937          --  Upper case letters
1938
1939          when 'A' .. 'Z' =>
1940             Name_Len := 1;
1941             Underline_Found := False;
1942             Name_Buffer (1) :=
1943               Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1944             Accumulate_Checksum (Name_Buffer (1));
1945             Scan_Ptr := Scan_Ptr + 1;
1946             goto Scan_Identifier;
1947
1948          --  Underline character
1949
1950          when '_' =>
1951             if Special_Characters ('_') then
1952                Token_Ptr := Scan_Ptr;
1953                Scan_Ptr := Scan_Ptr + 1;
1954                Token := Tok_Special;
1955                Special_Character := '_';
1956                return;
1957             end if;
1958
1959             Error_Msg_S ("identifier cannot start with underline");
1960             Name_Len := 1;
1961             Name_Buffer (1) := '_';
1962             Scan_Ptr := Scan_Ptr + 1;
1963             Underline_Found := False;
1964             goto Scan_Identifier;
1965
1966          --  Space (not possible, because we scanned past blanks)
1967
1968          when ' ' =>
1969             raise Program_Error;
1970
1971          --  Characters in top half of ASCII 8-bit chart
1972
1973          when Upper_Half_Character =>
1974
1975             --  Wide character case
1976
1977             if Upper_Half_Encoding then
1978                goto Scan_Wide_Character;
1979
1980             --  Otherwise we have OK Latin-1 character
1981
1982             else
1983                --  Upper half characters may possibly be identifier letters
1984                --  but can never be digits, so Identifier_Char can be used to
1985                --  test for a valid start of identifier character.
1986
1987                if Identifier_Char (Source (Scan_Ptr)) then
1988                   Name_Len := 0;
1989                   Underline_Found := False;
1990                   goto Scan_Identifier;
1991                else
1992                   Error_Illegal_Character;
1993                end if;
1994             end if;
1995
1996          when ESC =>
1997
1998             --  ESC character, possible start of identifier if wide characters
1999             --  using ESC encoding are allowed in identifiers, which we can
2000             --  tell by looking at the Identifier_Char flag for ESC, which is
2001             --  only true if these conditions are met. In Ada 2005 mode, may
2002             --  also be valid UTF_32 space or line terminator character.
2003
2004             if Identifier_Char (ESC) then
2005                Name_Len := 0;
2006                goto Scan_Wide_Character;
2007             else
2008                Error_Illegal_Character;
2009             end if;
2010
2011          --  Invalid control characters
2012
2013          when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS  | ASCII.SO |
2014               SI  | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
2015               EM  | FS  | GS  | RS  | US  | DEL
2016          =>
2017             Error_Illegal_Character;
2018
2019          --  Invalid graphic characters
2020
2021          when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
2022
2023             --  If Set_Special_Character has been called for this character,
2024             --  set Scans.Special_Character and return a Special token.
2025
2026             if Special_Characters (Source (Scan_Ptr)) then
2027                Token_Ptr := Scan_Ptr;
2028                Token := Tok_Special;
2029                Special_Character := Source (Scan_Ptr);
2030                Scan_Ptr := Scan_Ptr + 1;
2031                return;
2032
2033             --  Otherwise, this is an illegal character
2034
2035             else
2036                Error_Illegal_Character;
2037             end if;
2038
2039          --  End switch on non-blank character
2040
2041          end case;
2042
2043       --  End loop past format effectors. The exit from this loop is by
2044       --  executing a return statement following completion of token scan
2045       --  (control never falls out of this loop to the code which follows)
2046
2047       end loop;
2048
2049       --  Wide_Character scanning routine. On entry we have encountered the
2050       --  initial character of a wide character sequence.
2051
2052       <<Scan_Wide_Character>>
2053
2054          declare
2055             Code : Char_Code;
2056             Cat  : Category;
2057             Err  : Boolean;
2058
2059          begin
2060             Wptr := Scan_Ptr;
2061             Scan_Wide (Source, Scan_Ptr, Code, Err);
2062
2063             --  If bad wide character, signal error and continue scan
2064
2065             if Err then
2066                Error_Illegal_Wide_Character;
2067                goto Scan_Next_Character;
2068             end if;
2069
2070             Cat := Get_Category (UTF_32 (Code));
2071
2072             --  If OK letter, reset scan ptr and go scan identifier
2073
2074             if Is_UTF_32_Letter (Cat) then
2075                Scan_Ptr := Wptr;
2076                Name_Len := 0;
2077                Underline_Found := False;
2078                goto Scan_Identifier;
2079
2080             --  If OK wide space, ignore and keep scanning (we do not include
2081             --  any ignored spaces in checksum)
2082
2083             elsif Is_UTF_32_Space (Cat) then
2084                goto Scan_Next_Character;
2085
2086             --  If other format character, ignore and keep scanning (again we
2087             --  do not include in the checksum) (this is for AI-0079).
2088
2089             elsif Is_UTF_32_Other (Cat) then
2090                goto Scan_Next_Character;
2091
2092             --  If OK wide line terminator, terminate current line
2093
2094             elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
2095                Scan_Ptr := Wptr;
2096                goto Scan_Line_Terminator;
2097
2098             --  Punctuation is an error (at start of identifier)
2099
2100             elsif Is_UTF_32_Punctuation (Cat) then
2101                Error_Msg ("identifier cannot start with punctuation", Wptr);
2102                Scan_Ptr := Wptr;
2103                Name_Len := 0;
2104                Underline_Found := False;
2105                goto Scan_Identifier;
2106
2107             --  Mark character is an error (at start of identifier)
2108
2109             elsif Is_UTF_32_Mark (Cat) then
2110                Error_Msg ("identifier cannot start with mark character", Wptr);
2111                Scan_Ptr := Wptr;
2112                Name_Len := 0;
2113                Underline_Found := False;
2114                goto Scan_Identifier;
2115
2116             --  Extended digit character is an error. Could be bad start of
2117             --  identifier or bad literal. Not worth doing too much to try to
2118             --  distinguish these cases, but we will do a little bit.
2119
2120             elsif Is_UTF_32_Digit (Cat) then
2121                Error_Msg
2122                  ("identifier cannot start with digit character", Wptr);
2123                Scan_Ptr := Wptr;
2124                Name_Len := 0;
2125                Underline_Found := False;
2126                goto Scan_Identifier;
2127
2128             --  All other wide characters are illegal here
2129
2130             else
2131                Error_Illegal_Wide_Character;
2132                goto Scan_Next_Character;
2133             end if;
2134          end;
2135
2136       --  Routine to scan line terminator. On entry Scan_Ptr points to a
2137       --  character which is one of FF,LR,CR,VT, or one of the wide characters
2138       --  that is treated as a line terminator.
2139
2140       <<Scan_Line_Terminator>>
2141
2142          --  Check line too long
2143
2144          Check_End_Of_Line;
2145
2146          --  Set Token_Ptr, if End_Of_Line is a token, for the case when it is
2147          --  a physical line.
2148
2149          if End_Of_Line_Is_Token then
2150             Token_Ptr := Scan_Ptr;
2151          end if;
2152
2153          declare
2154             Physical : Boolean;
2155
2156          begin
2157             Skip_Line_Terminators (Scan_Ptr, Physical);
2158
2159             --  If we are at start of physical line, update scan pointers to
2160             --  reflect the start of the new line.
2161
2162             if Physical then
2163                Current_Line_Start       := Scan_Ptr;
2164                Start_Column             := Set_Start_Column;
2165                First_Non_Blank_Location := Scan_Ptr;
2166
2167                --  If End_Of_Line is a token, we return it as it is a
2168                --  physical line.
2169
2170                if End_Of_Line_Is_Token then
2171                   Token := Tok_End_Of_Line;
2172                   return;
2173                end if;
2174             end if;
2175          end;
2176
2177          goto Scan_Next_Character;
2178
2179       --  Identifier scanning routine. On entry, some initial characters of
2180       --  the identifier may have already been stored in Name_Buffer. If so,
2181       --  Name_Len has the number of characters stored, otherwise Name_Len is
2182       --  set to zero on entry. Underline_Found is also set False on entry.
2183
2184       <<Scan_Identifier>>
2185
2186          --  This loop scans as fast as possible past lower half letters and
2187          --  digits, which we expect to be the most common characters.
2188
2189          loop
2190             if Source (Scan_Ptr) in 'a' .. 'z'
2191               or else Source (Scan_Ptr) in '0' .. '9'
2192             then
2193                Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
2194                Accumulate_Checksum (Source (Scan_Ptr));
2195
2196             elsif Source (Scan_Ptr) in 'A' .. 'Z' then
2197                Name_Buffer (Name_Len + 1) :=
2198                  Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
2199                Accumulate_Checksum (Name_Buffer (Name_Len + 1));
2200
2201             else
2202                exit;
2203             end if;
2204
2205             Underline_Found := False;
2206             Scan_Ptr := Scan_Ptr + 1;
2207             Name_Len := Name_Len + 1;
2208          end loop;
2209
2210          --  If we fall through, then we have encountered either an underline
2211          --  character, or an extended identifier character (i.e. one from the
2212          --  upper half), or a wide character, or an identifier terminator. The
2213          --  initial test speeds us up in the most common case where we have
2214          --  an identifier terminator. Note that ESC is an identifier character
2215          --  only if a wide character encoding method that uses ESC encoding
2216          --  is active, so if we find an ESC character we know that we have a
2217          --  wide character.
2218
2219          if Identifier_Char (Source (Scan_Ptr))
2220            or else (Source (Scan_Ptr) in Upper_Half_Character
2221                      and then Upper_Half_Encoding)
2222          then
2223             --  Case of underline
2224
2225             if Source (Scan_Ptr) = '_' then
2226                Accumulate_Checksum ('_');
2227
2228                if Underline_Found then
2229                   Error_No_Double_Underline;
2230                else
2231                   Underline_Found := True;
2232                   Name_Len := Name_Len + 1;
2233                   Name_Buffer (Name_Len) := '_';
2234                end if;
2235
2236                Scan_Ptr := Scan_Ptr + 1;
2237                goto Scan_Identifier;
2238
2239             --  Upper half character
2240
2241             elsif Source (Scan_Ptr) in Upper_Half_Character
2242               and then not Upper_Half_Encoding
2243             then
2244                Accumulate_Checksum (Source (Scan_Ptr));
2245                Store_Encoded_Character
2246                  (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
2247                Scan_Ptr := Scan_Ptr + 1;
2248                Underline_Found := False;
2249                goto Scan_Identifier;
2250
2251             --  Left bracket not followed by a quote terminates an identifier.
2252             --  This is an error, but we don't want to give a junk error msg
2253             --  about wide characters in this case!
2254
2255             elsif Source (Scan_Ptr) = '['
2256               and then Source (Scan_Ptr + 1) /= '"'
2257             then
2258                null;
2259
2260             --  We know we have a wide character encoding here (the current
2261             --  character is either ESC, left bracket, or an upper half
2262             --  character depending on the encoding method).
2263
2264             else
2265                --  Scan out the wide character and insert the appropriate
2266                --  encoding into the name table entry for the identifier.
2267
2268                declare
2269                   Code : Char_Code;
2270                   Err  : Boolean;
2271                   Chr  : Character;
2272                   Cat  : Category;
2273
2274                begin
2275                   Wptr := Scan_Ptr;
2276                   Scan_Wide (Source, Scan_Ptr, Code, Err);
2277
2278                   --  If error, signal error
2279
2280                   if Err then
2281                      Error_Illegal_Wide_Character;
2282
2283                   --  If the character scanned is a normal identifier
2284                   --  character, then we treat it that way.
2285
2286                   elsif In_Character_Range (Code)
2287                     and then Identifier_Char (Get_Character (Code))
2288                   then
2289                      Chr := Get_Character (Code);
2290                      Accumulate_Checksum (Chr);
2291                      Store_Encoded_Character
2292                        (Get_Char_Code (Fold_Lower (Chr)));
2293                      Underline_Found := False;
2294
2295                   --  Here if not a normal identifier character
2296
2297                   else
2298                      Cat := Get_Category (UTF_32 (Code));
2299
2300                      --  Wide character in Unicode category "Other, Format"
2301                      --  is not accepted in an identifier. This is because it
2302                      --  it is considered a security risk (AI-0091).
2303
2304                      --  However, it is OK for such a character to appear at
2305                      --  the end of an identifier.
2306
2307                      if Is_UTF_32_Other (Cat) then
2308                         if not Identifier_Char (Source (Scan_Ptr)) then
2309                            goto Scan_Identifier_Complete;
2310                         else
2311                            Error_Msg
2312                              ("identifier cannot contain other_format "
2313                               & "character", Wptr);
2314                            goto Scan_Identifier;
2315                         end if;
2316
2317                      --  Wide character in category Separator,Space terminates
2318
2319                      elsif Is_UTF_32_Space (Cat) then
2320                         goto Scan_Identifier_Complete;
2321                      end if;
2322
2323                      --  Here if wide character is part of the identifier
2324
2325                      --  Make sure we are allowing wide characters in
2326                      --  identifiers. Note that we allow wide character
2327                      --  notation for an OK identifier character. This in
2328                      --  particular allows bracket or other notation to be
2329                      --  used for upper half letters.
2330
2331                      --  Wide characters are always allowed in Ada 2005
2332
2333                      if Identifier_Character_Set /= 'w'
2334                        and then Ada_Version < Ada_2005
2335                      then
2336                         Error_Msg
2337                           ("wide character not allowed in identifier", Wptr);
2338                      end if;
2339
2340                      --  If OK letter, store it folding to upper case. Note
2341                      --  that we include the folded letter in the checksum.
2342
2343                      if Is_UTF_32_Letter (Cat) then
2344                         Code :=
2345                           Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code)));
2346                         Accumulate_Checksum (Code);
2347                         Store_Encoded_Character (Code);
2348                         Underline_Found := False;
2349
2350                      --  If OK extended digit or mark, then store it
2351
2352                      elsif Is_UTF_32_Digit (Cat)
2353                        or else Is_UTF_32_Mark (Cat)
2354                      then
2355                         Accumulate_Checksum (Code);
2356                         Store_Encoded_Character (Code);
2357                         Underline_Found := False;
2358
2359                      --  Wide punctuation is also stored, but counts as an
2360                      --  underline character for error checking purposes.
2361
2362                      elsif Is_UTF_32_Punctuation (Cat) then
2363                         Accumulate_Checksum (Code);
2364
2365                         if Underline_Found then
2366                            declare
2367                               Cend : constant Source_Ptr := Scan_Ptr;
2368                            begin
2369                               Scan_Ptr := Wptr;
2370                               Error_No_Double_Underline;
2371                               Scan_Ptr := Cend;
2372                            end;
2373
2374                         else
2375                            Store_Encoded_Character (Code);
2376                            Underline_Found := True;
2377                         end if;
2378
2379                      --  Any other wide character is not acceptable
2380
2381                      else
2382                         Error_Msg
2383                           ("invalid wide character in identifier", Wptr);
2384                      end if;
2385                   end if;
2386
2387                   goto Scan_Identifier;
2388                end;
2389             end if;
2390          end if;
2391
2392       --  Scan of identifier is complete. The identifier is stored in
2393       --  Name_Buffer, and Scan_Ptr points past the last character.
2394
2395       <<Scan_Identifier_Complete>>
2396          Token_Name := Name_Find;
2397
2398          --  Check for identifier ending with underline or punctuation char
2399
2400          if Underline_Found then
2401             Underline_Found := False;
2402
2403             if Source (Scan_Ptr - 1) = '_' then
2404                Error_Msg
2405                  ("identifier cannot end with underline", Scan_Ptr - 1);
2406             else
2407                Error_Msg
2408                  ("identifier cannot end with punctuation character", Wptr);
2409             end if;
2410          end if;
2411
2412          --  We will assume it is an identifier, not a keyword, so that the
2413          --  checksum is independent of the Ada version.
2414
2415          Token := Tok_Identifier;
2416          Accumulate_Token_Checksum;
2417
2418          --  Here is where we check if it was a keyword
2419
2420          if Is_Keyword_Name (Token_Name) then
2421             Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
2422
2423             --  Keyword style checks
2424
2425             if Style_Check then
2426
2427                --  Deal with possible style check for non-lower case keyword,
2428                --  but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
2429                --  for this purpose if they appear as attribute designators.
2430                --  Actually we only check the first character for speed.
2431
2432                --  Ada 2005 (AI-284): Do not apply the style check in case of
2433                --  "pragma Interface"
2434
2435                --  Ada 2005 (AI-340): Do not apply the style check in case of
2436                --  MOD attribute.
2437
2438                if Source (Token_Ptr) <= 'Z'
2439                  and then (Prev_Token /= Tok_Apostrophe
2440                            or else
2441                              (Token /= Tok_Access and then
2442                               Token /= Tok_Delta  and then
2443                               Token /= Tok_Digits and then
2444                               Token /= Tok_Mod    and then
2445                               Token /= Tok_Range))
2446                        and then (Token /= Tok_Interface
2447                                   or else
2448                                     (Token = Tok_Interface
2449                                       and then Prev_Token /= Tok_Pragma))
2450                then
2451                   Style.Non_Lower_Case_Keyword;
2452                end if;
2453
2454                --  Check THEN/ELSE style rules. These do not apply to AND THEN
2455                --  or OR ELSE, and do not apply in conditional expressions.
2456
2457                if (Token = Tok_Then and then Prev_Token /= Tok_And)
2458                     or else
2459                   (Token = Tok_Else and then Prev_Token /= Tok_Or)
2460                then
2461                   if Inside_Conditional_Expression = 0 then
2462                      Style.Check_Separate_Stmt_Lines;
2463                   end if;
2464                end if;
2465             end if;
2466
2467             --  We must reset Token_Name since this is not an identifier and
2468             --  if we leave Token_Name set, the parser gets confused because
2469             --  it thinks it is dealing with an identifier instead of the
2470             --  corresponding keyword.
2471
2472             Token_Name := No_Name;
2473             return;
2474
2475          --  It is an identifier after all
2476
2477          else
2478             Post_Scan;
2479             return;
2480          end if;
2481    end Scan;
2482
2483    --------------------------
2484    -- Set_Comment_As_Token --
2485    --------------------------
2486
2487    procedure Set_Comment_As_Token (Value : Boolean) is
2488    begin
2489       Comment_Is_Token := Value;
2490    end Set_Comment_As_Token;
2491
2492    ------------------------------
2493    -- Set_End_Of_Line_As_Token --
2494    ------------------------------
2495
2496    procedure Set_End_Of_Line_As_Token (Value : Boolean) is
2497    begin
2498       End_Of_Line_Is_Token := Value;
2499    end Set_End_Of_Line_As_Token;
2500
2501    ---------------------------
2502    -- Set_Special_Character --
2503    ---------------------------
2504
2505    procedure Set_Special_Character (C : Character) is
2506    begin
2507       case C is
2508          when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' =>
2509             Special_Characters (C) := True;
2510
2511          when others =>
2512             null;
2513       end case;
2514    end Set_Special_Character;
2515
2516    ----------------------
2517    -- Set_Start_Column --
2518    ----------------------
2519
2520    --  Note: it seems at first glance a little expensive to compute this value
2521    --  for every source line (since it is certainly not used for all source
2522    --  lines). On the other hand, it doesn't take much more work to skip past
2523    --  the initial white space on the line counting the columns than it would
2524    --  to scan past the white space using the standard scanning circuits.
2525
2526    function Set_Start_Column return Column_Number is
2527       Start_Column : Column_Number := 0;
2528
2529    begin
2530       --  Outer loop scans past horizontal tab characters
2531
2532       Tabs_Loop : loop
2533
2534          --  Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
2535          --  past the blanks and adjusting Start_Column to account for them.
2536
2537          Blanks_Loop : loop
2538             if Source (Scan_Ptr) = ' ' then
2539                if Source (Scan_Ptr + 1) = ' ' then
2540                   if Source (Scan_Ptr + 2) = ' ' then
2541                      if Source (Scan_Ptr + 3) = ' ' then
2542                         if Source (Scan_Ptr + 4) = ' ' then
2543                            if Source (Scan_Ptr + 5) = ' ' then
2544                               if Source (Scan_Ptr + 6) = ' ' then
2545                                  Scan_Ptr := Scan_Ptr + 7;
2546                                  Start_Column := Start_Column + 7;
2547                               else
2548                                  Scan_Ptr := Scan_Ptr + 6;
2549                                  Start_Column := Start_Column + 6;
2550                                  exit Blanks_Loop;
2551                               end if;
2552                            else
2553                               Scan_Ptr := Scan_Ptr + 5;
2554                               Start_Column := Start_Column + 5;
2555                               exit Blanks_Loop;
2556                            end if;
2557                         else
2558                            Scan_Ptr := Scan_Ptr + 4;
2559                            Start_Column := Start_Column + 4;
2560                            exit Blanks_Loop;
2561                         end if;
2562                      else
2563                         Scan_Ptr := Scan_Ptr + 3;
2564                         Start_Column := Start_Column + 3;
2565                         exit Blanks_Loop;
2566                      end if;
2567                   else
2568                      Scan_Ptr := Scan_Ptr + 2;
2569                      Start_Column := Start_Column + 2;
2570                      exit Blanks_Loop;
2571                   end if;
2572                else
2573                   Scan_Ptr := Scan_Ptr + 1;
2574                   Start_Column := Start_Column + 1;
2575                   exit Blanks_Loop;
2576                end if;
2577             else
2578                exit Blanks_Loop;
2579             end if;
2580          end loop Blanks_Loop;
2581
2582          --  Outer loop keeps going only if a horizontal tab follows
2583
2584          if Source (Scan_Ptr) = HT then
2585             if Style_Check then
2586                Style.Check_HT;
2587             end if;
2588
2589             Scan_Ptr := Scan_Ptr + 1;
2590             Start_Column := (Start_Column / 8) * 8 + 8;
2591          else
2592             exit Tabs_Loop;
2593          end if;
2594       end loop Tabs_Loop;
2595
2596       return Start_Column;
2597
2598    --  A constraint error can happen only if we have a compiler with checks on
2599    --  and a line with a ludicrous number of tabs or spaces at the start. In
2600    --  such a case, we really don't care if Start_Column is right or not.
2601
2602    exception
2603       when Constraint_Error =>
2604          return Start_Column;
2605    end Set_Start_Column;
2606
2607 end Scng;