gcc/ada/namet.ads

   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                                N A M E T                                 --
   6 --                                                                          --
   7 --                                 S p e c                                  --
   8 --                                                                          --
   9 --          Copyright (C) 1992-2004 Free Software Foundation, Inc.          --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 2,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
  17 -- for  more details.  You should have  received  a copy of the GNU General --
  18 -- Public License  distributed with GNAT;  see file COPYING.  If not, write --
  19 -- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
  20 -- MA 02111-1307, USA.                                                      --
  21 --                                                                          --
  22 -- As a special exception,  if other files  instantiate  generics from this --
  23 -- unit, or you link  this unit with other files  to produce an executable, --
  24 -- this  unit  does not  by itself cause  the resulting  executable  to  be --
  25 -- covered  by the  GNU  General  Public  License.  This exception does not --
  26 -- however invalidate  any other reasons why  the executable file  might be --
  27 -- covered by the  GNU Public License.                                      --
  28 --                                                                          --
  29 -- GNAT was originally developed  by the GNAT team at  New York University. --
  30 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  31 --                                                                          --
  32 ------------------------------------------------------------------------------
  33
  34 with Alloc;
  35 with Table;
  36 with System;   use System;
  37 with Types;    use Types;
  38
  39 package Namet is
  40
  41 --  WARNING: There is a C version of this package. Any changes to this
  42 --  source file must be properly reflected in the C header file namet.h
  43 --  which is created manually from namet.ads and namet.adb.
  44
  45 --  This package contains routines for handling the names table. The table
  46 --  is used to store character strings for identifiers and operator symbols,
  47 --  as well as other string values such as unit names and file names.
  48
  49 --  The forms of the entries are as follows:
  50
  51 --    Identifiers        Stored with upper case letters folded to lower case.
  52 --                       Upper half (16#80# bit set) and wide characters are
  53 --                       stored in an encoded form (Uhh for upper half and
  54 --                       Whhhh for wide characters, as provided by the routine
  55 --                       Store_Encoded_Character, where hh are hex digits for
  56 --                       the character code using lower case a-f). Normally
  57 --                       the use of U or W in other internal names is avoided,
  58 --                       but these letters may be used in internal names
  59 --                       (without this special meaning), if they appear as
  60 --                       the last character of the name, or they are followed
  61 --                       by an upper case letter or an underscore.
  62
  63
  64 --    Operator symbols   Stored with an initial letter O, and the remainder
  65 --                       of the name is the lower case characters XXX where
  66 --                       the name is Name_Op_XXX, see Snames spec for a full
  67 --                       list of the operator names. Normally the use of O
  68 --                       in other internal names is avoided, but it may be
  69 --                       used in internal names (without this special meaning)
  70 --                       if it is the last character of the name, or if it is
  71 --                       followed by an upper case letter or an underscore.
  72
  73 --    Character literals Character literals have names that are used only for
  74 --                       debugging and error message purposes. The form is an
  75 --                       upper case Q followed by a single lower case letter,
  76 --                       or by a Uxx or Wxxxx encoding as described for
  77 --                       identifiers. The Set_Character_Literal_Name procedure
  78 --                       should be used to construct these encodings. Normally
  79 --                       the use of Q in other internal names is avoided, but
  80 --                       it may be used in internal names (without this special
  81 --                       meaning) if it is the last character of the name, or
  82 --                       if it is followed by an upper case letter or an
  83 --                       underscore.
  84
  85 --    Unit names         Stored with upper case letters folded to lower case,
  86 --                       using Uhh/Whhhh encoding as described for identifiers,
  87 --                       and a %s or %b suffix for specs/bodies. See package
  88 --                       Uname for further details.
  89
  90 --    File names         Are stored in the form provided by Osint. Typically
  91 --                       they may include wide character escape sequences and
  92 --                       upper case characters (in non-encoded form). Casing
  93 --                       is also derived from the external environment. Note
  94 --                       that file names provided by Osint must generally be
  95 --                       consistent with the names from Fname.Get_File_Name.
  96
  97 --    Other strings      The names table is also used as a convenient storage
  98 --                       location for other variable length strings such as
  99 --                       error messages etc. There are no restrictions on what
 100 --                       characters may appear for such entries.
 101
 102 --  Note: the encodings Uhh (upper half characters), Whhhh (wide characters),
 103 --  and Qx (character literal names) are described in the spec, since they
 104 --  are visible throughout the system (e.g. in debugging output). However,
 105 --  no code should depend on these particular encodings, so it should be
 106 --  possible to change the encodings by making changes only to the Namet
 107 --  specification (to change these comments) and the body (which actually
 108 --  implements the encodings).
 109
 110 --  The names are hashed so that a given name appears only once in the table,
 111 --  except that names entered with Name_Enter as opposed to Name_Find are
 112 --  omitted from the hash table.
 113
 114 --  The first 26 entries in the names table (with Name_Id values in the range
 115 --  First_Name_Id .. First_Name_Id + 25) represent names which are the one
 116 --  character lower case letters in the range a-z, and these names are created
 117 --  and initialized by the Initialize procedure.
 118
 119 --  Two values, one of type Int and one of type Byte, are stored with each
 120 --  names table entry and subprograms are provided for setting and retrieving
 121 --  these associated values. The usage of these values is up to the client.
 122 --  In the compiler, the Int field is used to point to a chain of potentially
 123 --  visible entities (see Sem.Ch8 for details), and the Byte field is used
 124 --  to hold the Token_Type value for reserved words (see Sem for details).
 125 --  In the binder, the Byte field is unused, and the Int field is used in
 126 --  various ways depending on the name involved (see binder documentation).
 127
 128    Name_Buffer : String (1 .. 16*1024);
 129    --  This buffer is used to set the name to be stored in the table for the
 130    --  Name_Find call, and to retrieve the name for the Get_Name_String call.
 131    --  The declared length is exactly 16K, with no extra slot reserved for a
 132    --  trailing ASCII.NUL. The 16K is intended to be an infinite value that
 133    --  ensures we never overflow the buffer (names this long are too absurd!)
 134
 135    Name_Len : Natural;
 136    --  Length of the name stored in Name_Buffer. Input to Name_Find/Name_Enter;
 137    --  set on output by Get_Name_String and Write_Name (among others).
 138
 139    -----------------
 140    -- Subprograms --
 141    -----------------
 142
 143    procedure Finalize;
 144    --  Called when a use of the Namet package is complete (and before any
 145    --  subsequent call to Initialize). Currently this routine is used only
 146    --  to generate debugging output.
 147
 148    procedure Get_Name_String (Id : Name_Id);
 149    --  Retrieves the string associated with entry Id in the names table. The
 150    --  resulting string is stored in Name_Buffer (1 .. Name_Len), with
 151    --  Name_Len set to its length. It is an error to call Get_Name_String
 152    --  with one of the special name Id values (No_Name or Error_Name).
 153
 154    function Get_Name_String (Id : Name_Id) return String;
 155    --  Functional form: returns the name for Id as a String result, without
 156    --  affecting the contents of either Name_Buffer or Name_Len.
 157
 158    procedure Get_Unqualified_Name_String (Id : Name_Id);
 159    --  Similar to the procedure Get_Name_String except that qualification (as
 160    --  defined in unit Exp_Dbug) is removed (including both preceding __
 161    --  delimited names, and also the suffixes used to indicate package body
 162    --  entities and to distinguish between overloaded entities). Note that
 163    --  names are not qualified until just before the call to gigi, so this
 164    --  routine is only needed by processing that occurs after gigi has been
 165    --  called. This includes all ASIS processing, since ASIS works on the
 166    --  tree written after gigi has been called.
 167
 168    procedure Get_Name_String_And_Append (Id : Name_Id);
 169    --  Like the procedure Get_Name_String, but the resulting characters are
 170    --  appended to the current contents of Name_Buffer (as delimited by
 171    --  Name_Len), and Name_Len is incremented to include the added characters.
 172
 173    procedure Get_Decoded_Name_String (Id : Name_Id);
 174    --  Same calling sequence and interface as Get_Name_String, except that the
 175    --  result is decoded, so that upper half characters and wide characters
 176    --  appear as originally found in the source program text, operators have
 177    --  their source forms (special characters and enclosed in quotes), and
 178    --  character literals appear surrounded by apostrophes.
 179
 180    procedure Get_Unqualified_Decoded_Name_String (Id : Name_Id);
 181    --  Similar to Get_Decoded_Name_String except that qualification (as
 182    --  defined in unit Exp_Dbug) is removed (including both preceding __
 183    --  delimited names, and also the suffix used to indicate package body
 184    --  entities). Note that names are not qualified until just before the
 185    --  call to gigi, so this routine is only needed by processing that
 186    --  occurs after gigi has been called. This includes all ASIS processing,
 187    --  since ASIS works on the tree written after gigi has been called.
 188
 189    procedure Get_Decoded_Name_String_With_Brackets (Id : Name_Id);
 190    --  This routine is similar to Get_Decoded_Name_String, except that the
 191    --  brackets notation (Uhh replaced by ["hh"], Whhhh replaced by ["hhhh"])
 192    --  is used for all non-lower half characters, regardless of the setting
 193    --  of Opt.Wide_Character_Encoding_Method, and also in that characters
 194    --  in the range 16#80# .. 16#FF# are converted to brackets notation
 195    --  in all cases. This routine can be used when there is a requirement
 196    --  for a canonical representation not affected by the character set
 197    --  options (e.g. in the binder generation of symbols).
 198
 199    function Get_Name_Table_Byte (Id : Name_Id) return Byte;
 200    pragma Inline (Get_Name_Table_Byte);
 201    --  Fetches the Byte value associated with name Id (see Set_Name_Table_Byte)
 202
 203    function Get_Name_Table_Info (Id : Name_Id) return Int;
 204    pragma Inline (Get_Name_Table_Info);
 205    --  Fetches the Int value associated with name Id (see Set_Name_Table_Info)
 206
 207    function Is_Operator_Name (Id : Name_Id) return Boolean;
 208    --  Returns True if the name Id has the form of an operator symbol
 209    --  (that is, it starts with an upper case O).
 210
 211    procedure Initialize;
 212    --  Initializes the names table, including initializing the first 26
 213    --  entries in the table (for the 1-character lower case names a-z).
 214    --  Note that Initialize must not be called if Tree_Read is used.
 215
 216    procedure Lock;
 217    --  Locks the name table before calling the back end. Space for up to 10
 218    --  extra names and 1000 extra characters is reserved before locking.
 219
 220    procedure Unlock;
 221    --  Unlocks the name table to allow use of the 10 extra names and 1000
 222    --  extra characters reserved by the Lock call. See gnat1drv for details
 223    --  of why this is needed.
 224
 225    function Length_Of_Name (Id : Name_Id) return Nat;
 226    pragma Inline (Length_Of_Name);
 227    --  Returns length of given name in characters. This is the length of the
 228    --  encoded name, as stored in the names table; the result is equivalent to
 229    --  calling Get_Name_String and reading Name_Len, except that a call to
 230    --  Length_Of_Name does not affect the contents of Name_Len and Name_Buffer.
 231
 232    function Name_Chars_Address return System.Address;
 233    --  Returns the starting address of the name characters table (used in
 234    --  the Back_End call to Gigi).
 235
 236    function Name_Find return Name_Id;
 237    --  Name_Find is called with a string stored in Name_Buffer whose length
 238    --  is in Name_Len (i.e. the characters of the name are in subscript
 239    --  positions 1 to Name_Len in Name_Buffer). It searches the names
 240    --  table to see if the string has already been stored. If so, the Id of
 241    --  the existing entry is returned. Otherwise a new entry is created with
 242    --  its Name_Table_Info field set to zero. The contents of Name_Buffer
 243    --  and Name_Len are not modified by this call. Note that it is permissible
 244    --  for Name_Len to be set to zero to look up the null name string.
 245
 246    function Name_Enter return Name_Id;
 247    --  Name_Enter has the same calling interface as Name_Find (name passed in
 248    --  Name_Buffer, length in Name_Len). The difference is that it does not
 249    --  search the table for an existing match, and subsequent Name_Find calls
 250    --  using the same name will not locate the entry created by this call.
 251    --  Thus multiple calls to Name_Enter with the same name will create
 252    --  multiple entries in the name table with different Name_Id values. This
 253    --  is useful in the case of created names, which are never expected to be
 254    --  looked up. Note: Name_Enter should never be used for one character
 255    --  names, since Name_Find locates these efficiently without hashing.
 256
 257    function Name_Entries_Address return System.Address;
 258    --  Returns starting address of names table. Used in Back_End call to Gigi.
 259
 260    function Name_Entries_Count return Nat;
 261    --  Returns the current number of entries in the names table
 262
 263    function Is_OK_Internal_Letter (C : Character) return Boolean;
 264    pragma Inline (Is_OK_Internal_Letter);
 265    --  Returns True if C is a suitable character for use as a prefix or a
 266    --  suffix of an internally generated name, i.e. it is an upper case letter
 267    --  other than one of those used for encoding source names (currently the
 268    --  reserved letters are O, Q, U, W). Also returns False for the letter X,
 269    --  which is reserved for debug output (see Exp_Dbug).
 270
 271    function Is_Internal_Name (Id : Name_Id) return Boolean;
 272    --  Returns True if the name is an internal name (i.e. it contains a
 273    --  character for which Is_OK_Internal_Letter is true, or it starts or
 274    --  ends with an underscore). This call destroys the value of Name_Len
 275    --  and Name_Buffer (it loads these as for Get_Name_String).
 276    --
 277    --  Note: if the name is qualified (has a double underscore), then
 278    --  only the final entity name is considered, not the qualifying
 279    --  names. Consider for example that the name:
 280    --
 281    --    pkg__B_1__xyz
 282    --
 283    --  is not an internal name, because the B comes from the internal
 284    --  name of a qualifying block, but the xyz means that this was
 285    --  indeed a declared identifier called "xyz" within this block
 286    --  and there is nothing internal about that name.
 287
 288    function Is_Internal_Name return Boolean;
 289    --  Like the form with an Id argument, except that the name to be tested is
 290    --  passed in Name_Buffer (1 .. Name_Len). Unlike that form, this call
 291    --  leaves both Name_Buffer and Name_Len unchanged.
 292
 293    procedure Reset_Name_Table;
 294    --  This procedure is used when there are multiple source files to reset
 295    --  the name table info entries associated with current entries in the
 296    --  names table. There is no harm in keeping the names entries themselves
 297    --  from one compilation to another, but we can't keep the entity info,
 298    --  since this refers to tree nodes, which are destroyed between main
 299    --  source files.
 300
 301    procedure Add_Char_To_Name_Buffer (C : Character);
 302    pragma Inline (Add_Char_To_Name_Buffer);
 303    --  Adds the given character C to the end of the string currently stored
 304    --  in Name_Buffer, incrementing Name_Len.
 305
 306    procedure Add_Nat_To_Name_Buffer (V : Nat);
 307    --  Adds the decimal representation of V to the end of the string
 308    --  currently stored in Name_Buffer, incrementing Name_Len as required.
 309
 310    procedure Add_Str_To_Name_Buffer (S : String);
 311    --  Adds the characters of string S to the end of the string currently
 312    --  stored in Name_Buffer, incrementing Name_Len by the length of S.
 313
 314    procedure Set_Character_Literal_Name (C : Char_Code);
 315    --  This procedure sets the proper encoded name (see "Character literals"
 316    --  above) for the character literal with the given character code. On
 317    --  return Name_Buffer and Name_Len are set to reflect the stored name.
 318
 319    procedure Set_Name_Table_Info (Id : Name_Id; Val : Int);
 320    pragma Inline (Set_Name_Table_Info);
 321    --  Sets the Int value associated with name Id to Val
 322
 323    procedure Set_Name_Table_Byte (Id : Name_Id; Val : Byte);
 324    pragma Inline (Set_Name_Table_Byte);
 325    --  Sets the Byte value associated with name Id to Val
 326
 327    procedure Store_Encoded_Character (C : Char_Code);
 328    --  Stores character code C at the end of Name_Buffer, updating the
 329    --  value in Name_Len appropriately. Lower case letters and digits are
 330    --  stored unchanged. Other 8-bit characters are stored using the Uhh
 331    --  encoding (hh = hex code), and other 16-bit wide-character values
 332    --  are stored using the Whhhh encoding (hhhh = hex code). Note that
 333    --  this procedure does not fold upper case letters (they are stored
 334    --  using the Uhh encoding). If folding is required, it must be done
 335    --  by the caller prior to the call.
 336
 337    procedure Tree_Read;
 338    --  Initializes internal tables from current tree file using Tree_Read.
 339    --  Initialize must not be called if Tree_Read is used, because Tree_Read
 340    --  itself includes all necessary initialization.
 341
 342    procedure Tree_Write;
 343    --  Writes out internal tables to the current tree file using Tree_Write
 344
 345    procedure Get_Last_Two_Chars (N : Name_Id; C1, C2 : out Character);
 346    --  Obtains the last two characters of name N. C1 is the last but one
 347    --  character and C2 is the last character. If the name is less than two
 348    --  characters long, then both C1 and C2 are set to ASCII.NUL on return.
 349
 350    procedure Write_Name (Id : Name_Id);
 351    --  Write_Name writes the characters of the specified name using the
 352    --  standard output procedures in package Output. No end of line is
 353    --  written, just the characters of the name. On return Name_Buffer and
 354    --  Name_Len are set as for a call to Get_Name_String. The name is written
 355    --  in encoded form (i.e. including Uhh, Whhhh, Qx, Oxxx as they appear in
 356    --  the name table). If Id is Error_Name, or No_Name, no text is output.
 357
 358    procedure wn (Id : Name_Id);
 359    pragma Export (Ada, wn);
 360    --  Like Write_Name, but also outputs a new line at the end. Intended
 361    --  for use from the debugger only.
 362
 363    procedure Write_Name_Decoded (Id : Name_Id);
 364    --  Like Write_Name, except that the name written is the decoded name, as
 365    --  described for Get_Decoded_Name_String, and the value left in Name_Len
 366    --  and Name_Buffer on return is the decoded name.
 367
 368    ---------------------------
 369    -- Table Data Structures --
 370    ---------------------------
 371
 372    --  The following declarations define the data structures used to store
 373    --  names. The definitions are in the private part of the package spec,
 374    --  rather than the body, since they are referenced directly by gigi.
 375
 376 private
 377
 378    --  This table stores the actual characters of the names. Although
 379    --  logically there is no need for a terminating character (since the
 380    --  length is stored in the name entry table), we still store a NUL at the
 381    --  end of every name (for convenience in interfacing to the C world).
 382
 383    package Name_Chars is new Table.Table (
 384      Table_Component_Type => Character,
 385      Table_Index_Type     => Int,
 386      Table_Low_Bound      => 0,
 387      Table_Initial        => Alloc.Name_Chars_Initial,
 388      Table_Increment      => Alloc.Name_Chars_Increment,
 389      Table_Name           => "Name_Chars");
 390
 391    type Name_Entry is record
 392       Name_Chars_Index : Int;
 393       --  Starting location of characters in the Name_Chars table minus
 394       --  one (i.e. pointer to character just before first character). The
 395       --  reason for the bias of one is that indexes in Name_Buffer are
 396       --  one's origin, so this avoids unnecessary adds and subtracts of 1.
 397
 398       Name_Len : Short;
 399       --  Length of this name in characters
 400
 401       Byte_Info : Byte;
 402       --  Byte value associated with this name (see Get_Name_Table_Byte)
 403
 404       Hash_Link : Name_Id;
 405       --  Link to next entry in names table for same hash code
 406
 407       Int_Info : Int;
 408       --  Int value associated with this name (see Get_Name_Table_Info)
 409    end record;
 410
 411    --  This is the table that is indexed by Name_Id values. It contains one
 412    --  entry per stored name (Name_Enter may create duplicate names).
 413
 414    package Name_Entries is new Table.Table (
 415      Table_Component_Type => Name_Entry,
 416      Table_Index_Type     => Name_Id,
 417      Table_Low_Bound      => First_Name_Id,
 418      Table_Initial        => Alloc.Names_Initial,
 419      Table_Increment      => Alloc.Names_Increment,
 420      Table_Name           => "Name_Entries");
 421
 422 end Namet;