gcc/ada/widechar.ads

   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                             W I D E C H A R                              --
   6 --                                                                          --
   7 --                                 S p e c                                  --
   8 --                                                                          --
   9 --          Copyright (C) 1992-2005 Free Software Foundation, Inc.          --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 2,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
  17 -- for  more details.  You should have  received  a copy of the GNU General --
  18 -- Public License  distributed with GNAT;  see file COPYING.  If not, write --
  19 -- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
  20 -- MA 02111-1307, USA.                                                      --
  21 --                                                                          --
  22 -- As a special exception,  if other files  instantiate  generics from this --
  23 -- unit, or you link  this unit with other files  to produce an executable, --
  24 -- this  unit  does not  by itself cause  the resulting  executable  to  be --
  25 -- covered  by the  GNU  General  Public  License.  This exception does not --
  26 -- however invalidate  any other reasons why  the executable file  might be --
  27 -- covered by the  GNU Public License.                                      --
  28 --                                                                          --
  29 -- GNAT was originally developed  by the GNAT team at  New York University. --
  30 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  31 --                                                                          --
  32 ------------------------------------------------------------------------------
  33
  34 --  Subprograms for manipulation of wide character sequences. Note that in
  35 --  this package, wide character and wide wide character are not distinguished
  36 --  since this package is basically concerned with syntactic notions, and it
  37 --  deals with Char_Code values, rather than values of actual Ada types.
  38
  39 with Types; use Types;
  40
  41 package Widechar is
  42
  43    function Length_Wide return Nat;
  44    --  Returns the maximum length in characters for the escape sequence that
  45    --  is used to encode wide character literals outside the ASCII range. Used
  46    --  only in the implementation of the attribute Width for Wide_Character
  47    --  and Wide_Wide_Character.
  48
  49    procedure Scan_Wide
  50      (S   : Source_Buffer_Ptr;
  51       P   : in out Source_Ptr;
  52       C   : out Char_Code;
  53       Err : out Boolean);
  54    --  On entry S (P) points to the first character in the source text for
  55    --  a wide character (i.e. to an ESC character, a left bracket, or an
  56    --  upper half character, depending on the representation method). A
  57    --  single wide character is scanned. If no error is found, the value
  58    --  stored in C is the code for this wide character, P is updated past
  59    --  the sequence and Err is set to False. If an error is found, then
  60    --  P points to the improper character, C is undefined, and Err is
  61    --  set to True.
  62
  63    procedure Set_Wide
  64      (C : Char_Code;
  65       S : in out String;
  66       P : in out Natural);
  67    --  The escape sequence (including any leading ESC character) for the
  68    --  given character code is stored starting at S (P + 1), and on return
  69    --  P points to the last stored character (i.e. P is the count of stored
  70    --  characters on entry and exit, and the escape sequence is appended to
  71    --  the end of the stored string). The character code C represents a code
  72    --  originally constructed by Scan_Wide, so it is known to be in a range
  73    --  that is appropriate for the encoding method in use.
  74
  75    procedure Skip_Wide (S : String; P : in out Natural);
  76    --  On entry, S (P) points to an ESC character for a wide character escape
  77    --  sequence or to an upper half character if the encoding method uses the
  78    --  upper bit, or to a left bracket if the brackets encoding method is in
  79    --  use. On exit, P is bumped past the wide character sequence. No error
  80    --  checking is done, since this is only used on escape sequences generated
  81    --  by Set_Wide, which are known to be correct.
  82
  83    procedure Skip_Wide (S : Source_Buffer_Ptr; P : in out Source_Ptr);
  84    --  Similar to the above procedure, but operates on a source buffer
  85    --  instead of a string, with P being a Source_Ptr referencing the
  86    --  contents of the source buffer.
  87
  88    function Is_Start_Of_Wide_Char
  89      (S : Source_Buffer_Ptr;
  90       P : Source_Ptr) return Boolean;
  91    --  Determines if S (P) is the start of a wide character sequence
  92
  93    function Is_UTF_32_Letter (U : Char_Code) return Boolean;
  94    pragma Inline (Is_UTF_32_Letter);
  95    --  Returns true iff U is a letter that can be used to start an identifier.
  96    --  This means that it is in one of the following categories:
  97    --    Letter, Uppercase (Lu)
  98    --    Letter, Lowercase (Ll)
  99    --    Letter, Titlecase (Lt)
 100    --    Letter, Modifier  (Lm)
 101    --    Letter, Other     (Lo)
 102    --    Number, Letter    (Nl)
 103
 104    function Is_UTF_32_Digit (U : Char_Code) return Boolean;
 105    pragma Inline (Is_UTF_32_Digit);
 106    --  Returns true iff U is a digit that can be used to extend an identifier,
 107    --  which means it is in one of the following categories:
 108    --    Number, Decimal_Digit (Nd)
 109
 110    function Is_UTF_32_Line_Terminator (U : Char_Code) return Boolean;
 111    pragma Inline (Is_UTF_32_Line_Terminator);
 112    --  Returns true iff U is an allowed line terminator for source programs,
 113    --  which means it is in one of the following categories:
 114    --    Separator, Line (Zl)
 115    --    Separator, Paragraph (Zp)
 116    --  or that it is a conventional line terminator (CR, LF, VT, FF)
 117
 118    function Is_UTF_32_Mark (U : Char_Code) return Boolean;
 119    pragma Inline (Is_UTF_32_Mark);
 120    --  Returns true iff U is a mark character which can be used to extend
 121    --  an identifier. This means it is in one of the following categories:
 122    --    Mark, Non-Spacing (Mn)
 123    --    Mark, Spacing Combining (Mc)
 124
 125    function Is_UTF_32_Other (U : Char_Code) return Boolean;
 126    pragma Inline (Is_UTF_32_Other);
 127    --  Returns true iff U is an other format character, which means that it
 128    --  can be used to extend an identifier, but is ignored for the purposes of
 129    --  matching of identifiers. This means that it is in one of the following
 130    --  categories:
 131    --    Other, Format (Cf)
 132
 133    function Is_UTF_32_Punctuation (U : Char_Code) return Boolean;
 134    pragma Inline (Is_UTF_32_Punctuation);
 135    --  Returns true iff U is a punctuation character that can be used to
 136    --  separate pieces of an identifier. This means that it is in one of the
 137    --  following categories:
 138    --    Punctuation, Connector (Pc)
 139
 140    function Is_UTF_32_Space (U : Char_Code) return Boolean;
 141    pragma Inline (Is_UTF_32_Space);
 142    --  Returns true iff U is considered a space to be ignored, which means
 143    --  that it is in one of the following categories:
 144    --    Separator, Space (Zs)
 145
 146    function Is_UTF_32_Non_Graphic (U : Char_Code) return Boolean;
 147    pragma Inline (Is_UTF_32_Non_Graphic);
 148    --  Returns true iff U is considered to be a non-graphic character,
 149    --  which means that it is in one of the following categories:
 150    --    Other, Control (Cc)
 151    --    Other, Private Use (Co)
 152    --    Other, Surrogate (Cs)
 153    --    Other, Format (Cf)
 154    --    Separator, Line (Zl)
 155    --    Separator, Paragraph (Zp)
 156    --
 157    --  Note that the Ada category format effector is subsumed by the above
 158    --  list of Unicode categories.
 159
 160    function UTF_32_To_Upper_Case (U : Char_Code) return Char_Code;
 161    pragma Inline (UTF_32_To_Upper_Case);
 162    --  If U represents a lower case letter, returns the corresponding upper
 163    --  case letter, otherwise U is returned unchanged. The folding is locale
 164    --  independent as defined by documents referenced in the note in section
 165    --  1 of ISO/IEC 10646:2003
 166
 167 end Widechar;