1 ------------------------------------------------------------------------------
3 -- GNAT RUN-TIME COMPONENTS --
5 -- A D A . C H A R A C T E R S . H A N D L I N G --
10 -- Copyright (C) 1992-2001 Free Software Foundation, Inc. --
12 -- GNAT is free software; you can redistribute it and/or modify it under --
13 -- terms of the GNU General Public License as published by the Free Soft- --
14 -- ware Foundation; either version 2, or (at your option) any later ver- --
15 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
16 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
17 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
18 -- for more details. You should have received a copy of the GNU General --
19 -- Public License distributed with GNAT; see file COPYING. If not, write --
20 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
21 -- MA 02111-1307, USA. --
23 -- As a special exception, if other files instantiate generics from this --
24 -- unit, or you link this unit with other files to produce an executable, --
25 -- this unit does not by itself cause the resulting executable to be --
26 -- covered by the GNU General Public License. This exception does not --
27 -- however invalidate any other reasons why the executable file might be --
28 -- covered by the GNU Public License. --
30 -- GNAT was originally developed by the GNAT team at New York University. --
31 -- It is now maintained by Ada Core Technologies Inc (http://www.gnat.com). --
33 ------------------------------------------------------------------------------
35 with Ada.Characters.Latin_1; use Ada.Characters.Latin_1;
36 with Ada.Strings.Maps; use Ada.Strings.Maps;
37 with Ada.Strings.Maps.Constants; use Ada.Strings.Maps.Constants;
39 package body Ada.Characters.Handling is
41 ------------------------------------
42 -- Character Classification Table --
43 ------------------------------------
45 type Character_Flags is mod 256;
46 for Character_Flags'Size use 8;
48 Control : constant Character_Flags := 1;
49 Lower : constant Character_Flags := 2;
50 Upper : constant Character_Flags := 4;
51 Basic : constant Character_Flags := 8;
52 Hex_Digit : constant Character_Flags := 16;
53 Digit : constant Character_Flags := 32;
54 Special : constant Character_Flags := 64;
56 Letter : constant Character_Flags := Lower or Upper;
57 Alphanum : constant Character_Flags := Letter or Digit;
58 Graphic : constant Character_Flags := Alphanum or Special;
60 Char_Map : constant array (Character) of Character_Flags :=
97 Exclamation => Special,
99 Number_Sign => Special,
100 Dollar_Sign => Special,
101 Percent_Sign => Special,
102 Ampersand => Special,
103 Apostrophe => Special,
104 Left_Parenthesis => Special,
105 Right_Parenthesis => Special,
107 Plus_Sign => Special,
110 Full_Stop => Special,
113 '0' .. '9' => Digit + Hex_Digit,
116 Semicolon => Special,
117 Less_Than_Sign => Special,
118 Equals_Sign => Special,
119 Greater_Than_Sign => Special,
121 Commercial_At => Special,
123 'A' .. 'F' => Upper + Basic + Hex_Digit,
124 'G' .. 'Z' => Upper + Basic,
126 Left_Square_Bracket => Special,
127 Reverse_Solidus => Special,
128 Right_Square_Bracket => Special,
129 Circumflex => Special,
133 'a' .. 'f' => Lower + Basic + Hex_Digit,
134 'g' .. 'z' => Lower + Basic,
136 Left_Curly_Bracket => Special,
137 Vertical_Line => Special,
138 Right_Curly_Bracket => Special,
142 Reserved_128 => Control,
143 Reserved_129 => Control,
146 Reserved_132 => Control,
169 Reserved_153 => Control,
177 No_Break_Space => Special,
178 Inverted_Exclamation => Special,
179 Cent_Sign => Special,
180 Pound_Sign => Special,
181 Currency_Sign => Special,
183 Broken_Bar => Special,
184 Section_Sign => Special,
185 Diaeresis => Special,
186 Copyright_Sign => Special,
187 Feminine_Ordinal_Indicator => Special,
188 Left_Angle_Quotation => Special,
190 Soft_Hyphen => Special,
191 Registered_Trade_Mark_Sign => Special,
193 Degree_Sign => Special,
194 Plus_Minus_Sign => Special,
195 Superscript_Two => Special,
196 Superscript_Three => Special,
198 Micro_Sign => Special,
199 Pilcrow_Sign => Special,
200 Middle_Dot => Special,
202 Superscript_One => Special,
203 Masculine_Ordinal_Indicator => Special,
204 Right_Angle_Quotation => Special,
205 Fraction_One_Quarter => Special,
206 Fraction_One_Half => Special,
207 Fraction_Three_Quarters => Special,
208 Inverted_Question => Special,
212 UC_A_Circumflex => Upper,
214 UC_A_Diaeresis => Upper,
216 UC_AE_Diphthong => Upper + Basic,
217 UC_C_Cedilla => Upper,
220 UC_E_Circumflex => Upper,
221 UC_E_Diaeresis => Upper,
224 UC_I_Circumflex => Upper,
225 UC_I_Diaeresis => Upper,
226 UC_Icelandic_Eth => Upper + Basic,
230 UC_O_Circumflex => Upper,
232 UC_O_Diaeresis => Upper,
234 Multiplication_Sign => Special,
236 UC_O_Oblique_Stroke => Upper,
239 UC_U_Circumflex => Upper,
240 UC_U_Diaeresis => Upper,
242 UC_Icelandic_Thorn => Upper + Basic,
244 LC_German_Sharp_S => Lower + Basic,
247 LC_A_Circumflex => Lower,
249 LC_A_Diaeresis => Lower,
251 LC_AE_Diphthong => Lower + Basic,
252 LC_C_Cedilla => Lower,
255 LC_E_Circumflex => Lower,
256 LC_E_Diaeresis => Lower,
259 LC_I_Circumflex => Lower,
260 LC_I_Diaeresis => Lower,
261 LC_Icelandic_Eth => Lower + Basic,
265 LC_O_Circumflex => Lower,
267 LC_O_Diaeresis => Lower,
269 Division_Sign => Special,
271 LC_O_Oblique_Stroke => Lower,
274 LC_U_Circumflex => Lower,
275 LC_U_Diaeresis => Lower,
277 LC_Icelandic_Thorn => Lower + Basic,
278 LC_Y_Diaeresis => Lower
281 ---------------------
282 -- Is_Alphanumeric --
283 ---------------------
285 function Is_Alphanumeric (Item : in Character) return Boolean is
287 return (Char_Map (Item) and Alphanum) /= 0;
294 function Is_Basic (Item : in Character) return Boolean is
296 return (Char_Map (Item) and Basic) /= 0;
303 function Is_Character (Item : in Wide_Character) return Boolean is
305 return Wide_Character'Pos (Item) < 256;
312 function Is_Control (Item : in Character) return Boolean is
314 return (Char_Map (Item) and Control) /= 0;
321 function Is_Digit (Item : in Character) return Boolean is
323 return Item in '0' .. '9';
330 function Is_Graphic (Item : in Character) return Boolean is
332 return (Char_Map (Item) and Graphic) /= 0;
335 --------------------------
336 -- Is_Hexadecimal_Digit --
337 --------------------------
339 function Is_Hexadecimal_Digit (Item : in Character) return Boolean is
341 return (Char_Map (Item) and Hex_Digit) /= 0;
342 end Is_Hexadecimal_Digit;
348 function Is_ISO_646 (Item : in Character) return Boolean is
350 return Item in ISO_646;
353 -- Note: much more efficient coding of the following function is possible
354 -- by testing several 16#80# bits in a complete word in a single operation
356 function Is_ISO_646 (Item : in String) return Boolean is
358 for J in Item'Range loop
359 if Item (J) not in ISO_646 then
371 function Is_Letter (Item : in Character) return Boolean is
373 return (Char_Map (Item) and Letter) /= 0;
380 function Is_Lower (Item : in Character) return Boolean is
382 return (Char_Map (Item) and Lower) /= 0;
389 function Is_Special (Item : in Character) return Boolean is
391 return (Char_Map (Item) and Special) /= 0;
398 function Is_String (Item : in Wide_String) return Boolean is
400 for J in Item'Range loop
401 if Wide_Character'Pos (Item (J)) >= 256 then
413 function Is_Upper (Item : in Character) return Boolean is
415 return (Char_Map (Item) and Upper) /= 0;
422 function To_Basic (Item : in Character) return Character is
424 return Value (Basic_Map, Item);
427 function To_Basic (Item : in String) return String is
428 Result : String (1 .. Item'Length);
431 for J in Item'Range loop
432 Result (J - (Item'First - 1)) := Value (Basic_Map, Item (J));
442 function To_Character
443 (Item : in Wide_Character;
444 Substitute : in Character := ' ')
448 if Is_Character (Item) then
449 return Character'Val (Wide_Character'Pos (Item));
460 (Item : in Character;
461 Substitute : in ISO_646 := ' ')
465 if Item in ISO_646 then
474 Substitute : in ISO_646 := ' ')
477 Result : String (1 .. Item'Length);
480 for J in Item'Range loop
481 if Item (J) in ISO_646 then
482 Result (J - (Item'First - 1)) := Item (J);
484 Result (J - (Item'First - 1)) := Substitute;
495 function To_Lower (Item : in Character) return Character is
497 return Value (Lower_Case_Map, Item);
500 function To_Lower (Item : in String) return String is
501 Result : String (1 .. Item'Length);
504 for J in Item'Range loop
505 Result (J - (Item'First - 1)) := Value (Lower_Case_Map, Item (J));
516 (Item : in Wide_String;
517 Substitute : in Character := ' ')
520 Result : String (1 .. Item'Length);
523 for J in Item'Range loop
524 Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute);
534 (Item : in Character)
538 return Value (Upper_Case_Map, Item);
545 Result : String (1 .. Item'Length);
548 for J in Item'Range loop
549 Result (J - (Item'First - 1)) := Value (Upper_Case_Map, Item (J));
555 -----------------------
556 -- To_Wide_Character --
557 -----------------------
559 function To_Wide_Character
560 (Item : in Character)
561 return Wide_Character
564 return Wide_Character'Val (Character'Pos (Item));
565 end To_Wide_Character;
571 function To_Wide_String
575 Result : Wide_String (1 .. Item'Length);
578 for J in Item'Range loop
579 Result (J - (Item'First - 1)) := To_Wide_Character (Item (J));
584 end Ada.Characters.Handling;