1 ------------------------------------------------------------------------------
3 -- GNAT RUN-TIME COMPONENTS --
5 -- A D A . C H A R A C T E R S . H A N D L I N G --
9 -- Copyright (C) 1992-2005, Free Software Foundation, Inc. --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
32 ------------------------------------------------------------------------------
34 with Ada.Characters.Latin_1; use Ada.Characters.Latin_1;
35 with Ada.Strings.Maps; use Ada.Strings.Maps;
36 with Ada.Strings.Maps.Constants; use Ada.Strings.Maps.Constants;
38 package body Ada.Characters.Handling is
40 ------------------------------------
41 -- Character Classification Table --
42 ------------------------------------
44 type Character_Flags is mod 256;
45 for Character_Flags'Size use 8;
47 Control : constant Character_Flags := 1;
48 Lower : constant Character_Flags := 2;
49 Upper : constant Character_Flags := 4;
50 Basic : constant Character_Flags := 8;
51 Hex_Digit : constant Character_Flags := 16;
52 Digit : constant Character_Flags := 32;
53 Special : constant Character_Flags := 64;
55 Letter : constant Character_Flags := Lower or Upper;
56 Alphanum : constant Character_Flags := Letter or Digit;
57 Graphic : constant Character_Flags := Alphanum or Special;
59 Char_Map : constant array (Character) of Character_Flags :=
96 Exclamation => Special,
98 Number_Sign => Special,
99 Dollar_Sign => Special,
100 Percent_Sign => Special,
101 Ampersand => Special,
102 Apostrophe => Special,
103 Left_Parenthesis => Special,
104 Right_Parenthesis => Special,
106 Plus_Sign => Special,
109 Full_Stop => Special,
112 '0' .. '9' => Digit + Hex_Digit,
115 Semicolon => Special,
116 Less_Than_Sign => Special,
117 Equals_Sign => Special,
118 Greater_Than_Sign => Special,
120 Commercial_At => Special,
122 'A' .. 'F' => Upper + Basic + Hex_Digit,
123 'G' .. 'Z' => Upper + Basic,
125 Left_Square_Bracket => Special,
126 Reverse_Solidus => Special,
127 Right_Square_Bracket => Special,
128 Circumflex => Special,
132 'a' .. 'f' => Lower + Basic + Hex_Digit,
133 'g' .. 'z' => Lower + Basic,
135 Left_Curly_Bracket => Special,
136 Vertical_Line => Special,
137 Right_Curly_Bracket => Special,
141 Reserved_128 => Control,
142 Reserved_129 => Control,
145 Reserved_132 => Control,
168 Reserved_153 => Control,
176 No_Break_Space => Special,
177 Inverted_Exclamation => Special,
178 Cent_Sign => Special,
179 Pound_Sign => Special,
180 Currency_Sign => Special,
182 Broken_Bar => Special,
183 Section_Sign => Special,
184 Diaeresis => Special,
185 Copyright_Sign => Special,
186 Feminine_Ordinal_Indicator => Special,
187 Left_Angle_Quotation => Special,
189 Soft_Hyphen => Special,
190 Registered_Trade_Mark_Sign => Special,
192 Degree_Sign => Special,
193 Plus_Minus_Sign => Special,
194 Superscript_Two => Special,
195 Superscript_Three => Special,
197 Micro_Sign => Special,
198 Pilcrow_Sign => Special,
199 Middle_Dot => Special,
201 Superscript_One => Special,
202 Masculine_Ordinal_Indicator => Special,
203 Right_Angle_Quotation => Special,
204 Fraction_One_Quarter => Special,
205 Fraction_One_Half => Special,
206 Fraction_Three_Quarters => Special,
207 Inverted_Question => Special,
211 UC_A_Circumflex => Upper,
213 UC_A_Diaeresis => Upper,
215 UC_AE_Diphthong => Upper + Basic,
216 UC_C_Cedilla => Upper,
219 UC_E_Circumflex => Upper,
220 UC_E_Diaeresis => Upper,
223 UC_I_Circumflex => Upper,
224 UC_I_Diaeresis => Upper,
225 UC_Icelandic_Eth => Upper + Basic,
229 UC_O_Circumflex => Upper,
231 UC_O_Diaeresis => Upper,
233 Multiplication_Sign => Special,
235 UC_O_Oblique_Stroke => Upper,
238 UC_U_Circumflex => Upper,
239 UC_U_Diaeresis => Upper,
241 UC_Icelandic_Thorn => Upper + Basic,
243 LC_German_Sharp_S => Lower + Basic,
246 LC_A_Circumflex => Lower,
248 LC_A_Diaeresis => Lower,
250 LC_AE_Diphthong => Lower + Basic,
251 LC_C_Cedilla => Lower,
254 LC_E_Circumflex => Lower,
255 LC_E_Diaeresis => Lower,
258 LC_I_Circumflex => Lower,
259 LC_I_Diaeresis => Lower,
260 LC_Icelandic_Eth => Lower + Basic,
264 LC_O_Circumflex => Lower,
266 LC_O_Diaeresis => Lower,
268 Division_Sign => Special,
270 LC_O_Oblique_Stroke => Lower,
273 LC_U_Circumflex => Lower,
274 LC_U_Diaeresis => Lower,
276 LC_Icelandic_Thorn => Lower + Basic,
277 LC_Y_Diaeresis => Lower
280 ---------------------
281 -- Is_Alphanumeric --
282 ---------------------
284 function Is_Alphanumeric (Item : Character) return Boolean is
286 return (Char_Map (Item) and Alphanum) /= 0;
293 function Is_Basic (Item : Character) return Boolean is
295 return (Char_Map (Item) and Basic) /= 0;
302 function Is_Character (Item : Wide_Character) return Boolean is
304 return Wide_Character'Pos (Item) < 256;
311 function Is_Control (Item : Character) return Boolean is
313 return (Char_Map (Item) and Control) /= 0;
320 function Is_Digit (Item : Character) return Boolean is
322 return Item in '0' .. '9';
329 function Is_Graphic (Item : Character) return Boolean is
331 return (Char_Map (Item) and Graphic) /= 0;
334 --------------------------
335 -- Is_Hexadecimal_Digit --
336 --------------------------
338 function Is_Hexadecimal_Digit (Item : Character) return Boolean is
340 return (Char_Map (Item) and Hex_Digit) /= 0;
341 end Is_Hexadecimal_Digit;
347 function Is_ISO_646 (Item : Character) return Boolean is
349 return Item in ISO_646;
352 -- Note: much more efficient coding of the following function is possible
353 -- by testing several 16#80# bits in a complete word in a single operation
355 function Is_ISO_646 (Item : String) return Boolean is
357 for J in Item'Range loop
358 if Item (J) not in ISO_646 then
370 function Is_Letter (Item : Character) return Boolean is
372 return (Char_Map (Item) and Letter) /= 0;
379 function Is_Lower (Item : Character) return Boolean is
381 return (Char_Map (Item) and Lower) /= 0;
388 function Is_Special (Item : Character) return Boolean is
390 return (Char_Map (Item) and Special) /= 0;
397 function Is_String (Item : Wide_String) return Boolean is
399 for J in Item'Range loop
400 if Wide_Character'Pos (Item (J)) >= 256 then
412 function Is_Upper (Item : Character) return Boolean is
414 return (Char_Map (Item) and Upper) /= 0;
421 function To_Basic (Item : Character) return Character is
423 return Value (Basic_Map, Item);
426 function To_Basic (Item : String) return String is
427 Result : String (1 .. Item'Length);
430 for J in Item'Range loop
431 Result (J - (Item'First - 1)) := Value (Basic_Map, Item (J));
441 function To_Character
442 (Item : Wide_Character;
443 Substitute : Character := ' ') return Character
446 if Is_Character (Item) then
447 return Character'Val (Wide_Character'Pos (Item));
459 Substitute : ISO_646 := ' ') return ISO_646
462 if Item in ISO_646 then
471 Substitute : ISO_646 := ' ') return String
473 Result : String (1 .. Item'Length);
476 for J in Item'Range loop
477 if Item (J) in ISO_646 then
478 Result (J - (Item'First - 1)) := Item (J);
480 Result (J - (Item'First - 1)) := Substitute;
491 function To_Lower (Item : Character) return Character is
493 return Value (Lower_Case_Map, Item);
496 function To_Lower (Item : String) return String is
497 Result : String (1 .. Item'Length);
500 for J in Item'Range loop
501 Result (J - (Item'First - 1)) := Value (Lower_Case_Map, Item (J));
513 Substitute : Character := ' ') return String
515 Result : String (1 .. Item'Length);
518 for J in Item'Range loop
519 Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute);
530 (Item : Character) return Character
533 return Value (Upper_Case_Map, Item);
537 (Item : String) return String
539 Result : String (1 .. Item'Length);
542 for J in Item'Range loop
543 Result (J - (Item'First - 1)) := Value (Upper_Case_Map, Item (J));
549 -----------------------
550 -- To_Wide_Character --
551 -----------------------
553 function To_Wide_Character
554 (Item : Character) return Wide_Character
557 return Wide_Character'Val (Character'Pos (Item));
558 end To_Wide_Character;
564 function To_Wide_String
565 (Item : String) return Wide_String
567 Result : Wide_String (1 .. Item'Length);
570 for J in Item'Range loop
571 Result (J - (Item'First - 1)) := To_Wide_Character (Item (J));
577 end Ada.Characters.Handling;