1 ------------------------------------------------------------------------------
3 -- GNAT RUN-TIME COMPONENTS --
5 -- G N A T . E N C O D E _ S T R I N G --
9 -- Copyright (C) 2007, AdaCore --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
32 ------------------------------------------------------------------------------
34 with Interfaces; use Interfaces;
36 with System.WCh_Con; use System.WCh_Con;
37 with System.WCh_Cnv; use System.WCh_Cnv;
39 package body GNAT.Encode_String is
41 -----------------------
42 -- Local Subprograms --
43 -----------------------
procedure Bad;
pragma No_Return (Bad);
--  Raise error for bad character code

procedure Past_End;
pragma No_Return (Past_End);
--  Raise error for off end of string
---------
-- Bad --
---------

procedure Bad is
begin
   --  Single point of failure reporting for characters that the selected
   --  encoding method cannot represent. Never returns.

   raise Constraint_Error with
     "character cannot be encoded with given Encoding_Method";
end Bad;
63 ------------------------
64 -- Encode_Wide_String --
65 ------------------------
function Encode_Wide_String (S : Wide_String) return String is
   Long : constant Natural := WC_Longest_Sequences (Encoding_Method);
   --  Longest sequence the selected encoding method can produce for a
   --  single character; used to size the buffer for the worst case.

   Result : String (1 .. S'Length * Long);
   --  Scratch buffer receiving the encoded text

   Length : Natural;
   --  Number of characters of Result actually filled in

begin
   Encode_Wide_String (S, Result, Length);
   return Result (1 .. Length);
end Encode_Wide_String;

procedure Encode_Wide_String
  (S      : Wide_String;
   Result : out String;
   Length : out Natural)
is
   Ptr : Natural := Result'First;
   --  Index in Result at which the next encoded character is stored

begin
   --  Encodes each character of S in turn into Result, and sets Length to
   --  the number of characters stored. Constraint_Error is propagated from
   --  Encode_Wide_Character if a character cannot be encoded or Result is
   --  too short.

   --  The output position is tracked relative to Result'First, not
   --  S'First: the two strings have unrelated bounds (S may be a slice),
   --  and the function form relies on the encoding starting at Result (1).

   for J in S'Range loop
      Encode_Wide_Character (S (J), Result, Ptr);
   end loop;

   Length := Ptr - Result'First;
end Encode_Wide_String;
92 -----------------------------
93 -- Encode_Wide_Wide_String --
94 -----------------------------
function Encode_Wide_Wide_String (S : Wide_Wide_String) return String is
   Long : constant Natural := WC_Longest_Sequences (Encoding_Method);
   --  Longest sequence the selected encoding method can produce for a
   --  single character; used to size the buffer for the worst case.

   Result : String (1 .. S'Length * Long);
   --  Scratch buffer receiving the encoded text

   Length : Natural;
   --  Number of characters of Result actually filled in

begin
   Encode_Wide_Wide_String (S, Result, Length);
   return Result (1 .. Length);
end Encode_Wide_Wide_String;

procedure Encode_Wide_Wide_String
  (S      : Wide_Wide_String;
   Result : out String;
   Length : out Natural)
is
   Ptr : Natural := Result'First;
   --  Index in Result at which the next encoded character is stored

begin
   --  Encodes each character of S in turn into Result, and sets Length to
   --  the number of characters stored. Constraint_Error is propagated from
   --  Encode_Wide_Wide_Character if a character cannot be encoded or
   --  Result is too short.

   --  The output position is tracked relative to Result'First, not
   --  S'First: the two strings have unrelated bounds (S may be a slice),
   --  and the function form relies on the encoding starting at Result (1).

   for J in S'Range loop
      Encode_Wide_Wide_Character (S (J), Result, Ptr);
   end loop;

   Length := Ptr - Result'First;
end Encode_Wide_Wide_String;
121 ---------------------------
122 -- Encode_Wide_Character --
123 ---------------------------
procedure Encode_Wide_Character
  (Char   : Wide_Character;
   Result : in out String;
   Ptr    : in out Natural)
is
begin
   --  A Wide_Character is handled by encoding the Wide_Wide_Character that
   --  has the same position number, so all the real work is delegated.
   --  Result and Ptr are passed through unchanged and updated by the callee.

   Encode_Wide_Wide_Character
     (Wide_Wide_Character'Val (Wide_Character'Pos (Char)), Result, Ptr);

exception
   --  Any Constraint_Error coming out of the conversion or the callee is
   --  reported through the common Bad routine.

   when Constraint_Error =>
      Bad;
end Encode_Wide_Character;
139 --------------------------------
140 -- Encode_Wide_Wide_Character --
141 --------------------------------
procedure Encode_Wide_Wide_Character
  (Char   : Wide_Wide_Character;
   Result : in out String;
   Ptr    : in out Natural)
is
   --  Stores the encoding of Char into Result starting at Result (Ptr) and
   --  leaves Ptr pointing just past the last character stored. Raises
   --  Constraint_Error (via Bad or Past_End) if Char cannot be encoded
   --  with Encoding_Method or if Result has no room left.

   U : Unsigned_32;
   --  Code of Char, used only by the UTF-8 path

   procedure Out_Char (C : Character);
   pragma Inline (Out_Char);
   --  Procedure to store one character for instantiation below

   --------------
   -- Out_Char --
   --------------

   procedure Out_Char (C : Character) is
   begin
      if Ptr > Result'Last then
         Past_End;
      else
         Result (Ptr) := C;
         Ptr := Ptr + 1;
      end if;
   end Out_Char;

--  Start of processing for Encode_Wide_Wide_Character

begin
   --  Efficient code for UTF-8 case

   if Encoding_Method = WCEM_UTF8 then

      --  Note: for details of UTF8 encoding see RFC 3629

      U := Unsigned_32 (Wide_Wide_Character'Pos (Char));

      --  16#00_0000#-16#00_007F#: 0xxxxxxx

      if U <= 16#00_007F# then
         Out_Char (Character'Val (U));

      --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx

      elsif U <= 16#00_07FF# then
         Out_Char (Character'Val (2#11000000# or Shift_Right (U, 6)));
         Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));

      --  16#00_0800#-16#00_FFFF#: 1110xxxx 10xxxxxx 10xxxxxx

      elsif U <= 16#00_FFFF# then
         Out_Char (Character'Val (2#11100000# or Shift_Right (U, 12)));
         Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
                                                    and 2#00111111#)));
         Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));

      --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

      elsif U <= 16#10_FFFF# then
         Out_Char (Character'Val (2#11110000# or Shift_Right (U, 18)));
         Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
                                                    and 2#00111111#)));
         Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
                                                    and 2#00111111#)));
         Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));

      --  16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
      --                               10xxxxxx 10xxxxxx

      --  Since the previous test stops at 16#10_FFFF#, codes in the range
      --  16#0011_0000#-16#001F_FFFF# are also encoded by this branch.

      elsif U <= 16#03FF_FFFF# then
         Out_Char (Character'Val (2#11111000# or Shift_Right (U, 24)));
         Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 18)
                                                    and 2#00111111#)));
         Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
                                                    and 2#00111111#)));
         Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
                                                    and 2#00111111#)));
         Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));

      --  All other cases are treated as invalid character codes. Note that
      --  this includes the six-byte form, which is never generated:

      --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
      --                               10xxxxxx 10xxxxxx 10xxxxxx

      else
         Bad;
      end if;

   --  All encoding methods other than UTF-8

   else
      Non_UTF8 : declare
         procedure UTF_32_To_String is
           new UTF_32_To_Char_Sequence (Out_Char);
         --  Instantiate conversion procedure with above Out_Char routine

      begin
         UTF_32_To_String
           (UTF_32_Code (Wide_Wide_Character'Pos (Char)), Encoding_Method);

      exception
         when Constraint_Error =>
            Bad;
      end Non_UTF8;
   end if;
end Encode_Wide_Wide_Character;
procedure Past_End is
begin
   --  Reports an attempt to store an encoded character beyond the end of
   --  the caller's result string. Never returns.

   raise Constraint_Error with "past end of string";
end Past_End;
260 end GNAT.Encode_String;