1 // 2000-08-22 Benjamin Kosnik <bkoz@cygnus.com>
3 // Copyright (C) 2000, 2001, 2002 Free Software Foundation
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 2, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // You should have received a copy of the GNU General Public License along
17 // with this library; see the file COPYING. If not, write to the Free
18 // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
21 // 22.2.1.5 - Template class codecvt [lib.locale.codecvt]
24 #include <testsuite_hooks.h>
28 #ifdef _GLIBCPP_USE___ENC_TRAITS
30 // Need some char_traits specializations for this to work.
// unicode_t is the internal character type that the codecvt tests below
// convert to and from; it is declared as unsigned short.
31 typedef unsigned short unicode_t;
// Character traits for unicode_t, so that type can serve as the internal
// character type of the codecvt facet exercised below.  In this listing only
// compare() and copy() carry bodies; the other members are declarations.
36 struct char_traits<unicode_t>
38 typedef unicode_t char_type;
39 // Unsigned as wint_t is unsigned.
40 typedef unsigned long int_type;
41 typedef streampos pos_type;
42 typedef streamoff off_type;
43 typedef mbstate_t state_type;
46 assign(char_type& __c1, const char_type& __c2);
49 eq(const char_type& __c1, const char_type& __c2);
52 lt(const char_type& __c1, const char_type& __c2);
// Compares the first __n characters of __s1 and __s2; returns 0 when equal.
// __n counts characters while memcmp takes a byte count, so it is scaled by
// the character size.  A non-zero result follows memcmp's byte-wise
// ordering.
55 compare(const char_type* __s1, const char_type* __s2, size_t __n)
56 { return memcmp(__s1, __s2, __n * sizeof(char_type)); }
59 length(const char_type* __s);
61 static const char_type*
62 find(const char_type* __s, size_t __n, const char_type& __a);
65 move(char_type* __s1, const char_type* __s2, size_t __n);
// Copies __n characters from __s2 to __s1 and returns __s1.  As in
// compare(), the character count is converted to the byte count memcpy
// expects.
68 copy(char_type* __s1, const char_type* __s2, size_t __n)
69 { return static_cast<char_type*>(memcpy(__s1, __s2, __n * sizeof(char_type))); }
72 assign(char_type* __s, size_t __n, char_type __a);
75 to_char_type(const int_type& __c);
78 to_int_type(const char_type& __c);
81 eq_int_type(const int_type& __c1, const int_type& __c2);
87 not_eof(const int_type& __c);
92 > how do I check that these conversions are correct?
93 Very easy. Since all the characters are from ASCII you simply
94 zero-extend the values.
96 drepper$ echo 'black pearl jasmine tea' | od -t x1
97 0000000 62 6c 61 63 6b 20 70 65 61 72 6c 20 6a 61 73 6d
98 0000020 69 6e 65 20 74 65 61 0a
100 So the UCS-2 string is
102 0x0062, 0x006c, 0x0061, ...
104 You get the idea. With iconv() you have to take care of the
105 byte-order, though. UCS-2 can mean little- or big-endian. Looking at
110 it shows that the other byte-order is used (25856 == 0x6500).
// Helper that receives the conversion state by non-const reference.  Both
// tests call it on every state object right after constructing it and before
// handing the state to the facet.  (The body is not part of this listing.)
115 initialize_state(std::__enc_traits& state)
118 // Partial specialization using __enc_traits.
119 // codecvt<unicode_t, char, __enc_traits>
120 // UNICODE - UCS2 (big endian)
// Exercises in(), out(), unshift(), encoding(), always_noconv(), length()
// and max_length() of the facet, using ISO-8859-15 text on the external
// (char) side and UCS-2BE on the internal (unicode_t) side.
124 typedef codecvt_base::result result;
125 typedef unicode_t int_type;
126 typedef char ext_type;
127 typedef __enc_traits enc_type;
128 typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
129 typedef char_traits<int_type> int_traits;
130 typedef char_traits<ext_type> ext_traits;
// External text; size is its length in characters (no terminator).
133 const ext_type* e_lit = "black pearl jasmine tea";
134 int size = strlen(e_lit);
// Expected internal form of e_lit: every character zero-extended to two
// bytes, high byte first.  Kept as a char array aligned for int_type so it
// can be viewed through an int_type pointer below.
// NOTE(review): the last byte, 0xa0, looks like a transposition of the 0x0a
// newline shown in the od dump quoted earlier in this file.  e_lit has no
// trailing newline and the checks below only look at `size` characters, so
// it does not appear to influence the result -- confirm.
136 char i_lit_base[50] __attribute__((aligned(__alignof__(int_type)))) =
138 0x00, 0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20,
139 0x00, 0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20,
140 0x00, 0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e,
141 0x00, 0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0
143 const int_type* i_lit = reinterpret_cast<int_type*>(i_lit_base);
// Output cursors and scratch buffers for the conversions.
// NOTE(review): e_arr and i_arr come from new[]; no matching delete[] is
// visible in this listing -- confirm they are released at the end of the test.
145 const ext_type* efrom_next;
146 const int_type* ifrom_next;
147 ext_type* e_arr = new ext_type[size + 1];
149 int_type* i_arr = new int_type[size + 1];
152 // construct a locale object with the specialized facet.
153 locale loc(locale::classic(), new unicode_codecvt);
154 // sanity check the constructed locale has the specialized facet.
155 VERIFY( has_facet<unicode_codecvt>(loc) );
156 const unicode_codecvt& cvt = use_facet<unicode_codecvt>(loc);
// in(): external -> internal.  Expects ok, output equal to i_lit, all input
// consumed and exactly `size` internal characters written.
// State arguments are presumably (internal encoding, external encoding,
// internal BOM, external BOM) -- TODO confirm against __enc_traits.
159 unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
160 initialize_state(state01);
161 // internal encoding is bigger because of bom
162 result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
163 i_arr, i_arr + size + 1, ito_next);
164 VERIFY( r1 == codecvt_base::ok );
165 VERIFY( !int_traits::compare(i_arr, i_lit, size) );
166 VERIFY( efrom_next == e_lit + size );
167 VERIFY( ito_next == i_arr + size );
// out(): internal -> external, with a fresh state.  Expects ok, output equal
// to e_lit, all input consumed and exactly `size` external characters written.
170 unicode_codecvt::state_type state02("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
171 initialize_state(state02);
172 result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
173 e_arr, e_arr + size, eto_next);
174 VERIFY( r2 == codecvt_base::ok );
175 VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
176 VERIFY( ifrom_next == i_lit + size );
177 VERIFY( eto_next == e_arr + size );
// unshift(): expects noconv, the buffer left untouched and the output cursor
// still at the start of e_arr.
180 ext_traits::copy(e_arr, e_lit, size);
181 unicode_codecvt::state_type state03("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
182 initialize_state(state03);
183 result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
184 VERIFY( r3 == codecvt_base::noconv );
185 VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
186 VERIFY( eto_next == e_arr );
188 int i = cvt.encoding();
189 VERIFY( i == 2 ); // Target-dependent.
191 VERIFY( !cvt.always_noconv() );
// NOTE(review): state04 is constructed and initialized here, but length() is
// handed state03 (already used by unshift() above).  This looks like a
// copy-paste slip for state04 -- confirm before changing.
193 unicode_codecvt::state_type state04("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
194 initialize_state(state04);
195 int j = cvt.length(state03, e_lit, e_lit + size, 5);
198 int k = cvt.max_length();
205 // Partial specialization using __enc_traits.
206 // codecvt<unicode_t, char, __enc_traits>
207 // UNICODE - UCS2 (little endian)
// Same sequence of checks as the big-endian test above, but with UCS-2LE on
// the internal (unicode_t) side and ISO-8859-15 on the external (char) side.
211 typedef codecvt_base::result result;
// Local typedef repeating the file-scope declaration of unicode_t.
212 typedef unsigned short unicode_t;
213 typedef unicode_t int_type;
214 typedef char ext_type;
215 typedef __enc_traits enc_type;
216 typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
217 typedef char_traits<int_type> int_traits;
218 typedef char_traits<ext_type> ext_traits;
// External text; size is its length in characters (no terminator).
221 const ext_type* e_lit = "black pearl jasmine tea";
222 int size = strlen(e_lit);
// Expected internal form of e_lit: every character zero-extended to two
// bytes, low byte first.  Kept as a char array aligned for int_type so it
// can be viewed through an int_type pointer below.
// NOTE(review): the trailing 0xa0 looks like a transposition of the 0x0a
// newline shown in the od dump quoted earlier in this file.  e_lit has no
// trailing newline and the checks below only look at `size` characters, so
// it does not appear to influence the result -- confirm.
224 char i_lit_base[50] __attribute__((aligned(__alignof__(int_type)))) =
226 0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20, 0x00,
227 0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20, 0x00,
228 0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e, 0x00,
229 0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0, 0x00
231 const int_type* i_lit = reinterpret_cast<int_type*>(i_lit_base);
// Output cursors and scratch buffers for the conversions.
// NOTE(review): e_arr and i_arr come from new[]; no matching delete[] is
// visible in this listing -- confirm they are released at the end of the test.
233 const ext_type* efrom_next;
234 const int_type* ifrom_next;
235 ext_type* e_arr = new ext_type[size + 1];
237 int_type* i_arr = new int_type[size + 1];
240 // construct a locale object with the specialized facet.
241 locale loc(locale::classic(), new unicode_codecvt);
242 // sanity check the constructed locale has the specialized facet.
243 VERIFY( has_facet<unicode_codecvt>(loc) );
244 const unicode_codecvt& cvt = use_facet<unicode_codecvt>(loc);
// in(): external -> internal.  Expects ok, output equal to i_lit, all input
// consumed and exactly `size` internal characters written.
// State arguments are presumably (internal encoding, external encoding,
// internal BOM, external BOM) -- TODO confirm against __enc_traits.
247 unicode_codecvt::state_type state01("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
248 initialize_state(state01);
249 // internal encoding is bigger because of bom
250 result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
251 i_arr, i_arr + size + 1, ito_next);
252 VERIFY( r1 == codecvt_base::ok );
253 VERIFY( !int_traits::compare(i_arr, i_lit, size) );
254 VERIFY( efrom_next == e_lit + size );
255 VERIFY( ito_next == i_arr + size );
// out(): internal -> external, with a fresh state.  Expects ok, output equal
// to e_lit, all input consumed and exactly `size` external characters written.
258 unicode_codecvt::state_type state02("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
259 initialize_state(state02);
260 result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
261 e_arr, e_arr + size, eto_next);
262 VERIFY( r2 == codecvt_base::ok );
263 VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
264 VERIFY( ifrom_next == i_lit + size );
265 VERIFY( eto_next == e_arr + size );
// unshift(): expects noconv, the buffer left untouched and the output cursor
// still at the start of e_arr.
268 ext_traits::copy(e_arr, e_lit, size);
269 unicode_codecvt::state_type state03("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
270 initialize_state(state03);
271 result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
272 VERIFY( r3 == codecvt_base::noconv );
273 VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
274 VERIFY( eto_next == e_arr );
276 int i = cvt.encoding();
277 VERIFY( i == 2 ); // Target-dependent.
279 VERIFY( !cvt.always_noconv() );
// NOTE(review): state04 is constructed and initialized here, but length() is
// handed state03 (already used by unshift() above).  This looks like a
// copy-paste slip for state04 -- confirm before changing.
281 unicode_codecvt::state_type state04("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
282 initialize_state(state04);
283 int j = cvt.length(state03, e_lit, e_lit + size, 5);
286 int k = cvt.max_length();
293 #endif // _GLIBCPP_USE___ENC_TRAITS
297 #if _GLIBCPP_USE___ENC_TRAITS