libstdc++-v3/testsuite/22_locale/codecvt_members_unicode_char.cc

   1 // 2000-08-22 Benjamin Kosnik <bkoz@cygnus.com>
   2
   3 // Copyright (C) 2000, 2001, 2002 Free Software Foundation
   4 //
   5 // This file is part of the GNU ISO C++ Library.  This library is free
   6 // software; you can redistribute it and/or modify it under the
   7 // terms of the GNU General Public License as published by the
   8 // Free Software Foundation; either version 2, or (at your option)
   9 // any later version.
  10
  11 // This library is distributed in the hope that it will be useful,
  12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 // GNU General Public License for more details.
  15
  16 // You should have received a copy of the GNU General Public License along
  17 // with this library; see the file COPYING.  If not, write to the Free
  18 // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  19 // USA.
  20
  21 // 22.2.1.5 - Template class codecvt [lib.locale.codecvt]
  22
  23 #include <locale>
  24 #include <testsuite_hooks.h>
  25
  26
  27
  28 #ifdef _GLIBCPP_USE___ENC_TRAITS
  29
  30 // Need some char_traits specializations for this to work.
  31 typedef unsigned short                  unicode_t;
  32
  33 namespace std
  34 {
  35   template<>
  36     struct char_traits<unicode_t>
  37     {
  38       typedef unicode_t         char_type;
  39       // Unsigned as wint_t is unsigned.
  40       typedef unsigned long     int_type;
  41       typedef streampos         pos_type;
  42       typedef streamoff         off_type;
  43       typedef mbstate_t         state_type;
  44
  45       static void
  46       assign(char_type& __c1, const char_type& __c2);
  47
  48       static bool
  49       eq(const char_type& __c1, const char_type& __c2);
  50
  51       static bool
  52       lt(const char_type& __c1, const char_type& __c2);
  53
  54       static int
  55       compare(const char_type* __s1, const char_type* __s2, size_t __n)
  56       { return memcmp(__s1, __s2, __n); }
  57
  58       static size_t
  59       length(const char_type* __s);
  60
  61       static const char_type*
  62       find(const char_type* __s, size_t __n, const char_type& __a);
  63
  64       static char_type*
  65       move(char_type* __s1, const char_type* __s2, size_t __n);
  66
  67       static char_type*
  68       copy(char_type* __s1, const char_type* __s2, size_t __n)
  69       {  return static_cast<char_type*>(memcpy(__s1, __s2, __n)); }
  70
  71       static char_type*
  72       assign(char_type* __s, size_t __n, char_type __a);
  73
  74       static char_type
  75       to_char_type(const int_type& __c);
  76
  77       static int_type
  78       to_int_type(const char_type& __c);
  79
  80       static bool
  81       eq_int_type(const int_type& __c1, const int_type& __c2);
  82
  83       static int_type
  84       eof();
  85
  86       static int_type
  87       not_eof(const int_type& __c);
  88     };
  89 }
  90
  91 /*
  92 > how do I check that these conversions are correct?
  93 Very easy.  Since all the characters are from ASCII you simply
  94 zero-extend the values.
  95
  96 drepper$ echo 'black pearl jasmine tea' | od -t x1
  97 0000000 62 6c 61 63 6b 20 70 65 61 72 6c 20 6a 61 73 6d
  98 0000020 69 6e 65 20 74 65 61 0a
  99
 100 So the UCS-2 string is
 101
 102 0x0062, 0x006c, 0x0061, ...
 103
 104 You get the idea.  With iconv() you have to take care of the
 105 byte-order, though.  UCS-2 can mean little- or big endian.  Looking at
 106 your result
 107
 108 > $9 = 25856
 109
 110 it shows that the other byte-order is used (25856 == 0x6500).
 111 */
 112
 113
 114 void
 115 initialize_state(std::__enc_traits& state)
 116 { state._M_init(); }
 117
 118 // Partial specialization using __enc_traits.
 119 // codecvt<unicode_t, char, __enc_traits>
 120 // UNICODE - UCS2 (big endian)
 121 void test01()
 122 {
 123   using namespace std;
 124   typedef codecvt_base::result                  result;
 125   typedef unicode_t                             int_type;
 126   typedef char                                  ext_type;
 127   typedef __enc_traits                          enc_type;
 128   typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
 129   typedef char_traits<int_type>                 int_traits;
 130   typedef char_traits<ext_type>                 ext_traits;
 131
 132   bool                  test = true;
 133   const ext_type*       e_lit = "black pearl jasmine tea";
 134   int                   size = strlen(e_lit);
 135
 136   char  i_lit_base[50] __attribute__((aligned(__alignof__(int_type)))) =
 137   {
 138     0x00, 0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20,
 139     0x00, 0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20,
 140     0x00, 0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e,
 141     0x00, 0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0
 142   };
 143   const int_type*       i_lit = reinterpret_cast<int_type*>(i_lit_base);
 144
 145   const ext_type*       efrom_next;
 146   const int_type*       ifrom_next;
 147   ext_type*             e_arr = new ext_type[size + 1];
 148   ext_type*             eto_next;
 149   int_type*             i_arr = new int_type[size + 1];
 150   int_type*             ito_next;
 151
 152   // construct a locale object with the specialized facet.
 153   locale                loc(locale::classic(), new unicode_codecvt);
 154   // sanity check the constructed locale has the specialized facet.
 155   VERIFY( has_facet<unicode_codecvt>(loc) );
 156   const unicode_codecvt&        cvt = use_facet<unicode_codecvt>(loc);
 157
 158   // in
 159   unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
 160   initialize_state(state01);
 161   // internal encoding is bigger because of bom
 162   result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
 163                      i_arr, i_arr + size + 1, ito_next);
 164   VERIFY( r1 == codecvt_base::ok );
 165   VERIFY( !int_traits::compare(i_arr, i_lit, size) );
 166   VERIFY( efrom_next == e_lit + size );
 167   VERIFY( ito_next == i_arr + size );
 168
 169   // out
 170   unicode_codecvt::state_type state02("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
 171   initialize_state(state02);
 172   result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
 173                        e_arr, e_arr + size, eto_next);
 174   VERIFY( r2 == codecvt_base::ok );
 175   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
 176   VERIFY( ifrom_next == i_lit + size );
 177   VERIFY( eto_next == e_arr + size );
 178
 179   // unshift
 180   ext_traits::copy(e_arr, e_lit, size);
 181   unicode_codecvt::state_type state03("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
 182   initialize_state(state03);
 183   result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
 184   VERIFY( r3 == codecvt_base::noconv );
 185   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
 186   VERIFY( eto_next == e_arr );
 187
 188   int i = cvt.encoding();
 189   VERIFY( i == 2 ); // Target-dependent.
 190
 191   VERIFY( !cvt.always_noconv() );
 192
 193   unicode_codecvt::state_type state04("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
 194   initialize_state(state04);
 195   int j = cvt.length(state03, e_lit, e_lit + size, 5);
 196   VERIFY( j == 5 );
 197
 198   int k = cvt.max_length();
 199   VERIFY( k == 1 );
 200
 201   delete [] e_arr;
 202   delete [] i_arr;
 203 }
 204
 205 // Partial specialization using __enc_traits.
 206 // codecvt<unicode_t, char, __enc_traits>
 207 // UNICODE - UCS2 (little endian)
 208 void test02()
 209 {
 210   using namespace std;
 211   typedef codecvt_base::result                  result;
 212   typedef unsigned short                        unicode_t;
 213   typedef unicode_t                             int_type;
 214   typedef char                                  ext_type;
 215   typedef __enc_traits                          enc_type;
 216   typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
 217   typedef char_traits<int_type>                 int_traits;
 218   typedef char_traits<ext_type>                 ext_traits;
 219
 220   bool                  test = true;
 221   const ext_type*       e_lit = "black pearl jasmine tea";
 222   int                   size = strlen(e_lit);
 223
 224   char  i_lit_base[50] __attribute__((aligned(__alignof__(int_type)))) =
 225   {
 226     0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20, 0x00,
 227     0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20, 0x00,
 228     0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e, 0x00,
 229     0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0, 0x00
 230   };
 231   const int_type*       i_lit = reinterpret_cast<int_type*>(i_lit_base);
 232
 233   const ext_type*       efrom_next;
 234   const int_type*       ifrom_next;
 235   ext_type*             e_arr = new ext_type[size + 1];
 236   ext_type*             eto_next;
 237   int_type*             i_arr = new int_type[size + 1];
 238   int_type*             ito_next;
 239
 240   // construct a locale object with the specialized facet.
 241   locale                loc(locale::classic(), new unicode_codecvt);
 242   // sanity check the constructed locale has the specialized facet.
 243   VERIFY( has_facet<unicode_codecvt>(loc) );
 244   const unicode_codecvt&        cvt = use_facet<unicode_codecvt>(loc);
 245
 246   // in
 247   unicode_codecvt::state_type state01("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
 248   initialize_state(state01);
 249   // internal encoding is bigger because of bom
 250   result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
 251                      i_arr, i_arr + size + 1, ito_next);
 252   VERIFY( r1 == codecvt_base::ok );
 253   VERIFY( !int_traits::compare(i_arr, i_lit, size) );
 254   VERIFY( efrom_next == e_lit + size );
 255   VERIFY( ito_next == i_arr + size );
 256
 257   // out
 258   unicode_codecvt::state_type state02("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
 259   initialize_state(state02);
 260   result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
 261                        e_arr, e_arr + size, eto_next);
 262   VERIFY( r2 == codecvt_base::ok );
 263   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
 264   VERIFY( ifrom_next == i_lit + size );
 265   VERIFY( eto_next == e_arr + size );
 266
 267   // unshift
 268   ext_traits::copy(e_arr, e_lit, size);
 269   unicode_codecvt::state_type state03("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
 270   initialize_state(state03);
 271   result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
 272   VERIFY( r3 == codecvt_base::noconv );
 273   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
 274   VERIFY( eto_next == e_arr );
 275
 276   int i = cvt.encoding();
 277   VERIFY( i == 2 ); // Target-dependent.
 278
 279   VERIFY( !cvt.always_noconv() );
 280
 281   unicode_codecvt::state_type state04("UCS-2LE", "ISO-8859-15", 0xfeff, 0);
 282   initialize_state(state04);
 283   int j = cvt.length(state03, e_lit, e_lit + size, 5);
 284   VERIFY( j == 5 );
 285
 286   int k = cvt.max_length();
 287   VERIFY( k == 1 );
 288
 289   delete [] e_arr;
 290   delete [] i_arr;
 291 }
 292
 293 #endif // _GLIBCPP_USE___ENC_TRAITS
 294
 295 int main ()
 296 {
 297 #if _GLIBCPP_USE___ENC_TRAITS
 298   test01();
 299   test02();
 300 #endif
 301
 302   return 0;
 303 }