2 /* Copyright (C) 2002, 2003, 2004 Manuel Novoa III
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the Free
16 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
21 * Besides uClibc, I'm using this code in my libc for elks, which is
22 * a 16-bit environment with a fairly limited compiler. It would make
23 * things much easier for me if this file isn't modified unnecessarily.
24 * In particular, please put any new or replacement functions somewhere
25 * else, and modify the makefile to use your version instead.
28 * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
31 /* May 23, 2002 Initial Notes:
33 * I'm still tweaking this stuff, but it passes the tests I've thrown
34 * at it, and Erik needs it for the gcc port. The glibc extension
35 * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
36 * in the glibc source. I also need to fix the behavior of
37 * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
39 * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
40 * file on my platform (x86) show about 5-10% faster conversion speed than
41 * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
42 * individual mbrtowc()/wcrtomb() calls.
44 * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
45 * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
46 * needs to deal gracefully with whatever is sent to it. In that mode,
47 * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
48 * an arg to force that behavior, so the interface will be changing.
50 * I need to fix the error checking for 16-bit wide chars. This isn't
51 * an issue for uClibc, but may be for ELKS. I'm currently not sure
52 * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
56 * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
57 * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
59 * Enabled building of a C/POSIX-locale-only version, so full locale support
60 * no longer needs to be enabled.
64 * Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
65 * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
66 * order to support %ls in printf. See comments below for details.
67 * Change behaviour of wc<->mb functions when in the C locale. Now they do
68 * a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
69 * and consistency with the standard's requirement that a printf format string be
70 * a valid multibyte string beginning and ending in its initial shift state.
74 * Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday.
78 * Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08.
79 * Added some size/speed optimizations and integrated it into my locale
80 * framework. Minimally tested at the moment, but the stub C-locale
81 * version (which most people would probably be using) should be fine.
85 * Revert the wc<->mb changes from earlier this month involving the C-locale.
86 * Add a couple of ugly hacks to support *wprintf.
87 * Add a mini iconv() and iconv implementation (requires locale support).
90 * Bug fix for mbrtowc.
93 * Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ.
96 * Bug fix: Fix size check for remaining output space in iconv().
102 #define _ISOC99_SOURCE
107 #include <inttypes.h>
113 #include <bits/uClibc_uwchar.h>
115 /**********************************************************************/
116 #ifdef __UCLIBC_HAS_LOCALE__
117 #ifdef __UCLIBC_MJN3_ONLY__
119 /* generates one warning */
120 #warning TODO: Fix Cc2wc* and Cwc2c* defines!
122 #endif /* __UCLIBC_MJN3_ONLY__ */
124 #define ENCODING ((__UCLIBC_CURLOCALE_DATA).encoding)
126 #define Cc2wc_IDX_SHIFT __LOCALE_DATA_Cc2wc_IDX_SHIFT
127 #define Cc2wc_ROW_LEN __LOCALE_DATA_Cc2wc_ROW_LEN
128 #define Cwc2c_DOMAIN_MAX __LOCALE_DATA_Cwc2c_DOMAIN_MAX
129 #define Cwc2c_TI_SHIFT __LOCALE_DATA_Cwc2c_TI_SHIFT
130 #define Cwc2c_TT_SHIFT __LOCALE_DATA_Cwc2c_TT_SHIFT
131 #define Cwc2c_TI_LEN __LOCALE_DATA_Cwc2c_TI_LEN
133 #ifndef __CTYPE_HAS_UTF_8_LOCALES
134 #warning __CTYPE_HAS_UTF_8_LOCALES not set!
137 #else /* __UCLIBC_HAS_LOCALE__ */
139 #ifdef __UCLIBC_MJN3_ONLY__
142 #warning fix preprocessor logic testing locale settings
146 #define ENCODING (__ctype_encoding_7_bit)
147 #ifdef __CTYPE_HAS_8_BIT_LOCALES
148 #error __CTYPE_HAS_8_BIT_LOCALES is defined!
150 #ifdef __CTYPE_HAS_UTF_8_LOCALES
151 #error __CTYPE_HAS_UTF_8_LOCALES is defined!
153 #undef L__wchar_utf8sntowcs
154 #undef L__wchar_wcsntoutf8s
156 #endif /* __UCLIBC_HAS_LOCALE__ */
157 /**********************************************************************/
159 #if WCHAR_MAX > 0xffffUL
160 #define UTF_8_MAX_LEN 6
162 #define UTF_8_MAX_LEN 3
167 extern size_t __mbrtowc (wchar_t *__restrict __pwc,
168 __const char *__restrict __s, size_t __n,
169 mbstate_t *__p) attribute_hidden;
171 extern size_t __wcrtomb (char *__restrict __s, wchar_t __wc,
172 mbstate_t *__restrict __ps) attribute_hidden;
174 /* Implementation-specific work functions. */
176 extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
177 const char **__restrict src, size_t n,
178 mbstate_t *ps, int allow_continuation) attribute_hidden;
180 extern size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
181 const wchar_t **__restrict src, size_t wn) attribute_hidden;
183 /* glibc extensions. */
185 extern size_t __mbsnrtowcs(wchar_t *__restrict dst,
186 const char **__restrict src,
187 size_t NMC, size_t len, mbstate_t *__restrict ps) attribute_hidden;
189 extern size_t __wcsnrtombs(char *__restrict dst,
190 const wchar_t **__restrict src,
191 size_t NWC, size_t len, mbstate_t *__restrict ps) attribute_hidden;
193 /**********************************************************************/
/* __btowc: map the single byte c to a wide character (exported as btowc via
 * strong_alias below).
 *
 * With 8-bit locale support the byte is stored in a one-byte buffer and run
 * through __mbrtowc() with a freshly reset mbstate.  Without it, the result
 * is c itself when c is in 0..0x7f and WEOF otherwise.
 * NOTE(review): several lines of the 8-bit branch (declarations, the return
 * statements) are not visible in this listing. */
196 wint_t attribute_hidden __btowc(int c)
198 #ifdef __CTYPE_HAS_8_BIT_LOCALES
201 unsigned char buf[1];
205 *buf = (unsigned char) c;
206 mbstate.__mask = 0; /* Initialize the mbstate. */
207 if (__mbrtowc(&wc, buf, 1, &mbstate) <= 1) {
213 #else /* __CTYPE_HAS_8_BIT_LOCALES */
215 #ifdef __UCLIBC_HAS_LOCALE__
216 assert((ENCODING == __ctype_encoding_7_bit)
217 || (ENCODING == __ctype_encoding_utf8));
218 #endif /* __UCLIBC_HAS_LOCALE__ */
220 /* If we don't have 8-bit locale support, then this is trivial since
221 * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */
/* The unsigned cast makes negative values of c (e.g. EOF) fail the test. */
222 return (((unsigned int)c) < 0x80) ? c : WEOF;
224 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
226 strong_alias(__btowc,btowc)
229 /**********************************************************************/
232 /* Note: We completely ignore ps in all currently supported conversions. */
236 #ifdef __CTYPE_HAS_8_BIT_LOCALES
238 unsigned char buf[MB_LEN_MAX];
240 return (__wcrtomb(buf, c, NULL) == 1) ? *buf : EOF;
242 #else /* __CTYPE_HAS_8_BIT_LOCALES */
244 #ifdef __UCLIBC_HAS_LOCALE__
245 assert((ENCODING == __ctype_encoding_7_bit)
246 || (ENCODING == __ctype_encoding_utf8));
247 #endif /* __UCLIBC_HAS_LOCALE__ */
249 /* If we don't have 8-bit locale support, then this is trivial since
250 * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */
252 /* TODO: need unsigned version of wint_t... */
253 /* return (((unsigned int)c) < 0x80) ? c : WEOF; */
254 return ((c >= 0) && (c < 0x80)) ? c : EOF;
256 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
260 /**********************************************************************/
263 int mbsinit(const mbstate_t *ps)
265 return !ps || !ps->__mask;
269 /**********************************************************************/
272 size_t attribute_hidden __mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
274 static mbstate_t mbstate; /* Rely on bss 0-init. */
276 return __mbrtowc(NULL, s, n, (ps != NULL) ? ps : &mbstate);
278 strong_alias(__mbrlen,mbrlen)
281 /**********************************************************************/
/* __mbrtowc: convert the multibyte character starting at s (at most n bytes)
 * into a single wide character stored through pwc (exported as mbrtowc).
 *
 * Visible behaviour:
 *  - In a UTF-8 locale the work is delegated to _wchar_utf8sntowcs() with a
 *    one-wchar limit and continuation allowed; a result of 1 is turned into
 *    the number of bytes consumed (p - s), any other result is passed on.
 *  - Otherwise __mbsnrtowcs() is called with a one-wchar limit.
 *  - One early-return path yields (size_t)-1 when the saved state is flagged
 *    as an error (__mask set and __wc == 0xffff) and (size_t)-2 otherwise.
 * NOTE(review): the conditions guarding the early paths and several local
 * declarations (p, r, wcbuf) are not visible in this listing. */
284 size_t attribute_hidden __mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
285 size_t n, mbstate_t *__restrict ps)
287 static mbstate_t mbstate; /* Rely on bss 0-init. */
291 char empty_string[1]; /* Avoid static to be fPIC friendly. */
298 pwc = (wchar_t *) s; /* NULL */
299 empty_string[0] = 0; /* Init the empty string when necessary. */
303 /* TODO: change error code? */
304 return (ps->__mask && (ps->__wc == 0xffffU))
305 ? ((size_t) -1) : ((size_t) -2);
310 #ifdef __CTYPE_HAS_UTF_8_LOCALES
311 /* Need to do this here since mbsrtowcs doesn't allow incompletes. */
312 if (ENCODING == __ctype_encoding_utf8) {
316 r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
317 return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */
321 #ifdef __UCLIBC_MJN3_ONLY__
322 #warning TODO: This adds a trailing nul!
323 #endif /* __UCLIBC_MJN3_ONLY__ */
325 r = __mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);
/* Non-negative when viewed as signed, i.e. neither (size_t)-1 nor (size_t)-2. */
327 if (((ssize_t) r) >= 0) {
334 strong_alias(__mbrtowc,mbrtowc)
337 /**********************************************************************/
340 /* Note: We completely ignore ps in all currently supported conversions. */
341 /* TODO: Check for valid state anyway? */
/* __wcrtomb: convert the wide character wc to its multibyte form in s
 * (exported as wcrtomb).
 *
 * The conversion is done by __wcsnrtombs() on a one-element wide string with
 * room for MB_LEN_MAX output bytes.  A result of 0 from that call is
 * reported as 1; any other result is returned unchanged.
 * NOTE(review): presumably the 0 -> 1 mapping covers the nul wide character,
 * which occupies one output byte; the setup of pwc and the use of buf are
 * not visible in this listing. */
343 size_t attribute_hidden __wcrtomb(register char *__restrict s, wchar_t wc,
344 mbstate_t *__restrict ps)
346 #ifdef __UCLIBC_MJN3_ONLY__
347 #warning TODO: Should wcsnrtombs nul-terminate unconditionally? Check glibc.
348 #endif /* __UCLIBC_MJN3_ONLY__ */
352 char buf[MB_LEN_MAX];
362 r = __wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);
363 return (r != 0) ? r : 1;
365 strong_alias(__wcrtomb,wcrtomb)
368 /**********************************************************************/
371 size_t attribute_hidden __mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
372 size_t len, mbstate_t *__restrict ps)
374 static mbstate_t mbstate; /* Rely on bss 0-init. */
376 return __mbsnrtowcs(dst, src, SIZE_MAX, len,
377 ((ps != NULL) ? ps : &mbstate));
379 strong_alias(__mbsrtowcs,mbsrtowcs)
382 /**********************************************************************/
385 /* Note: We completely ignore ps in all currently supported conversions.
387 * TODO: Check for valid state anyway? */
389 size_t attribute_hidden __wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
390 size_t len, mbstate_t *__restrict ps)
392 return __wcsnrtombs(dst, src, SIZE_MAX, len, ps);
394 strong_alias(__wcsrtombs,wcsrtombs)
397 /**********************************************************************/
398 #ifdef L__wchar_utf8sntowcs
400 /* Define DECODER to generate a UTF-8 decoder which passes Markus Kuhn's
401 * UTF-8-test.txt stress test.
403 /* #define DECODER */
/* _wchar_utf8sntowcs: decode UTF-8 bytes from *src into wide characters.
 *
 * Parameters, as far as the visible code shows:
 *   pwc  - output buffer; NULL, or the special value (wchar_t *)ps, selects
 *          a length-only mode (see the "AWFUL HACK" note below).
 *   wn   - maximum number of wide characters to produce.
 *   src  - in/out pointer to the input bytes.
 *   n    - number of input bytes available.
 *   ps   - conversion state; a nonzero __mask marks a partially decoded
 *          character, and __wc == 0xffff marks an error state.
 *   allow_continuation - nonzero lets an incomplete trailing character be
 *          saved in ps rather than rejected.
 *
 * Returns (size_t)-1 for an illegal start byte or when ps is already in an
 * error state.  The visible validity test rejects overlong sequences,
 * 0xfffe/0xffff and the surrogate range 0xd800..0xdfff.
 * NOTE(review): many lines of the body (declarations, the loop head, the
 * remaining return paths) are not visible in this listing. */
411 size_t attribute_hidden _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
412 const char **__restrict src, size_t n,
413 mbstate_t *ps, int allow_continuation)
415 register const char *s;
428 /* NOTE: The following is an AWFUL HACK! In order to support %s in
429 * wprintf, we need to be able to compute the number of wchars needed
430 * for the mbs conversion, not to exceed the precision specified.
431 * But if dst is NULL, the return value is the length assuming a
432 * sufficiently sized buffer. So, we allow passing of (wchar_t *) ps
433 * as pwc in order to flag that we really want the length, subject
434 * to the restricted buffer size and no partial conversions.
435 * See mbsnrtowcs() as well. */
436 if (!pwc || (pwc == ((wchar_t *)ps))) {
444 /* This is really here only to support the glibc extension function
445 * __mbsnrtowcs which apparently returns 0 if wn == 0 without any
446 * check on the validity of the mbstate. */
451 if ((mask = (__uwchar_t) ps->__mask) != 0) { /* A continuation... */
453 wc = (__uwchar_t) ps->__wc;
459 if ((wc = (__uwchar_t) ps->__wc) != 0xffffU) {
460 /* TODO: change error code here and below? */
467 return (size_t) -1; /* We're in an error state. */
476 if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
478 #ifdef __UCLIBC_MJN3_ONLY__
479 #warning TODO: Fix range for 16 bit wchar_t case.
/* Accept only lead bytes 0xc0..0xfd as the start of a multibyte sequence. */
481 if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) {
492 return (size_t) -1; /* Illegal start byte! */
/* A continuation byte must have the bit pattern 10xxxxxx. */
498 if ((*s & 0xc0) != 0x80) {
503 wc += (*s & 0x3f); /* keep separate for bcc (smaller code) */
508 if ((wc & mask) == 0) { /* Character completed. */
509 if ((mask >>= 5) == 0x40) {
512 /* Check for invalid sequences (longer than necessary)
513 * and invalid chars. */
514 if ( (wc < mask) /* Sequence not minimal length. */
516 #if UTF_8_MAX_LEN == 3
517 #error broken since mask can overflow!!
518 /* For plane 0, these are the only defined values.*/
521 /* Note that we don't need to worry about exceeding */
522 /* 31 bits as that is the most that UTF-8 provides. */
523 || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
525 || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
533 /* Character potentially valid but incomplete. */
534 if (!allow_continuation) {
538 /* NOTE: The following can fail if you allow and then disallow
540 #if UTF_8_MAX_LEN == 3
541 #error broken since mask can overflow!!
543 /* Need to back up... */
546 } while ((mask >>= 5) >= 0x40);
/* Save the partially decoded character so a later call can resume it. */
549 ps->__mask = (wchar_t) mask;
550 ps->__wc = (wchar_t) wc;
561 while (wc && --count);
569 /* ps->__wc is irrelevant here. */
579 /**********************************************************************/
580 #ifdef L__wchar_wcsntoutf8s
/* _wchar_wcsntoutf8s: encode wide characters from *src as UTF-8 into s.
 *
 * Parameters, as far as the visible code shows:
 *   s   - output buffer; NULL, or the special value (char *)src, selects a
 *         length-only mode (see the "AWFUL HACK" note below).
 *   n   - size of the output buffer in bytes.
 *   src - in/out pointer to the wide string; updated through swc on exit.
 *   wn  - maximum number of wide characters to consume.
 *
 * The visible checks reject 0xfffe/0xffff, the surrogate range
 * 0xd800..0xdfff and, when UTF_8_MAX_LEN != 3, values above 0x7fffffff.
 * NOTE(review): the loop structure, the return statements and most local
 * declarations are not visible in this listing. */
582 size_t attribute_hidden _wchar_wcsntoutf8s(char *__restrict s, size_t n,
583 const wchar_t **__restrict src, size_t wn)
588 const __uwchar_t *swc;
590 char buf[MB_LEN_MAX];
594 /* NOTE: The following is an AWFUL HACK! In order to support %ls in
595 * printf, we need to be able to compute the number of bytes needed
596 * for the mbs conversion, not to exceed the precision specified.
597 * But if dst is NULL, the return value is the length assuming a
598 * sufficiently sized buffer. So, we allow passing of (char *) src
599 * as dst in order to flag that we really want the length, subject
600 * to the restricted buffer size and no partial conversions.
601 * See wcsnrtombs() as well. */
602 if (!s || (s == ((char *) src))) {
611 swc = (const __uwchar_t *) *src;
624 #if UTF_8_MAX_LEN == 3
625 /* For plane 0, these are the only defined values.*/
626 /* Note that we don't need to worry about exceeding */
627 /* 31 bits as that is the most that UTF-8 provides. */
630 /* UTF_8_MAX_LEN == 6 */
632 || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
634 || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
640 #if UTF_8_MAX_LEN != 3
641 if (wc > 0x7fffffffUL) { /* Value too large. */
654 if ((len = p - s) > t) { /* Not enough space. */
/* Continuation bytes are written back to front, six payload bits each. */
661 *--p = (wc & 0x3f) | 0x80;
665 } else if (wc == 0) { /* End of string. */
/* Report back to the caller how far the wide string was read. */
679 *src = (const wchar_t *) swc;
687 /**********************************************************************/
688 #ifdef L___mbsnrtowcs
690 /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
/* __mbsnrtowcs: convert at most NMC bytes of the multibyte string *src into
 * at most len wide characters at dst (weakly aliased as mbsnrtowcs below).
 *
 * Visible behaviour:
 *  - In a UTF-8 locale the call is forwarded to _wchar_utf8sntowcs() with
 *    continuation allowed; its (size_t)-2 result is reported as 0.
 *  - dst == NULL, or the special value (wchar_t *)ps, selects a length-only
 *    mode (see the "AWFUL HACK" note below).
 *  - In an 8-bit locale, bytes >= 0x80 are translated through the locale's
 *    idx8c2wc/tbl8c2wc tables; the remaining path asserts a 7-bit encoding
 *    and copies bytes one-to-one.
 * NOTE(review): local declarations, loop heads and return statements of the
 * single-byte paths are not visible in this listing. */
692 size_t attribute_hidden __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
693 size_t NMC, size_t len, mbstate_t *__restrict ps)
695 static mbstate_t mbstate; /* Rely on bss 0-init. */
705 #ifdef __CTYPE_HAS_UTF_8_LOCALES
706 if (ENCODING == __ctype_encoding_utf8) {
708 return ((r = _wchar_utf8sntowcs(dst, len, src, NMC, ps, 1))
709 != (size_t) -2) ? r : 0;
713 /* NOTE: The following is an AWFUL HACK! In order to support %s in
714 * wprintf, we need to be able to compute the number of wchars needed
715 * for the mbs conversion, not to exceed the precision specified.
716 * But if dst is NULL, the return value is the length assuming a
717 * sufficiently sized buffer. So, we allow passing of ((wchar_t *)ps)
718 * as dst in order to flag that we really want the length, subject
719 * to the restricted buffer size and no partial conversions.
720 * See _wchar_utf8sntowcs() as well. */
721 if (!dst || (dst == ((wchar_t *)ps))) {
729 /* Since all the following encodings are single-byte encodings... */
737 #ifdef __CTYPE_HAS_8_BIT_LOCALES
738 if (ENCODING == __ctype_encoding_8_bit) {
741 if ((wc = ((unsigned char)(*s))) >= 0x80) { /* Non-ASCII... */
/* Two-level lookup: idx8c2wc picks a row, the low bits pick the column. */
743 wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
744 (__UCLIBC_CURLOCALE_DATA.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
745 << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
765 #ifdef __UCLIBC_HAS_LOCALE__
766 assert(ENCODING == __ctype_encoding_7_bit);
770 if ((*dst = (unsigned char) *s) == 0) {
775 #ifdef __CTYPE_HAS_8_BIT_LOCALES
791 size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
792 size_t NMC, size_t len, mbstate_t *__restrict ps)
793 __attribute__ ((__weak__, __alias__("__mbsnrtowcs")));
796 /**********************************************************************/
797 #ifdef L___wcsnrtombs
799 /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
801 /* Note: We completely ignore ps in all currently supported conversions.
802 * TODO: Check for valid state anyway? */
/* __wcsnrtombs: convert at most NWC wide characters of *src into at most len
 * bytes at dst (weakly aliased as wcsnrtombs below).
 *
 * Visible behaviour:
 *  - In a UTF-8 locale the call is forwarded to _wchar_wcsntoutf8s(); ps is
 *    not passed along.
 *  - dst == NULL, or the special value (char *)src, selects a length-only
 *    mode (see the "AWFUL HACK" note below).
 *  - In an 8-bit locale, values <= 0x7f are copied directly; values up to
 *    Cwc2c_DOMAIN_MAX go through the three-step idx8wc2c/tbl8wc2c lookup,
 *    and a zero lookup result is replaced by __WCHAR_REPLACEMENT_CHAR ('?').
 *  - The remaining path asserts a 7-bit encoding and copies one-to-one.
 *  - *src is updated to the position reached.
 * NOTE(review): local declarations, loop heads and return statements are
 * not visible in this listing. */
804 size_t attribute_hidden __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
805 size_t NWC, size_t len, mbstate_t *__restrict ps)
810 char buf[MB_LEN_MAX];
812 #ifdef __CTYPE_HAS_UTF_8_LOCALES
813 if (ENCODING == __ctype_encoding_utf8) {
814 return _wchar_wcsntoutf8s(dst, len, src, NWC);
816 #endif /* __CTYPE_HAS_UTF_8_LOCALES */
819 /* NOTE: The following is an AWFUL HACK! In order to support %ls in
820 * printf, we need to be able to compute the number of bytes needed
821 * for the mbs conversion, not to exceed the precision specified.
822 * But if dst is NULL, the return value is the length assuming a
823 * sufficiently sized buffer. So, we allow passing of (char *) src
824 * as dst in order to flag that we really want the length, subject
825 * to the restricted buffer size and no partial conversions.
826 * See _wchar_wcsntoutf8s() as well. */
827 if (!dst || (dst == ((char *) src))) {
835 /* Since all the following encodings are single-byte encodings... */
841 s = (const __uwchar_t *) *src;
843 #ifdef __CTYPE_HAS_8_BIT_LOCALES
844 if (ENCODING == __ctype_encoding_8_bit) {
848 if ((wc = *s) <= 0x7f) {
849 if (!(*dst = (unsigned char) wc)) {
855 if (wc <= Cwc2c_DOMAIN_MAX) {
/* Three-step table walk from the wide character to its 8-bit code. */
856 u = __UCLIBC_CURLOCALE_DATA.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
858 u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
859 + ((wc >> Cwc2c_TT_SHIFT)
860 & ((1 << Cwc2c_TI_SHIFT)-1))];
861 u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
862 + (u << Cwc2c_TT_SHIFT)
863 + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
866 #define __WCHAR_REPLACEMENT_CHAR '?'
867 #ifdef __WCHAR_REPLACEMENT_CHAR
868 *dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );
869 #else /* __WCHAR_REPLACEMENT_CHAR */
873 *dst = (unsigned char) u;
874 #endif /* __WCHAR_REPLACEMENT_CHAR */
881 *src = (const wchar_t *) s;
885 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
887 #ifdef __UCLIBC_HAS_LOCALE__
888 assert(ENCODING == __ctype_encoding_7_bit);
893 #if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
899 if ((*dst = (unsigned char) *s) == 0) {
908 *src = (const wchar_t *) s;
913 size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
914 size_t NWC, size_t len, mbstate_t *__restrict ps)
915 __attribute__ ((__weak__, __alias__("__wcsnrtombs")));
918 /**********************************************************************/
921 #ifdef __UCLIBC_MJN3_ONLY__
922 #warning REMINDER: If we start doing translit, wcwidth and wcswidth will need updating.
923 #warning TODO: Update wcwidth to match latest by Kuhn.
926 #if defined(__UCLIBC_HAS_LOCALE__) && \
927 ( defined(__CTYPE_HAS_8_BIT_LOCALES) || defined(__CTYPE_HAS_UTF_8_LOCALES) )
929 static const unsigned char new_idx[] = {
930 0, 5, 5, 6, 10, 15, 28, 39,
931 48, 48, 71, 94, 113, 128, 139, 154,
932 175, 186, 188, 188, 188, 188, 188, 188,
933 203, 208, 208, 208, 208, 208, 208, 208,
934 208, 219, 219, 219, 222, 222, 222, 222,
935 222, 222, 222, 222, 222, 222, 222, 224,
936 224, 231, 231, 231, 231, 231, 231, 231,
937 231, 231, 231, 231, 231, 231, 231, 231,
938 231, 231, 231, 231, 231, 231, 231, 231,
939 231, 231, 231, 231, 231, 231, 231, 231,
940 231, 231, 231, 231, 231, 231, 231, 231,
941 231, 231, 231, 231, 231, 231, 231, 231,
942 231, 231, 231, 231, 231, 231, 231, 231,
943 231, 231, 231, 231, 231, 231, 231, 231,
944 231, 231, 231, 231, 231, 231, 231, 231,
945 231, 231, 231, 231, 231, 231, 231, 231,
946 231, 231, 231, 231, 231, 231, 231, 231,
947 231, 231, 231, 231, 231, 231, 231, 231,
948 231, 231, 231, 231, 231, 231, 231, 231,
949 231, 231, 231, 231, 231, 231, 231, 231,
950 231, 231, 231, 231, 231, 233, 233, 233,
951 233, 233, 233, 233, 234, 234, 234, 234,
952 234, 234, 234, 234, 234, 234, 234, 234,
953 234, 234, 234, 234, 234, 234, 234, 234,
954 234, 234, 234, 234, 234, 234, 234, 234,
955 234, 234, 234, 234, 234, 234, 234, 234,
956 234, 234, 234, 234, 234, 234, 234, 234,
957 236, 236, 236, 236, 236, 236, 236, 236,
958 236, 236, 236, 236, 236, 236, 236, 236,
959 236, 236, 236, 236, 236, 236, 236, 236,
960 236, 236, 236, 236, 236, 236, 236, 236,
961 236, 237, 237, 238, 241, 241, 242, 249,
965 static const unsigned char new_tbl[] = {
966 0x00, 0x01, 0x20, 0x7f, 0xa0, 0x00, 0x00, 0x50,
967 0x60, 0x70, 0x00, 0x83, 0x87, 0x88, 0x8a, 0x00,
968 0x91, 0xa2, 0xa3, 0xba, 0xbb, 0xbe, 0xbf, 0xc0,
969 0xc1, 0xc3, 0xc4, 0xc5, 0x00, 0x4b, 0x56, 0x70,
970 0x71, 0xd6, 0xe5, 0xe7, 0xe9, 0xea, 0xee, 0x00,
971 0x0f, 0x10, 0x11, 0x12, 0x30, 0x4b, 0xa6, 0xb1,
972 0x00, 0x01, 0x03, 0x3c, 0x3d, 0x41, 0x49, 0x4d,
973 0x4e, 0x51, 0x55, 0x62, 0x64, 0x81, 0x82, 0xbc,
974 0xbd, 0xc1, 0xc5, 0xcd, 0xce, 0xe2, 0xe4, 0x00,
975 0x02, 0x03, 0x3c, 0x3d, 0x41, 0x43, 0x47, 0x49,
976 0x4b, 0x4e, 0x70, 0x72, 0x81, 0x83, 0xbc, 0xbd,
977 0xc1, 0xc6, 0xc7, 0xc9, 0xcd, 0xce, 0x00, 0x01,
978 0x02, 0x3c, 0x3d, 0x3f, 0x40, 0x41, 0x44, 0x4d,
979 0x4e, 0x56, 0x57, 0x82, 0x83, 0xc0, 0xc1, 0xcd,
980 0xce, 0x00, 0x3e, 0x41, 0x46, 0x49, 0x4a, 0x4e,
981 0x55, 0x57, 0xbf, 0xc0, 0xc6, 0xc7, 0xcc, 0xce,
982 0x00, 0x41, 0x44, 0x4d, 0x4e, 0xca, 0xcb, 0xd2,
983 0xd5, 0xd6, 0xd7, 0x00, 0x31, 0x32, 0x34, 0x3b,
984 0x47, 0x4f, 0xb1, 0xb2, 0xb4, 0xba, 0xbb, 0xbd,
985 0xc8, 0xce, 0x00, 0x18, 0x1a, 0x35, 0x36, 0x37,
986 0x38, 0x39, 0x3a, 0x71, 0x7f, 0x80, 0x85, 0x86,
987 0x88, 0x90, 0x98, 0x99, 0xbd, 0xc6, 0xc7, 0x00,
988 0x2d, 0x31, 0x32, 0x33, 0x36, 0x38, 0x39, 0x3a,
989 0x58, 0x5a, 0x00, 0x60, 0x00, 0x12, 0x15, 0x32,
990 0x35, 0x52, 0x54, 0x72, 0x74, 0xb7, 0xbe, 0xc6,
991 0xc7, 0xc9, 0xd4, 0x00, 0x0b, 0x0f, 0xa9, 0xaa,
992 0x00, 0x0b, 0x10, 0x2a, 0x2f, 0x60, 0x64, 0x6a,
993 0x70, 0xd0, 0xeb, 0x00, 0x29, 0x2b, 0x00, 0x80,
994 0x00, 0x2a, 0x30, 0x3f, 0x40, 0x99, 0x9b, 0x00,
995 0xd0, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x1e,
996 0x1f, 0x00, 0x00, 0x10, 0x20, 0x24, 0x30, 0x70,
997 0xff, 0x00, 0x61, 0xe0, 0xe7, 0xf9, 0xfc,
1000 static const signed char new_wtbl[] = {
1001 0, -1, 1, -1, 1, 1, 0, 1,
1002 0, 1, 1, 0, 1, 0, 1, 1,
1003 0, 1, 0, 1, 0, 1, 0, 1,
1004 0, 1, 0, 1, 1, 0, 1, 0,
1005 1, 0, 1, 0, 1, 0, 1, 1,
1006 0, 1, 0, 1, 0, 1, 0, 1,
1007 1, 0, 1, 0, 1, 0, 1, 0,
1008 1, 0, 1, 0, 1, 0, 1, 0,
1009 1, 0, 1, 0, 1, 0, 1, 1,
1010 0, 1, 0, 1, 0, 1, 0, 1,
1011 0, 1, 0, 1, 0, 1, 0, 1,
1012 0, 1, 0, 1, 0, 1, 1, 0,
1013 1, 0, 1, 0, 1, 0, 1, 0,
1014 1, 0, 1, 0, 1, 0, 1, 0,
1015 1, 1, 0, 1, 0, 1, 0, 1,
1016 0, 1, 0, 1, 0, 1, 0, 1,
1017 1, 0, 1, 0, 1, 0, 1, 0,
1018 1, 0, 1, 1, 0, 1, 0, 1,
1019 0, 1, 0, 1, 0, 1, 0, 1,
1020 0, 1, 1, 0, 1, 0, 1, 0,
1021 1, 0, 1, 0, 1, 0, 1, 0,
1022 1, 0, 1, 0, 1, 0, 1, 1,
1023 0, 1, 0, 1, 0, 1, 0, 1,
1024 0, 1, 2, 0, 1, 0, 1, 0,
1025 1, 0, 1, 0, 1, 0, 1, 0,
1026 1, 0, 1, 1, 0, 1, 0, 1,
1027 1, 0, 1, 0, 1, 0, 1, 0,
1028 1, 0, 1, 1, 2, 1, 1, 2,
1029 2, 0, 2, 1, 2, 0, 2, 2,
1030 1, 1, 2, 1, 1, 2, 1, 0,
1031 1, 1, 0, 1, 0, 1, 2, 1,
1032 0, 2, 1, 2, 1, 0, 1,
/* __wcswidth (locale-aware build): accumulate the column width of up to n
 * wide characters of pwcs, stopping early at a nul wide character.
 *
 * Visible structure:
 *  - A per-encoding pre-check: in a 7-bit locale each character is compared
 *    with its value truncated to unsigned char; in an 8-bit locale the
 *    string is test-converted with __wcsnrtombs(); with UTF-8 and KUHN
 *    defined, 0xfffe/0xffff and the surrogate range are tested for.
 *  - The main loop tests for control characters (below 32, or 0x7f..0x9f),
 *    resolves values up to 0xffff through a binary search over new_tbl with
 *    the width taken from new_wtbl, and handles a few ranges above 0xffff
 *    explicitly.
 * NOTE(review): the actions taken when these tests succeed, and the return
 * statements, are not visible in this listing. */
1035 int attribute_hidden __wcswidth(const wchar_t *pwcs, size_t n)
1041 if (ENCODING == __ctype_encoding_7_bit) {
1044 for (i = 0 ; (i < n) && pwcs[i] ; i++) {
1045 if (pwcs[i] != ((unsigned char)(pwcs[i]))) {
1050 #ifdef __CTYPE_HAS_8_BIT_LOCALES
1051 else if (ENCODING == __ctype_encoding_8_bit) {
1054 mbstate.__mask = 0; /* Initialize the mbstate. */
1055 if (__wcsnrtombs(NULL, &pwcs, n, SIZE_MAX, &mbstate) == ((size_t) - 1)) {
1059 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
1060 #if defined(__CTYPE_HAS_UTF_8_LOCALES) && defined(KUHN)
1061 /* For stricter handling of allowed unicode values... see comments above. */
1062 else if (ENCODING == __ctype_encoding_utf8) {
1065 for (i = 0 ; (i < n) && pwcs[i] ; i++) {
1066 if ( (((__uwchar_t)((pwcs[i]) - 0xfffeU)) < 2)
1067 || (((__uwchar_t)((pwcs[i]) - 0xd800U)) < (0xe000U - 0xd800U))
1073 #endif /* __CTYPE_HAS_UTF_8_LOCALES */
/* Main loop: at most n characters, ending early at the first nul. */
1075 for (count = 0 ; n && (wc = *pwcs++) ; n--) {
1077 /* If we're here, wc != 0. */
1078 if ((wc < 32) || ((wc >= 0x7f) && (wc < 0xa0))) {
1084 if (((unsigned int) wc) <= 0xffff) {
/* Bisect [l, h) until the midpoint stops moving. */
1089 while ((m = (l+h) >> 1) != l) {
1090 if (b >= new_tbl[m]) {
1092 } else { /* wc < tbl[m] */
1096 count += new_wtbl[l]; /* none should be -1. */
1100 /* Redo this to minimize average number of compares?*/
1101 if (wc >= 0x1d167) {
1102 if (wc <= 0x1d1ad) {
1108 || (wc >= 0x1d1aa))))))
1112 } else if (((wc >= 0xe0020) && (wc <= 0xe007f)) || (wc == 0xe0001)) {
1114 } else if ((wc >= 0x20000) && (wc <= 0x2ffff)) {
1115 ++count; /* need 2.. add one here */
1117 #if (WCHAR_MAX > 0x7fffffffL)
1118 else if (wc > 0x7fffffffL) {
1121 #endif /* (WCHAR_MAX > 0x7fffffffL) */
1130 #else /* __UCLIBC_HAS_LOCALE__ */
/* __wcswidth (build without locale support): walk up to n wide characters
 * of pwcs, stopping early at a nul, and test each one for being a control
 * character (below 32, or in 0x7f..0x9f).
 * NOTE(review): the action taken for control characters, the width
 * accumulation and the return statement are not visible in this listing. */
1132 int attribute_hidden __wcswidth(const wchar_t *pwcs, size_t n)
1137 for (count = 0 ; n && (wc = *pwcs++) ; n--) {
1139 /* If we're here, wc != 0. */
1140 if ((wc < 32) || ((wc >= 0x7f) && (wc < 0xa0))) {
1153 #endif /* __UCLIBC_HAS_LOCALE__ */
1155 strong_alias(__wcswidth,wcswidth)
1158 /**********************************************************************/
1161 extern int __wcswidth (__const wchar_t *__s, size_t __n) attribute_hidden;
1163 int wcwidth(wchar_t wc)
1165 return __wcswidth(&wc, 1);
1169 /**********************************************************************/
1174 mbstate_t fromstate;
1182 int skip_invalid_input; /* To support iconv -c option. */
1192 #include <byteswap.h>
1194 #if (__BYTE_ORDER != __BIG_ENDIAN) && (__BYTE_ORDER != __LITTLE_ENDIAN)
1195 #error unsupported endianness for iconv
1198 #ifndef __CTYPE_HAS_8_BIT_LOCALES
1199 #error currently iconv requires 8 bit locales
1201 #ifndef __CTYPE_HAS_UTF_8_LOCALES
1202 #error currently iconv requires UTF-8 locales
1208 IC_MULTIBYTE = 0xe0,
1209 #if __BYTE_ORDER == __BIG_ENDIAN
1224 /* For the multibyte
1225 * bit 0 means swap endian
1226 * bit 1 means 2 byte
1227 * bit 2 means 4 byte
/* Table of the codesets iconv recognises by name without consulting the
 * locale codeset list.
 *
 * Each entry is laid out as
 *     <entry length> <codeset id> <name> <NUL>
 * The length byte counts the whole entry, itself included, so a reader
 * steps to the next entry with `s += *s` and finds the name at s + 2.
 * The last entry ("ASCII") has no explicit NUL: the string literal's
 * implicit terminator ends its name and also serves as the end-of-table
 * marker, which is why it must stay last.
 *
 * For the multibyte ids (0xe0 and up):
 *   bit 0 means swap endian
 *   bit 1 means 2 byte
 *   bit 2 means 4 byte
 * and iconv_open() reads bit 4 (0x10) as the initial byte-order-mark flag.
 *
 * Every id must be written as ONE byte.  The big-endian "UTF-32" entry used
 * to read "\x09\fe4" (a form feed followed by the text "e4"), which made the
 * entry longer than its length byte and desynchronised the walk for every
 * entry after it.
 *
 * NOTE(review): the little-endian table sets the 0x10 bit on its "with BOM"
 * entries (0xf4, 0xfa) while the big-endian table does not (0xe4, 0xea).
 * Confirm whether the big-endian entries should carry it as well. */
const unsigned char __iconv_codesets[] =
	"\x0a\xe0""WCHAR_T\x00"		/* superset of UCS-4 but platform-endian */
#if __BYTE_ORDER == __BIG_ENDIAN
	"\x08\xec""UCS-4\x00"		/* always BE */
	"\x0a\xec""UCS-4BE\x00"
	"\x0a\xed""UCS-4LE\x00"
	"\x09\xe4""UTF-32\x00"		/* platform endian with BOM */
	"\x0b\xe4""UTF-32BE\x00"
	"\x0b\xe5""UTF-32LE\x00"
	"\x08\xe2""UCS-2\x00"		/* always BE */
	"\x0a\xe2""UCS-2BE\x00"
	"\x0a\xe3""UCS-2LE\x00"
	"\x09\xea""UTF-16\x00"		/* platform endian with BOM */
	"\x0b\xea""UTF-16BE\x00"
	"\x0b\xeb""UTF-16LE\x00"
#elif __BYTE_ORDER == __LITTLE_ENDIAN
	"\x08\xed""UCS-4\x00"		/* always BE */
	"\x0a\xed""UCS-4BE\x00"
	"\x0a\xec""UCS-4LE\x00"
	"\x09\xf4""UTF-32\x00"		/* platform endian with BOM */
	"\x0b\xe5""UTF-32BE\x00"
	"\x0b\xe4""UTF-32LE\x00"
	"\x08\xe3""UCS-2\x00"		/* always BE */
	"\x0a\xe3""UCS-2BE\x00"
	"\x0a\xe2""UCS-2LE\x00"
	"\x09\xfa""UTF-16\x00"		/* platform endian with BOM */
	"\x0b\xeb""UTF-16BE\x00"
	"\x0b\xea""UTF-16LE\x00"
#endif
	"\x08\x02""UTF-8\x00"
	"\x0b\x01""US-ASCII\x00"
	"\x07\x01""ASCII";		/* Must be last! (special case to save a nul) */
/* find_codeset: translate a codeset name into the numeric id used by iconv.
 *
 * The built-in __iconv_codesets table is searched first: each entry starts
 * with its own length byte (hence `s += *s`) and carries its name at s + 2.
 * The locale codeset list is then searched, counting codesets as it goes
 * (7bit is 1, UTF-8 is 2, 8-bit is >= 3).  Names are compared without
 * regard to case.
 * Returns 0 when no codeset matches.
 * NOTE(review): the return statements for the matching cases and the second
 * loop's head are not visible in this listing. */
1264 static int find_codeset(const char *name)
1266 const unsigned char *s;
1269 for (s = __iconv_codesets ; *s ; s += *s) {
1270 if (!strcasecmp(s+2, name)) {
1275 /* The following is ripped from find_locale in locale.c. */
1277 /* TODO: maybe CODESET_LIST + *s ??? */
1278 /* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
1280 s = __LOCALE_DATA_CODESET_LIST;
1282 ++codeset; /* Increment codeset first. */
1283 if (!strcasecmp(__LOCALE_DATA_CODESET_LIST+*s, name)) {
1288 return 0; /* No matching codeset! */
/* iconv_open: create a conversion descriptor translating from the codeset
 * named fromcode to the codeset named tocode.
 *
 * Both names are resolved with find_codeset(), where 0 means "unknown".
 * On success a _UC_iconv_t is malloc'ed, initialised and returned cast to
 * iconv_t:
 *   - the current and initial ("0"-suffixed) codeset and BOM fields are set
 *     together; the BOM flag is bit 4 (0x10) of the codeset id;
 *   - skip_invalid_input and both conversion-state masks are cleared.
 * On failure (iconv_t)(-1) is returned.
 * NOTE(review): errno is set to EINVAL on a failure path, but the branch
 * structure around that statement is not visible in this listing. */
1291 iconv_t weak_function iconv_open(const char *tocode, const char *fromcode)
1293 register _UC_iconv_t *px;
1294 int tocodeset, fromcodeset;
1296 if (((tocodeset = find_codeset(tocode)) != 0)
1297 && ((fromcodeset = find_codeset(fromcode)) != 0)) {
1298 if ((px = malloc(sizeof(_UC_iconv_t))) != NULL) {
1299 px->tocodeset = tocodeset;
1300 px->tobom0 = px->tobom = (tocodeset & 0x10) >> 4;
1301 px->fromcodeset0 = px->fromcodeset = fromcodeset;
1302 px->frombom0 = px->frombom = (fromcodeset & 0x10) >> 4;
1303 px->skip_invalid_input = px->tostate.__mask
1304 = px->fromstate.__mask = 0;
1305 return (iconv_t) px;
1308 __set_errno(EINVAL);
1310 return (iconv_t)(-1);
1313 int weak_function iconv_close(iconv_t cd)
1320 size_t weak_function iconv(iconv_t cd, char **__restrict inbuf,
1321 size_t *__restrict inbytesleft,
1322 char **__restrict outbuf,
1323 size_t *__restrict outbytesleft)
1325 _UC_iconv_t *px = (_UC_iconv_t *) cd;
1330 assert(px != (_UC_iconv_t *)(-1));
1331 assert(sizeof(wchar_t) == 4);
1333 if (!inbuf || !*inbuf) { /* Need to reinitialze conversion state. */
1334 /* Note: For shift-state encodings we possibly need to output the
1335 * shift sequence to return to initial state! */
1336 if ((px->fromcodeset & 0xf0) == 0xe0) {
1338 px->tostate.__mask = px->fromstate.__mask = 0;
1339 px->fromcodeset = px->fromcodeset0;
1340 px->tobom = px->tobom0;
1341 px->frombom = px->frombom0;
1346 while (*inbytesleft) {
1347 if (!*outbytesleft) {
1354 if (px->fromcodeset >= IC_MULTIBYTE) {
1355 inci = (px->fromcodeset == IC_WCHAR_T) ? 4: (px->fromcodeset & 6);
1356 if (*inbytesleft < inci) goto INVALID;
1357 wc = (((unsigned int)((unsigned char)((*inbuf)[0]))) << 8)
1358 + ((unsigned char)((*inbuf)[1]));
1360 wc = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
1361 + ((unsigned char)((*inbuf)[3])) + (wc << 16);
1362 if (!(px->fromcodeset & 1)) wc = bswap_32(wc);
1364 if (!(px->fromcodeset & 1)) wc = bswap_16(wc);
1365 if (((px->fromcodeset & IC_UTF_16) == IC_UTF_16)
1366 && (((__uwchar_t)(wc - 0xd800U)) < (0xdc00U - 0xd800U))
1369 if (*inbytesleft < 4) goto INVALID;
1370 wc2 = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
1371 + ((unsigned char)((*inbuf)[3]));
1372 if (!(px->fromcodeset & 1)) wc = bswap_16(wc2);
1373 if (((__uwchar_t)(wc2 -= 0xdc00U)) < (0xe0000U - 0xdc00U)) {
1376 inci = 4; /* Change inci here in case skipping illegals. */
1377 wc = 0x10000UL + (wc << 10) + wc2;
1384 || (wc == ((inci == 4)
1385 ? (((wchar_t) 0xfffe0000UL))
1386 : ((wchar_t)(0xfffeUL))))
1388 if (wc != 0xfeffU) {
1389 px->fromcodeset ^= 1; /* toggle endianness */
1393 goto BOM_SKIP_OUTPUT;
1399 if (px->fromcodeset != IC_WCHAR_T) {
1400 if (((__uwchar_t) wc) > (((px->fromcodeset & IC_UCS_4) == IC_UCS_4)
1401 ? 0x7fffffffUL : 0x10ffffUL)
1403 || (((__uwchar_t)(wc - 0xfffeU)) < 2)
1404 || (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
1410 } else if (px->fromcodeset == IC_UTF_8) {
1411 const char *p = *inbuf;
1412 r = _wchar_utf8sntowcs(&wc, 1, &p, *inbytesleft, &px->fromstate, 0);
1413 if (((ssize_t) r) <= 0) { /* either EILSEQ or incomplete or nul */
1414 if (((ssize_t) r) < 0) { /* either EILSEQ or incomplete or nul */
1415 assert((r == (size_t)(-1)) || (r == (size_t)(-2)));
1416 if (r == (size_t)(-2)) {
1418 __set_errno(EINVAL);
1420 px->fromstate.__mask = 0;
1423 if (px->skip_invalid_input) {
1424 px->skip_invalid_input = 2; /* flag for iconv utility */
1425 goto BOM_SKIP_OUTPUT;
1427 __set_errno(EILSEQ);
1429 return (size_t)(-1);
1431 #ifdef __UCLIBC_MJN3_ONLY__
1432 #warning TODO: optimize this.
1434 if (p != NULL) { /* incomplete char case */
1437 p = *inbuf + 1; /* nul */
1440 } else if ((wc = ((unsigned char)(**inbuf))) >= 0x80) { /* Non-ASCII... */
1441 if (px->fromcodeset == IC_ASCII) { /* US-ASCII codeset */
1443 } else { /* some other 8-bit ascii-extension codeset */
1444 const __codeset_8_bit_t *c8b
1445 = __locale_mmap->codeset_8_bit + px->fromcodeset - 3;
1447 wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
1448 (c8b->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
1449 << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
1464 if (px->tocodeset >= IC_MULTIBYTE) {
1465 inco = (px->tocodeset == IC_WCHAR_T) ? 4: (px->tocodeset & 6);
1466 if (*outbytesleft < inco) goto TOO_BIG;
1467 if (px->tocodeset != IC_WCHAR_T) {
1468 if (((__uwchar_t) wc) > (((px->tocodeset & IC_UCS_4) == IC_UCS_4)
1469 ? 0x7fffffffUL : 0x10ffffUL)
1471 || (((__uwchar_t)(wc - 0xfffeU)) < 2)
1472 || (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
1481 if (px->tocodeset & 1) wc = bswap_32(wc);
1483 if (((__uwchar_t)wc ) > 0xffffU) {
1484 if ((px->tocodeset & IC_UTF_16) != IC_UTF_16) {
1487 if (*outbytesleft < (inco = 4)) goto TOO_BIG;
1488 wc2 = 0xdc00U + (wc & 0x3ff);
1489 wc = 0xd800U + ((wc >> 10) & 0x3ff);
1490 if (px->tocodeset & 1) {
1492 wc2 = bswap_16(wc2);
1495 } else if (px->tocodeset & 1) wc = bswap_16(wc);
1497 (*outbuf)[0] = (char)((unsigned char)(wc));
1498 (*outbuf)[1] = (char)((unsigned char)(wc >> 8));
1500 (*outbuf)[2] = (char)((unsigned char)(wc >> 16));
1501 (*outbuf)[3] = (char)((unsigned char)(wc >> 24));
1503 } else if (px->tocodeset == IC_UTF_8) {
1504 const wchar_t *pw = &wc;
1506 r = _wchar_wcsntoutf8s(*outbuf, *outbytesleft, &pw, 1);
1507 if (r != (size_t)(-1)) {
1508 #ifdef __UCLIBC_MJN3_ONLY__
1509 #warning TODO: What happens for a nul?
1523 } else if (((__uwchar_t)(wc)) < 0x80) {
1527 if ((px->tocodeset != 0x01) && (wc <= Cwc2c_DOMAIN_MAX)) {
1528 const __codeset_8_bit_t *c8b
1529 = __locale_mmap->codeset_8_bit + px->tocodeset - 3;
1531 u = c8b->idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)];
1532 u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
1533 + ((wc >> Cwc2c_TT_SHIFT)
1534 & ((1 << Cwc2c_TI_SHIFT)-1))];
1535 wc = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
1536 + (u << Cwc2c_TT_SHIFT)
1537 + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
1547 *outbytesleft -= inco;
1550 *inbytesleft -= inci;
1556 /**********************************************************************/
1567 extern const unsigned char __iconv_codesets[];
/* error_msg: print a printf-style diagnostic on stderr.
 *
 * The prototype marks the function noreturn and asks the compiler to
 * type-check the variadic arguments against fmt (argument 1, values
 * starting at argument 2) the way it would for printf.
 *
 * NOTE(review): the statements that set up and tear down the va_list and
 * that actually terminate the program are not in this view -- confirm
 * against the full file. */
1575 static void error_msg(const char *fmt, ...)
1576 __attribute__ ((noreturn, format (printf, 1, 2)));
1578 static void error_msg(const char *fmt, ...)
/* Prefix every message with the program name ... */
1583 fprintf(stderr, "%s: ", progname);
/* ... then emit the caller's formatted text. */
1585 vfprintf(stderr, fmt, arg);
/* main: command-line front end for iconv().
 *
 * Usage (from the message printed below):
 *   prog [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]
 *   prog -l
 *
 * Parsed options are stored in opts[], indexed by the option letter's
 * position in opt_chars ("tfocsl"):
 *   opts[0] -t  target codeset      (takes an argument)
 *   opts[1] -f  source codeset      (takes an argument)
 *   opts[2] -o  output file name    (takes an argument)
 *   opts[3] -c  flag
 *   opts[4] -s  flag
 *   opts[5] -l  flag
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on a usage error or when the
 * converter recorded that invalid input was skipped. */
1592 int main(int argc, char **argv)
1595 FILE *ofile = stdout;
1598 static const char opt_chars[] = "tfocsl";
1600 const char *opts[sizeof(opt_chars)]; /* last is infile name */
1606 size_t ni, no, r, pos;
/* Start with every option slot unset. */
1610 for (s = opt_chars ; *s ; s++) {
1611 opts[ s - opt_chars ] = NULL;
/* An argument that does not begin with '-', or that is exactly "-",
 * is not an option. */
1617 if ((*p != '-') || (*++p == 0)) {
/* Unknown option letter: print the usage text and fail. */
1621 if ((s = __strchr(opt_chars,*p)) == NULL) {
1623 s = basename(progname);
1625 "%s [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]\n"
1626 " or\n%s -l\n", s, s);
1627 return EXIT_FAILURE;
/* The first three letters (t, f, o) consume the next argv entry as their
 * value; the test below catches a missing value or a repeated option.
 * The remaining letters are flags: any non-NULL pointer marks them set. */
1629 if ((s - opt_chars) < 3) {
1630 if ((--argc == 0) || opts[s - opt_chars]) {
1633 opts[s - opt_chars] = *++argv;
1635 opts[s - opt_chars] = p;
1640 if (opts[5]) { /* -l */
1641 fprintf(stderr, "Recognized codesets:\n");
/* Walk __iconv_codesets: the first byte of each entry is the distance to
 * the next entry, a zero byte ends the table, and the printable name
 * starts two bytes into the entry. */
1642 for (s = __iconv_codesets ; *s ; s += *s) {
1643 fprintf(stderr," %s\n", s+2);
/* The bytes read through s are used as offsets, from the start of
 * __LOCALE_DATA_CODESET_LIST, to the codeset name strings. */
1645 s = __LOCALE_DATA_CODESET_LIST;
1647 fprintf(stderr," %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s));
1650 return EXIT_SUCCESS;
/* Both -t and -f are mandatory for a conversion. */
1657 if (!opts[0] || !opts[1]) {
/* iconv_open() takes the target codeset first, then the source. */
1660 if ((ic = iconv_open(opts[0],opts[1])) == ((iconv_t)(-1))) {
1661 error_msg( "unsupported codeset in %s -> %s conversion\n", opts[0], opts[1]);
/* -c: set the converter's skip_invalid_input field directly in the
 * descriptor, which is cast to its concrete type here. */
1663 if (opts[3]) { /* -c */
1664 ((_UC_iconv_t *) ic)->skip_invalid_input = 1;
/* -o: write to the named file instead of stdout. */
1667 if ((s = opts[2]) != NULL) {
1668 if (!(ofile = fopen(s, "w"))) {
1669 error_msg( "couldn't open %s for writing\n", s);
/* No file arguments left, or an argument of exactly "-", selects stdin. */
1675 if (!argc || ((**argv == '-') && !((*argv)[1]))) {
1676 ifile = stdin; /* we don't check for duplicates */
1677 } else if (!(ifile = fopen(*argv, "r"))) {
1678 error_msg( "couldn't open %s for reading\n", *argv);
/* Read after the ni bytes already held in ibuf, so the buffer never
 * holds more than IBUF bytes in total. */
1681 while ((r = fread(ibuf + ni, 1, IBUF - ni, ifile)) > 0) {
/* EINVAL and E2BIG are not treated as fatal here; any other errno is
 * reported together with an input position computed as pos - ni. */
1687 if ((r = iconv(ic, &pi, &ni, &po, &no)) == ((size_t)(-1))) {
1688 if ((errno != EINVAL) && (errno != E2BIG)) {
1689 error_msg( "iconv failed at pos %lu : %m\n", (unsigned long) (pos - ni));
/* OBUF - no is the number of bytes written to obuf; a short fwrite() is
 * treated as a write error. */
1692 if ((r = OBUF - no) > 0) {
1693 if (fwrite(obuf, 1, OBUF - no, ofile) < r) {
1694 error_msg( "write error\n");
/* Move unconverted input to the front of ibuf so the next fread()
 * appends to it. */
1697 if (ni) { /* still bytes in buffer! */
1698 __memmove(ibuf, pi, ni);
/* fread() returning 0 may mean an error rather than end of file. */
1702 if (ferror(ifile)) {
1703 error_msg( "read error\n");
1708 if (ifile != stdin) {
/* One pass per remaining input-file argument. */
1712 } while (--argc > 0);
1717 error_msg( "incomplete sequence\n");
/* The conversion routine earlier in this file sets skip_invalid_input to 2
 * when it skips invalid input; report that as a failing exit status. */
1720 return (((_UC_iconv_t *) ic)->skip_invalid_input < 2)
1721 ? EXIT_SUCCESS : EXIT_FAILURE;
1725 /**********************************************************************/