src/TortoisePlink/CHARSET/CHARSET.H

   1 /*\r
   2  * charset.h - header file for general character set conversion\r
   3  * routines.\r
   4  */\r
   5 \r
   6 #ifndef charset_charset_h\r
   7 #define charset_charset_h\r
   8 \r
   9 #include <stddef.h>\r
  10 \r
  11 /*\r
  12  * Enumeration that lists all the multibyte or single-byte\r
  13  * character sets known to this library.\r
  14  */\r
  15 typedef enum {\r
  16     CS_NONE,                           /* used for reporting errors, etc */\r
  17     CS_ISO8859_1,\r
  18     CS_ISO8859_1_X11,                  /* X font encoding with VT100 glyphs */\r
  19     CS_ISO8859_2,\r
  20     CS_ISO8859_3,\r
  21     CS_ISO8859_4,\r
  22     CS_ISO8859_5,\r
  23     CS_ISO8859_6,\r
  24     CS_ISO8859_7,\r
  25     CS_ISO8859_8,\r
  26     CS_ISO8859_9,\r
  27     CS_ISO8859_10,\r
  28     CS_ISO8859_11,\r
  29     CS_ISO8859_13,\r
  30     CS_ISO8859_14,\r
  31     CS_ISO8859_15,\r
  32     CS_ISO8859_16,\r
  33     CS_CP437,\r
  34     CS_CP850,\r
  35     CS_CP866,\r
  36     CS_CP1250,\r
  37     CS_CP1251,\r
  38     CS_CP1252,\r
  39     CS_CP1253,\r
  40     CS_CP1254,\r
  41     CS_CP1255,\r
  42     CS_CP1256,\r
  43     CS_CP1257,\r
  44     CS_CP1258,\r
  45     CS_KOI8_R,\r
  46     CS_KOI8_U,\r
  47     CS_MAC_ROMAN,\r
  48     CS_MAC_TURKISH,\r
  49     CS_MAC_CROATIAN,\r
  50     CS_MAC_ICELAND,\r
  51     CS_MAC_ROMANIAN,\r
  52     CS_MAC_GREEK,\r
  53     CS_MAC_CYRILLIC,\r
  54     CS_MAC_THAI,\r
  55     CS_MAC_CENTEURO,\r
  56     CS_MAC_SYMBOL,\r
  57     CS_MAC_DINGBATS,\r
  58     CS_MAC_ROMAN_OLD,\r
  59     CS_MAC_CROATIAN_OLD,\r
  60     CS_MAC_ICELAND_OLD,\r
  61     CS_MAC_ROMANIAN_OLD,\r
  62     CS_MAC_GREEK_OLD,\r
  63     CS_MAC_CYRILLIC_OLD,\r
  64     CS_MAC_UKRAINE,\r
  65     CS_MAC_VT100,\r
  66     CS_MAC_VT100_OLD,\r
  67     CS_VISCII,\r
  68     CS_HP_ROMAN8,\r
  69     CS_DEC_MCS,\r
  70     CS_UTF8\r
  71 } charset_t;\r
  72 \r
  73 typedef struct {\r
  74     unsigned long s0;\r
  75 } charset_state;\r
  76 \r
  77 /*\r
  78  * Routine to convert a MB/SB character set to Unicode.\r
  79  * \r
  80  * This routine accepts some number of bytes, updates a state\r
  81  * variable, and outputs some number of Unicode characters. There\r
  82  * are no guarantees. You can't even guarantee that at most one\r
  83  * Unicode character will be output per byte you feed in; for\r
  84  * example, suppose you're reading UTF-8, you've seen E1 80, and\r
  85  * then you suddenly see FE. Now you need to output _two_ error\r
  86  * characters - one for the incomplete sequence E1 80, and one for\r
  87  * the completely invalid UTF-8 byte FE.\r
  88  * \r
  89  * Returns the number of wide characters output; will never output\r
  90  * more than the size of the buffer (as specified on input).\r
  91  * Advances the `input' pointer and decrements `inlen', to indicate\r
  92  * how far along the input string it got.\r
  93  * \r
  94  * The sequence of `errlen' wide characters pointed to by `errstr'\r
  95  * will be used to indicate a conversion error. If `errstr' is\r
  96  * NULL, `errlen' will be ignored, and the library will choose\r
  97  * something sensible to do on its own. For Unicode, this will be\r
  98  * U+FFFD (REPLACEMENT CHARACTER).\r
  99  */\r
 100 \r
 101 int charset_to_unicode(char **input, int *inlen, wchar_t *output, int outlen,\r
 102                        int charset, charset_state *state,\r
 103                        const wchar_t *errstr, int errlen);\r
 104 \r
 105 /*\r
 106  * Routine to convert Unicode to an MB/SB character set.\r
 107  * \r
 108  * This routine accepts some number of Unicode characters, updates\r
 109  * a state variable, and outputs some number of bytes.\r
 110  * \r
 * Returns the number of bytes output; will never output
 112  * more than the size of the buffer (as specified on input), and\r
 113  * will never output a partial MB character. Advances the `input'\r
 114  * pointer and decrements `inlen', to indicate how far along the\r
 115  * input string it got.\r
 116  * \r
 117  * The sequence of `errlen' characters pointed to by `errstr' will\r
 118  * be used to indicate a conversion error. If `errstr' is NULL,\r
 119  * `errlen' will be ignored, and the library will choose something\r
 120  * sensible to do on its own (which will vary depending on the\r
 121  * output charset).\r
 122  */\r
 123 \r
 124 int charset_from_unicode(wchar_t **input, int *inlen, char *output, int outlen,\r
 125                          int charset, charset_state *state,\r
 126                          const char *errstr, int errlen);\r
 127 \r
 128 /*\r
 129  * Convert X11 encoding names to and from our charset identifiers.\r
 130  */\r
 131 const char *charset_to_xenc(int charset);\r
 132 int charset_from_xenc(const char *name);\r
 133 \r
 134 /*\r
 135  * Convert MIME encoding names to and from our charset identifiers.\r
 136  */\r
 137 const char *charset_to_mimeenc(int charset);\r
 138 int charset_from_mimeenc(const char *name);\r
 139 \r
 140 /*\r
 141  * Convert our own encoding names to and from our charset\r
 142  * identifiers.\r
 143  */\r
 144 const char *charset_to_localenc(int charset);\r
 145 int charset_from_localenc(const char *name);\r
 146 int charset_localenc_nth(int n);\r
 147 \r
 148 /*\r
 149  * Convert Mac OS script/region/font to our charset identifiers.\r
 150  */\r
 151 int charset_from_macenc(int script, int region, int sysvers,\r
 152                         const char *fontname);\r
 153 \r
 154 #endif /* charset_charset_h */\r