\r
#if defined(_MFC_VER) || defined(CSTRING_AVAILABLE)\r
\r
+/**
+ * One row of a charset lookup table: a code page identifier paired with
+ * the name it is looked up by.
+ */
+struct CodeMap
+{
+	int m_Code;                ///< code page identifier
+	// The names are string literals, so the pointer must be to const data.
+	const TCHAR * m_CodeName;  ///< charset name; NULL marks the end of a table
+};
+/**
+ * Maps a character-set name (e.g. "utf-8", "windows-1252") to its code page
+ * identifier.
+ *
+ * The name is compared case-insensitively and \a codename is not modified.
+ * The table is scanned from the top and the first match wins, so when two
+ * rows share a name (ignoring case) only the earlier one can be returned:
+ * "iso-2022-jp" yields 50220 (never 50222), "euc-jp" yields 20932 (never
+ * 51932), "Arabic" yields 709 and "EBCDIC" yields 50930.
+ *
+ * NOTE(review): a few rows ("Arabic", "ISO", "EBCDIC", "EUC", "21027") use
+ * what looks like the first word of the description rather than a real
+ * charset name - confirm whether they are intended to be matchable.
+ *
+ * \param codename  name of the character set to look up
+ * \return the code page identifier of the first matching row, or CP_UTF8 if
+ *         no row matches
+ */
+int CUnicodeUtils::GetCPCode(CString &codename)
+{
+	// Identifier / name pairs; the row with a NULL name terminates the table.
+	static const CodeMap map[] =
+	{
+		{37, _T("IBM037")}, // IBM EBCDIC US-Canada (decimal 37: a leading zero would make the literal octal)
+		{437, _T("IBM437")}, // OEM United States
+		{500, _T("IBM500")}, // IBM EBCDIC International
+		{708, _T("ASMO-708")}, // Arabic (ASMO 708)
+		{709, _T("Arabic")}, // Arabic (ASMO-449+, BCON V4)
+		{710, _T("Arabic")}, // Arabic - Transparent Arabic
+		{720, _T("DOS-720")}, // Arabic (Transparent ASMO); Arabic (DOS)
+		{737, _T("ibm737")}, // OEM Greek (formerly 437G); Greek (DOS)
+		{775, _T("ibm775")}, // OEM Baltic; Baltic (DOS)
+		{850, _T("ibm850")}, // OEM Multilingual Latin 1; Western European (DOS)
+		{852, _T("ibm852")}, // OEM Latin 2; Central European (DOS)
+		{855, _T("IBM855")}, // OEM Cyrillic (primarily Russian)
+		{857, _T("ibm857")}, // OEM Turkish; Turkish (DOS)
+		{858, _T("IBM00858")}, // OEM Multilingual Latin 1 + Euro symbol
+		{860, _T("IBM860")}, // OEM Portuguese; Portuguese (DOS)
+		{861, _T("ibm861")}, // OEM Icelandic; Icelandic (DOS)
+		{862, _T("DOS-862")}, // OEM Hebrew; Hebrew (DOS)
+		{863, _T("IBM863")}, // OEM French Canadian; French Canadian (DOS)
+		{864, _T("IBM864")}, // OEM Arabic; Arabic (864)
+		{865, _T("IBM865")}, // OEM Nordic; Nordic (DOS)
+		{866, _T("cp866")}, // OEM Russian; Cyrillic (DOS)
+		{869, _T("ibm869")}, // OEM Modern Greek; Greek, Modern (DOS)
+		{870, _T("IBM870")}, // IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
+		{874, _T("windows-874")}, // ANSI/OEM Thai; Thai (Windows)
+		{875, _T("cp875")}, // IBM EBCDIC Greek Modern
+		{932, _T("shift_jis")}, // ANSI/OEM Japanese; Japanese (Shift-JIS)
+		{936, _T("gb2312")}, // ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
+		{949, _T("ks_c_5601-1987")}, // ANSI/OEM Korean (Unified Hangul Code)
+		{950, _T("big5")}, // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
+		{1026, _T("IBM1026")}, // IBM EBCDIC Turkish (Latin 5)
+		{1047, _T("IBM01047")}, // IBM EBCDIC Latin 1/Open System
+		{1140, _T("IBM01140")}, // IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
+		{1141, _T("IBM01141")}, // IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
+		{1142, _T("IBM01142")}, // IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)
+		{1143, _T("IBM01143")}, // IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)
+		{1144, _T("IBM01144")}, // IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)
+		{1145, _T("IBM01145")}, // IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)
+		{1146, _T("IBM01146")}, // IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)
+		{1147, _T("IBM01147")}, // IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
+		{1148, _T("IBM01148")}, // IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
+		{1149, _T("IBM01149")}, // IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
+		{1200, _T("utf-16")}, // Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
+		{1201, _T("unicodeFFFE")}, // Unicode UTF-16, big endian byte order; available only to managed applications
+		{1250, _T("windows-1250")}, // ANSI Central European; Central European (Windows)
+		{1251, _T("windows-1251")}, // ANSI Cyrillic; Cyrillic (Windows)
+		{1252, _T("windows-1252")}, // ANSI Latin 1; Western European (Windows)
+		{1253, _T("windows-1253")}, // ANSI Greek; Greek (Windows)
+		{1254, _T("windows-1254")}, // ANSI Turkish; Turkish (Windows)
+		{1255, _T("windows-1255")}, // ANSI Hebrew; Hebrew (Windows)
+		{1256, _T("windows-1256")}, // ANSI Arabic; Arabic (Windows)
+		{1257, _T("windows-1257")}, // ANSI Baltic; Baltic (Windows)
+		{1258, _T("windows-1258")}, // ANSI/OEM Vietnamese; Vietnamese (Windows)
+		{1361, _T("Johab")}, // Korean (Johab)
+		{10000, _T("macintosh")}, // MAC Roman; Western European (Mac)
+		{10001, _T("x-mac-japanese")}, // Japanese (Mac)
+		{10002, _T("x-mac-chinesetrad")}, // MAC Traditional Chinese (Big5); Chinese Traditional (Mac)
+		{10003, _T("x-mac-korean")}, // Korean (Mac)
+		{10004, _T("x-mac-arabic")}, // Arabic (Mac)
+		{10005, _T("x-mac-hebrew")}, // Hebrew (Mac)
+		{10006, _T("x-mac-greek")}, // Greek (Mac)
+		{10007, _T("x-mac-cyrillic")}, // Cyrillic (Mac)
+		{10008, _T("x-mac-chinesesimp")}, // MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)
+		{10010, _T("x-mac-romanian")}, // Romanian (Mac)
+		{10017, _T("x-mac-ukrainian")}, // Ukrainian (Mac)
+		{10021, _T("x-mac-thai")}, // Thai (Mac)
+		{10029, _T("x-mac-ce")}, // MAC Latin 2; Central European (Mac)
+		{10079, _T("x-mac-icelandic")}, // Icelandic (Mac)
+		{10081, _T("x-mac-turkish")}, // Turkish (Mac)
+		{10082, _T("x-mac-croatian")}, // Croatian (Mac)
+		{12000, _T("utf-32")}, // Unicode UTF-32, little endian byte order; available only to managed applications
+		{12001, _T("utf-32BE")}, // Unicode UTF-32, big endian byte order; available only to managed applications
+		{20000, _T("x-Chinese_CNS")}, // CNS Taiwan; Chinese Traditional (CNS)
+		{20001, _T("x-cp20001")}, // TCA Taiwan
+		{20002, _T("x_Chinese-Eten")}, // Eten Taiwan; Chinese Traditional (Eten)
+		{20003, _T("x-cp20003")}, // IBM5550 Taiwan
+		{20004, _T("x-cp20004")}, // TeleText Taiwan
+		{20005, _T("x-cp20005")}, // Wang Taiwan
+		{20105, _T("x-IA5")}, // IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)
+		{20106, _T("x-IA5-German")}, // IA5 German (7-bit)
+		{20107, _T("x-IA5-Swedish")}, // IA5 Swedish (7-bit)
+		{20108, _T("x-IA5-Norwegian")}, // IA5 Norwegian (7-bit)
+		{20127, _T("us-ascii")}, // US-ASCII (7-bit)
+		{20261, _T("x-cp20261")}, // T.61
+		{20269, _T("x-cp20269")}, // ISO 6937 Non-Spacing Accent
+		{20273, _T("IBM273")}, // IBM EBCDIC Germany
+		{20277, _T("IBM277")}, // IBM EBCDIC Denmark-Norway
+		{20278, _T("IBM278")}, // IBM EBCDIC Finland-Sweden
+		{20280, _T("IBM280")}, // IBM EBCDIC Italy
+		{20284, _T("IBM284")}, // IBM EBCDIC Latin America-Spain
+		{20285, _T("IBM285")}, // IBM EBCDIC United Kingdom
+		{20290, _T("IBM290")}, // IBM EBCDIC Japanese Katakana Extended
+		{20297, _T("IBM297")}, // IBM EBCDIC France
+		{20420, _T("IBM420")}, // IBM EBCDIC Arabic
+		{20423, _T("IBM423")}, // IBM EBCDIC Greek
+		{20424, _T("IBM424")}, // IBM EBCDIC Hebrew
+		{20833, _T("x-EBCDIC-KoreanExtended")}, // IBM EBCDIC Korean Extended
+		{20838, _T("IBM-Thai")}, // IBM EBCDIC Thai
+		{20866, _T("koi8-r")}, // Russian (KOI8-R); Cyrillic (KOI8-R)
+		{20871, _T("IBM871")}, // IBM EBCDIC Icelandic
+		{20880, _T("IBM880")}, // IBM EBCDIC Cyrillic Russian
+		{20905, _T("IBM905")}, // IBM EBCDIC Turkish
+		{20924, _T("IBM00924")}, // IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
+		{20932, _T("EUC-JP")}, // Japanese (JIS 0208-1990 and 0121-1990)
+		{20936, _T("x-cp20936")}, // Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)
+		{20949, _T("x-cp20949")}, // Korean Wansung
+		{21025, _T("cp1025")}, // IBM EBCDIC Cyrillic Serbian-Bulgarian
+		{21027, _T("21027")}, // (deprecated)
+		{21866, _T("koi8-u")}, // Ukrainian (KOI8-U); Cyrillic (KOI8-U)
+		{28591, _T("iso-8859-1")}, // ISO 8859-1 Latin 1; Western European (ISO)
+		{28592, _T("iso-8859-2")}, // ISO 8859-2 Central European; Central European (ISO)
+		{28593, _T("iso-8859-3")}, // ISO 8859-3 Latin 3
+		{28594, _T("iso-8859-4")}, // ISO 8859-4 Baltic
+		{28595, _T("iso-8859-5")}, // ISO 8859-5 Cyrillic
+		{28596, _T("iso-8859-6")}, // ISO 8859-6 Arabic
+		{28597, _T("iso-8859-7")}, // ISO 8859-7 Greek
+		{28598, _T("iso-8859-8")}, // ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
+		{28599, _T("iso-8859-9")}, // ISO 8859-9 Turkish
+		{28603, _T("iso-8859-13")}, // ISO 8859-13 Estonian
+		{28605, _T("iso-8859-15")}, // ISO 8859-15 Latin 9
+		{29001, _T("x-Europa")}, // Europa 3
+		{38598, _T("iso-8859-8-i")}, // ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
+		{50220, _T("iso-2022-jp")}, // ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
+		{50221, _T("csISO2022JP")}, // ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
+		{50222, _T("iso-2022-jp")}, // ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
+		{50225, _T("iso-2022-kr")}, // ISO 2022 Korean
+		{50227, _T("x-cp50227")}, // ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
+		{50229, _T("ISO")}, // ISO 2022 Traditional Chinese
+		{50930, _T("EBCDIC")}, // EBCDIC Japanese (Katakana) Extended
+		{50931, _T("EBCDIC")}, // EBCDIC US-Canada and Japanese
+		{50933, _T("EBCDIC")}, // EBCDIC Korean Extended and Korean
+		{50935, _T("EBCDIC")}, // EBCDIC Simplified Chinese Extended and Simplified Chinese
+		{50936, _T("EBCDIC")}, // EBCDIC Simplified Chinese
+		{50937, _T("EBCDIC")}, // EBCDIC US-Canada and Traditional Chinese
+		{50939, _T("EBCDIC")}, // EBCDIC Japanese (Latin) Extended and Japanese
+		{51932, _T("euc-jp")}, // EUC Japanese
+		{51936, _T("EUC-CN")}, // EUC Simplified Chinese; Chinese Simplified (EUC)
+		{51949, _T("euc-kr")}, // EUC Korean
+		{51950, _T("EUC")}, // EUC Traditional Chinese
+		{52936, _T("hz-gb-2312")}, // HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)
+		{54936, _T("GB18030")}, // Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
+		{57002, _T("x-iscii-de")}, // ISCII Devanagari
+		{57003, _T("x-iscii-be")}, // ISCII Bengali
+		{57004, _T("x-iscii-ta")}, // ISCII Tamil
+		{57005, _T("x-iscii-te")}, // ISCII Telugu
+		{57006, _T("x-iscii-as")}, // ISCII Assamese
+		{57007, _T("x-iscii-or")}, // ISCII Oriya
+		{57008, _T("x-iscii-ka")}, // ISCII Kannada
+		{57009, _T("x-iscii-ma")}, // ISCII Malayalam
+		{57010, _T("x-iscii-gu")}, // ISCII Gujarati
+		{57011, _T("x-iscii-pa")}, // ISCII Punjabi
+		{65000, _T("utf-7")}, // Unicode (UTF-7)
+		{65001, _T("utf-8")}, // Unicode (UTF-8)
+		{0, NULL}
+	};
+
+	// The cursor is a plain local so every call scans from the first row;
+	// a static cursor would resume wherever the previous call stopped.
+	for (const CodeMap *entry = map; entry->m_CodeName != NULL; ++entry)
+	{
+		// Case-insensitive compare leaves the caller's string untouched and
+		// needs no temporary copy of the table name.
+		if (codename.CompareNoCase(entry->m_CodeName) == 0)
+			return entry->m_Code;
+	}
+
+	// Unknown (or empty) names fall back to UTF-8.
+	return CP_UTF8;
+}
CStringA CUnicodeUtils::GetUTF8(const CStringW& string)\r
{\r
char * buf;\r