OSDN Git Service

Fix corruption bugs in conversion from Shift_JIS to UTF-8.
[ffftp/ffftp.git] / codecnv.c
index 2ac55e7..f9a9e37 100644 (file)
--- a/codecnv.c
+++ b/codecnv.c
@@ -31,7 +31,9 @@
 #include <stdio.h>\r
 #include <stdlib.h>\r
 #include <string.h>\r
-#include <winsock.h>\r
+// IPv6 support\r
+//#include <winsock.h>\r
+#include <winsock2.h>\r
 #include <mbstring.h>\r
 #include <windowsx.h>\r
 \r
@@ -368,6 +370,8 @@ void InitCodeConvInfo(CODECONVINFO *cInfo)
        cInfo->KanaProc = NULL;\r
        // UTF-8対応\r
        cInfo->EscUTF8Len = 0;\r
+       cInfo->EscFlush = NO;\r
+       cInfo->FlushProc = NULL;\r
        return;\r
 }\r
 \r
@@ -388,6 +392,13 @@ int FlushRestData(CODECONVINFO *cInfo)
 {\r
        char *Put;\r
 \r
+       // UTF-8対応\r
+       if(cInfo->FlushProc != NULL)\r
+       {\r
+               cInfo->EscFlush = YES;\r
+               return cInfo->FlushProc(cInfo);\r
+       }\r
+\r
        Put = cInfo->Buf;\r
 \r
        if(cInfo->KanaProc != NULL)\r
@@ -399,9 +410,6 @@ int FlushRestData(CODECONVINFO *cInfo)
                *Put++ = cInfo->EscCode[0];\r
        if(cInfo->EscProc == 2)\r
                *Put++ = cInfo->EscCode[1];\r
-       // UTF-8対応\r
-       memcpy(Put, cInfo->EscUTF8, sizeof(char) * cInfo->EscUTF8Len);\r
-       Put += cInfo->EscUTF8Len;\r
 \r
        cInfo->OutLen = Put - cInfo->Buf;\r
 \r
@@ -1479,12 +1487,61 @@ static int CheckOnEUC(uchar *Pos, uchar *Btm)
 *              くり返しフラグがYESの時は、cInfoの内容を変えずにもう一度呼ぶこと\r
 *----------------------------------------------------------------------------*/\r
 \r
+// UTF-8 support\r
+// Returns, as a length measured after conversion to Shift_JIS, how much of the converted byte sequence can be finalized; returns -1 if the work buffer cannot be allocated; when pNewUTF8Length is non-NULL it receives the adjusted UTF-8 length computed below\r
+// Binary              UTF-8                   Return Shift_JIS\r
+// E3 81 82 E3 81 84   U+3042 U+3044        -> 2      82 A0   U+3042; the rest may be the start of a combining sequence (e.g. U+3044 + voiced sound mark)\r
+// E3 81 82 E3 81      U+3042 + E3 81       -> 0              may be the start of a combining sequence\r
+// E3 81 82 E3         U+3042 + E3          -> 0              may be the start of a combining sequence\r
+// E3 81 82            U+3042               -> 0              may be the start of a combining sequence\r
+int ConvUTF8NtoSJIS_TruncateToDelimiter(char* pUTF8, int UTF8Length, int* pNewUTF8Length)\r
+{\r
+       int UTF16Length;\r
+       wchar_t* pUTF16;\r
+       int SJISLength;\r
+       int NewSJISLength;\r
+       int NewUTF16Length;\r
+       // For UTF-8, an incomplete character is never converted (original author's note; the first call below only measures the required UTF-16 length)\r
+       // Binary              UTF-8              Binary        UTF-16 LE\r
+       // E3 81 82 E3 81 84   U+3042 U+3044   -> 42 30 44 30   U+3042 U+3044\r
+       // E3 81 82 E3 81      U+3042 + E3 81  -> 42 30         U+3042\r
+       // E3 81 82 E3         U+3042 + E3     -> 42 30         U+3042\r
+       UTF16Length = MultiByteToWideChar(CP_UTF8, 0, pUTF8, UTF8Length, NULL, 0);\r
+       if(!(pUTF16 = (wchar_t*)malloc(sizeof(wchar_t) * UTF16Length)))\r
+               return -1;\r
+       // The position where the length changes when converted to Shift_JIS is treated as a Unicode combining-character boundary; the loop below drops one trailing UTF-8 byte per pass until the Shift_JIS length becomes smaller than the initial one\r
+       UTF16Length = MultiByteToWideChar(CP_UTF8, 0, pUTF8, UTF8Length, pUTF16, UTF16Length);\r
+       SJISLength = WideCharToMultiByte(CP_ACP, 0, pUTF16, UTF16Length, NULL, 0, NULL, NULL);\r
+       NewSJISLength = SJISLength;\r
+       while(UTF8Length > 0 && NewSJISLength >= SJISLength)\r
+       {\r
+               UTF8Length--;\r
+               UTF16Length = MultiByteToWideChar(CP_UTF8, 0, pUTF8, UTF8Length, pUTF16, UTF16Length);\r
+               NewSJISLength = WideCharToMultiByte(CP_ACP, 0, pUTF16, UTF16Length, NULL, 0, NULL, NULL);\r
+       }\r
+       free(pUTF16);\r
+       // The position where the length changes when converted to UTF-16 LE is treated as a UTF-8 character boundary; this part runs only when the caller asked for the new UTF-8 length\r
+       if(pNewUTF8Length)\r
+       {\r
+               NewUTF16Length = UTF16Length;\r
+               while(UTF8Length > 0 && NewUTF16Length >= UTF16Length)\r
+               {\r
+                       UTF8Length--;\r
+                       NewUTF16Length = MultiByteToWideChar(CP_UTF8, 0, pUTF8, UTF8Length, NULL, 0);\r
+               }\r
+               if(UTF16Length > 0)\r
+                       UTF8Length++;\r
+               *pNewUTF8Length = UTF8Length;\r
+       }\r
+       return NewSJISLength;\r
+}\r
+\r
 int ConvUTF8NtoSJIS(CODECONVINFO *cInfo)\r
 {\r
        int Continue;\r
 \r
 //     char temp_string[2048];\r
-       int string_length;\r
+//     int string_length;\r
 \r
        // 大きいサイズに対応\r
        // 終端のNULLを含むバグを修正\r
@@ -1492,7 +1549,6 @@ int ConvUTF8NtoSJIS(CODECONVINFO *cInfo)
        char* pSrc;\r
        wchar_t* pUTF16;\r
        int UTF16Length;\r
-       int Count;\r
 \r
        Continue = NO;\r
 \r
@@ -1516,7 +1572,14 @@ int ConvUTF8NtoSJIS(CODECONVINFO *cInfo)
        memcpy(pSrc, cInfo->EscUTF8, sizeof(char) * cInfo->EscUTF8Len);\r
        memcpy(pSrc + cInfo->EscUTF8Len, cInfo->Str, sizeof(char) * cInfo->StrLen);\r
        *(pSrc + SrcLength) = '\0';\r
-       // UTF-8の場合、不完全な文字は常に変換されない\r
+       if(cInfo->EscFlush == NO)\r
+       {\r
+               // バッファに収まらないため変換文字数を半減\r
+               while(SrcLength > 0 && ConvUTF8NtoSJIS_TruncateToDelimiter(pSrc, SrcLength, &SrcLength) > cInfo->BufSize)\r
+               {\r
+                       SrcLength = SrcLength / 2;\r
+               }\r
+       }\r
        UTF16Length = MultiByteToWideChar(CP_UTF8, 0, pSrc, SrcLength, NULL, 0);\r
 \r
        // サイズ0 or バッファサイズより大きい場合は\r
@@ -1557,7 +1620,6 @@ int ConvUTF8NtoSJIS(CODECONVINFO *cInfo)
 //                                             0,                              // 格納先サイズ\r
 //                                             NULL,NULL\r
 //                                     );\r
-       string_length = WideCharToMultiByte(CP_ACP, 0, pUTF16, UTF16Length, NULL, 0, NULL, NULL);\r
 \r
        // サイズ0 or 出力バッファサイズより大きい場合は、\r
        // cInfo->Bufの最初に'\0'を入れて、\r
@@ -1583,20 +1645,10 @@ int ConvUTF8NtoSJIS(CODECONVINFO *cInfo)
 //             NULL,NULL\r
 //     );\r
        cInfo->OutLen = WideCharToMultiByte(CP_ACP, 0, pUTF16, UTF16Length, cInfo->Buf, cInfo->BufSize, NULL, NULL);\r
-       // バッファに収まらないため変換文字数を半減\r
-       while(cInfo->OutLen == 0 && UTF16Length > 0)\r
-       {\r
-               UTF16Length = UTF16Length / 2;\r
-               cInfo->OutLen = WideCharToMultiByte(CP_ACP, 0, pUTF16, UTF16Length, cInfo->Buf, cInfo->BufSize, NULL, NULL);\r
-       }\r
-       // 変換された元の文字列での文字数を取得\r
-       Count = WideCharToMultiByte(CP_UTF8, 0, pUTF16, UTF16Length, NULL, 0, NULL, NULL);\r
-       // 変換可能な残りの文字数を取得\r
-       UTF16Length = MultiByteToWideChar(CP_UTF8, 0, pSrc + Count, SrcLength - Count, NULL, 0);\r
-       cInfo->Str += Count - cInfo->EscUTF8Len;\r
-       cInfo->StrLen -= Count - cInfo->EscUTF8Len;\r
+       cInfo->Str += SrcLength - cInfo->EscUTF8Len;\r
+       cInfo->StrLen -= SrcLength - cInfo->EscUTF8Len;\r
        cInfo->EscUTF8Len = 0;\r
-       if(UTF16Length > 0)\r
+       if(ConvUTF8NtoSJIS_TruncateToDelimiter(cInfo->Str, cInfo->StrLen, NULL) > 0)\r
                Continue = YES;\r
        else\r
        {\r
@@ -1605,6 +1657,7 @@ int ConvUTF8NtoSJIS(CODECONVINFO *cInfo)
                cInfo->EscUTF8Len = cInfo->StrLen;\r
                cInfo->Str += cInfo->StrLen;\r
                cInfo->StrLen = 0;\r
+               cInfo->FlushProc = ConvUTF8NtoSJIS;\r
                Continue = NO;\r
        }\r
 \r
@@ -1662,25 +1715,28 @@ int ConvSJIStoUTF8N(CODECONVINFO *cInfo)
        memcpy(pSrc, cInfo->EscUTF8, sizeof(char) * cInfo->EscUTF8Len);\r
        memcpy(pSrc + cInfo->EscUTF8Len, cInfo->Str, sizeof(char) * cInfo->StrLen);\r
        *(pSrc + SrcLength) = '\0';\r
-       // Shift_JISの場合、不完全な文字でも変換されることがあるため、末尾の不完全な部分を削る\r
-       Count = 0;\r
-       while(Count < SrcLength)\r
+       if(cInfo->EscFlush == NO)\r
        {\r
-               if(((unsigned char)*(pSrc + Count) >= 0x81 && (unsigned char)*(pSrc + Count) <= 0x9f) || (unsigned char)*(pSrc + Count) >= 0xe0)\r
+               // Shift_JISの場合、不完全な文字でも変換されることがあるため、末尾の不完全な部分を削る\r
+               Count = 0;\r
+               while(Count < SrcLength)\r
                {\r
-                       if((unsigned char)*(pSrc + Count + 1) >= 0x40)\r
-                               Count += 2;\r
-                       else\r
+                       if(((unsigned char)*(pSrc + Count) >= 0x81 && (unsigned char)*(pSrc + Count) <= 0x9f) || (unsigned char)*(pSrc + Count) >= 0xe0)\r
                        {\r
-                               if(Count + 2 > SrcLength)\r
-                                       break;\r
-                               Count += 1;\r
+                               if((unsigned char)*(pSrc + Count + 1) >= 0x40)\r
+                                       Count += 2;\r
+                               else\r
+                               {\r
+                                       if(Count + 2 > SrcLength)\r
+                                               break;\r
+                                       Count += 1;\r
+                               }\r
                        }\r
+                       else\r
+                               Count += 1;\r
                }\r
-               else\r
-                       Count += 1;\r
+               SrcLength = Count;\r
        }\r
-       SrcLength = Count;\r
        UTF16Length = MultiByteToWideChar(CP_ACP, 0, pSrc, SrcLength, NULL, 0);\r
 \r
        // サイズ0 or バッファサイズより大きい場合は、\r
@@ -1777,6 +1833,7 @@ int ConvSJIStoUTF8N(CODECONVINFO *cInfo)
                cInfo->EscUTF8Len = cInfo->StrLen;\r
                cInfo->Str += cInfo->StrLen;\r
                cInfo->StrLen = 0;\r
+               cInfo->FlushProc = ConvSJIStoUTF8N;\r
                Continue = NO;\r
        }\r
 \r