OSDN Git Service

Del RefLog work.
[tortoisegit/TortoiseGitJp.git] / src / TortoiseMerge / FileTextLines.cpp
1 // TortoiseMerge - a Diff/Patch program\r
2 \r
3 // Copyright (C) 2007-2008 - TortoiseSVN\r
4 \r
5 // This program is free software; you can redistribute it and/or\r
6 // modify it under the terms of the GNU General Public License\r
7 // as published by the Free Software Foundation; either version 2\r
8 // of the License, or (at your option) any later version.\r
9 \r
10 // This program is distributed in the hope that it will be useful,\r
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of\r
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
13 // GNU General Public License for more details.\r
14 \r
15 // You should have received a copy of the GNU General Public License\r
16 // along with this program; if not, write to the Free Software Foundation,\r
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\r
18 //\r
19 #include "StdAfx.h"\r
20 #include "Resource.h"\r
21 #include "UnicodeUtils.h"\r
22 #include "registry.h"\r
23 #include ".\filetextlines.h"\r
24 \r
25 \r
26 CFileTextLines::CFileTextLines(void)\r
27 {\r
28 }\r
29 \r
30 CFileTextLines::~CFileTextLines(void)\r
31 {\r
32 }\r
33 \r
34 CFileTextLines::UnicodeType CFileTextLines::CheckUnicodeType(LPVOID pBuffer, int cb)\r
35 {\r
36         if (cb < 2)\r
37                 return CFileTextLines::ASCII;\r
38         UINT16 * pVal = (UINT16 *)pBuffer;\r
39         UINT8 * pVal2 = (UINT8 *)(pVal+1);\r
40         // scan the whole buffer for a 0x0000 sequence\r
41         // if found, we assume a binary file\r
42         for (int i=0; i<(cb-2); i=i+2)\r
43         {\r
44                 if (0x0000 == *pVal++)\r
45                         return CFileTextLines::BINARY;\r
46         }\r
47         pVal = (UINT16 *)pBuffer;\r
48         if (*pVal == 0xFEFF)\r
49                 return CFileTextLines::UNICODE_LE;\r
50         if (cb < 3)\r
51                 return ASCII;\r
52         if (*pVal == 0xBBEF)\r
53         {\r
54                 if (*pVal2 == 0xBF)\r
55                         return CFileTextLines::UTF8BOM;\r
56         }\r
57         // check for illegal UTF8 chars\r
58         pVal2 = (UINT8 *)pBuffer;\r
59         for (int i=0; i<cb; ++i)\r
60         {\r
61                 if ((*pVal2 == 0xC0)||(*pVal2 == 0xC1)||(*pVal2 >= 0xF5))\r
62                         return CFileTextLines::ASCII;\r
63                 pVal2++;\r
64         }\r
65         pVal2 = (UINT8 *)pBuffer;\r
66         bool bUTF8 = false;\r
67         bool bNonANSI = false;\r
68         for (int i=0; i<(cb-3); ++i)\r
69         {\r
70                 if (*pVal2 > 127)\r
71                         bNonANSI = true;\r
72                 if ((*pVal2 & 0xE0)==0xC0)\r
73                 {\r
74                         pVal2++;i++;\r
75                         if ((*pVal2 & 0xC0)!=0x80)\r
76                                 return CFileTextLines::ASCII;\r
77                         bUTF8 = true;\r
78                 }\r
79                 if ((*pVal2 & 0xF0)==0xE0)\r
80                 {\r
81                         pVal2++;i++;\r
82                         if ((*pVal2 & 0xC0)!=0x80)\r
83                                 return CFileTextLines::ASCII;\r
84                         pVal2++;i++;\r
85                         if ((*pVal2 & 0xC0)!=0x80)\r
86                                 return CFileTextLines::ASCII;\r
87                         bUTF8 = true;\r
88                 }\r
89                 if ((*pVal2 & 0xF8)==0xF0)\r
90                 {\r
91                         pVal2++;i++;\r
92                         if ((*pVal2 & 0xC0)!=0x80)\r
93                                 return CFileTextLines::ASCII;\r
94                         pVal2++;i++;\r
95                         if ((*pVal2 & 0xC0)!=0x80)\r
96                                 return CFileTextLines::ASCII;\r
97                         pVal2++;i++;\r
98                         if ((*pVal2 & 0xC0)!=0x80)\r
99                                 return CFileTextLines::ASCII;\r
100                         bUTF8 = true;\r
101                 }\r
102                 pVal2++;\r
103         }\r
104         if (bUTF8)\r
105                 return CFileTextLines::UTF8;\r
106         if ((!bNonANSI)&&(DWORD(CRegDWORD(_T("Software\\TortoiseMerge\\UseUTF8"), FALSE))))\r
107                 return CFileTextLines::UTF8;\r
108         return CFileTextLines::ASCII;\r
109 }\r
110 \r
111 \r
112 EOL CFileTextLines::CheckLineEndings(LPVOID pBuffer, int cb)\r
113 {\r
114         EOL retval = EOL_AUTOLINE;\r
115         char * buf = (char *)pBuffer;\r
116         for (int i=0; i<cb; i++)\r
117         {\r
118                 //now search the buffer for line endings\r
119                 if (buf[i] == 0x0a)\r
120                 {\r
121                         if ((i+1)<cb)\r
122                         {\r
123                                 if (buf[i+1] == 0)\r
124                                 {\r
125                                         //UNICODE\r
126                                         if ((i+2)<cb)\r
127                                         {\r
128                                                 if (buf[i+2] == 0x0d)\r
129                                                 {\r
130                                                         retval = EOL_LFCR;\r
131                                                         break;\r
132                                                 }\r
133                                                 else\r
134                                                 {\r
135                                                         retval = EOL_LF;\r
136                                                         break;\r
137                                                 }\r
138                                         }\r
139                                 }\r
140                                 else if (buf[i+1] == 0x0d)\r
141                                 {\r
142                                         retval = EOL_LFCR;\r
143                                         break;\r
144                                 }\r
145                         }\r
146                         retval = EOL_LF;\r
147                         break;\r
148                 }\r
149                 else if (buf[i] == 0x0d)\r
150                 {\r
151                         if ((i+1)<cb)\r
152                         {\r
153                                 if (buf[i+1] == 0)\r
154                                 {\r
155                                         //UNICODE\r
156                                         if ((i+2)<cb)\r
157                                         {\r
158                                                 if (buf[i+2] == 0x0a)\r
159                                                 {\r
160                                                         retval = EOL_CRLF;\r
161                                                         break;\r
162                                                 }\r
163                                                 else\r
164                                                 {\r
165                                                         retval = EOL_CR;\r
166                                                         break;\r
167                                                 }\r
168                                         }\r
169                                 }\r
170                                 else if (buf[i+1] == 0x0a)\r
171                                 {\r
172                                         retval = EOL_CRLF;\r
173                                         break;\r
174                                 }\r
175                         }\r
176                         retval = EOL_CR;\r
177                         break;\r
178                 }\r
179         } \r
180         return retval;  \r
181 }\r
182 \r
183 BOOL CFileTextLines::Load(const CString& sFilePath, int lengthHint /* = 0*/)\r
184 {\r
185         m_LineEndings = EOL_AUTOLINE;\r
186         m_UnicodeType = CFileTextLines::AUTOTYPE;\r
187         RemoveAll();\r
188         m_endings.clear();\r
189         if(lengthHint != 0)\r
190         {\r
191                 Reserve(lengthHint);\r
192         }\r
193         \r
194         if (PathIsDirectory(sFilePath))\r
195         {\r
196                 m_sErrorString.Format(IDS_ERR_FILE_NOTAFILE, (LPCTSTR)sFilePath);\r
197                 return FALSE;\r
198         }\r
199         \r
200         if (!PathFileExists(sFilePath))\r
201         {\r
202                 //file does not exist, so just return SUCCESS\r
203                 return TRUE;\r
204         }\r
205 \r
206         HANDLE hFile = CreateFile(sFilePath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, NULL, NULL);\r
207         if (hFile == INVALID_HANDLE_VALUE)\r
208         {\r
209                 SetErrorString();\r
210                 return FALSE;\r
211         }\r
212 \r
213         LARGE_INTEGER fsize;\r
214         if (!GetFileSizeEx(hFile, &fsize))\r
215         {\r
216                 SetErrorString();\r
217                 CloseHandle(hFile);\r
218                 return false;\r
219         }\r
220         if (fsize.HighPart)\r
221         {\r
222                 // file is way too big for us\r
223                 CloseHandle(hFile);\r
224                 m_sErrorString.LoadString(IDS_ERR_FILE_TOOBIG);\r
225                 return FALSE;\r
226         }\r
227 \r
228         LPVOID pFileBuf = new BYTE[fsize.LowPart];\r
229         DWORD dwReadBytes = 0;\r
230         if (!ReadFile(hFile, pFileBuf, fsize.LowPart, &dwReadBytes, NULL))\r
231         {\r
232                 SetErrorString();\r
233                 CloseHandle(hFile);\r
234                 return FALSE;\r
235         }\r
236         if (m_UnicodeType == CFileTextLines::AUTOTYPE)\r
237         {\r
238                 m_UnicodeType = this->CheckUnicodeType(pFileBuf, dwReadBytes);\r
239         }\r
240         if (m_LineEndings == EOL_AUTOLINE)\r
241         {\r
242                 m_LineEndings = CheckLineEndings(pFileBuf, min(10000, dwReadBytes));\r
243         }\r
244         CloseHandle(hFile);\r
245 \r
246         if (m_UnicodeType == CFileTextLines::BINARY)\r
247         {\r
248                 m_sErrorString.Format(IDS_ERR_FILE_BINARY, (LPCTSTR)sFilePath);\r
249                 delete [] pFileBuf;\r
250                 return FALSE;\r
251         }\r
252 \r
253         // we may have to convert the file content\r
254         if ((m_UnicodeType == UTF8)||(m_UnicodeType == UTF8BOM))\r
255         {\r
256                 int ret = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pFileBuf, dwReadBytes, NULL, 0);\r
257                 wchar_t * pWideBuf = new wchar_t[ret];\r
258                 int ret2 = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pFileBuf, dwReadBytes, pWideBuf, ret);\r
259                 if (ret2 == ret)\r
260                 {\r
261                         delete [] pFileBuf;\r
262                         pFileBuf = pWideBuf;\r
263                         dwReadBytes = ret2;\r
264                 }\r
265         }\r
266         else if (m_UnicodeType == ASCII)\r
267         {\r
268                 int ret = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, (LPCSTR)pFileBuf, dwReadBytes, NULL, 0);\r
269                 wchar_t * pWideBuf = new wchar_t[ret];\r
270                 int ret2 = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, (LPCSTR)pFileBuf, dwReadBytes, pWideBuf, ret);\r
271                 if (ret2 == ret)\r
272                 {\r
273                         delete [] pFileBuf;\r
274                         pFileBuf = pWideBuf;\r
275                         dwReadBytes = ret2;\r
276                 }\r
277         }\r
278         // fill in the lines into the array\r
279         wchar_t * pTextBuf = (wchar_t *)pFileBuf;\r
280         wchar_t * pLineStart = (wchar_t *)pFileBuf;\r
281         if (m_UnicodeType == UNICODE_LE) \r
282         {\r
283                 // UTF16 have two bytes per char\r
284                 dwReadBytes/=2; \r
285         }\r
286         if ((m_UnicodeType == UTF8BOM)||(m_UnicodeType == UNICODE_LE))\r
287         {\r
288                 // ignore the BOM\r
289                 ++pTextBuf; \r
290                 ++pLineStart;\r
291                 --dwReadBytes; \r
292         }\r
293 \r
294         for (DWORD i = 0; i<dwReadBytes; ++i)\r
295         {\r
296                 if (*pTextBuf == '\r')\r
297                 {\r
298                         if ((i + 1) < dwReadBytes)\r
299                         {\r
300                                 if (*(pTextBuf+1) == '\n')\r
301                                 {\r
302                                         // crlf line ending\r
303                                         CString line(pLineStart, pTextBuf-pLineStart);\r
304                                         Add(line, EOL_CRLF);\r
305                                         pLineStart = pTextBuf+2;\r
306                                         ++pTextBuf;\r
307                                         ++i;\r
308                                 }\r
309                                 else\r
310                                 {\r
311                                         // cr line ending\r
312                                         CString line(pLineStart, pTextBuf-pLineStart);\r
313                                         Add(line, EOL_CR);\r
314                                         pLineStart =pTextBuf+1;\r
315                                 }\r
316                         }\r
317                 }\r
318                 else if (*pTextBuf == '\n')\r
319                 {\r
320                         // lf line ending\r
321                         CString line(pLineStart, pTextBuf-pLineStart);\r
322                         Add(line, EOL_LF);\r
323                         pLineStart =pTextBuf+1;\r
324                 }\r
325                 ++pTextBuf;\r
326         }\r
327         if (pLineStart < pTextBuf)\r
328         {\r
329                 CString line(pLineStart, pTextBuf-pLineStart);\r
330                 Add(line, EOL_NOENDING);\r
331                 m_bReturnAtEnd = false;         \r
332         }\r
333         else\r
334                 m_bReturnAtEnd = true;\r
335 \r
336         delete [] pFileBuf;\r
337 \r
338 \r
339         return TRUE;\r
340 }\r
341 \r
342 void CFileTextLines::StripWhiteSpace(CString& sLine,DWORD dwIgnoreWhitespaces, bool blame)\r
343 {\r
344         if (blame)\r
345         {\r
346                 if (sLine.GetLength() > 66)\r
347                         sLine = sLine.Mid(66);\r
348         }\r
349         switch (dwIgnoreWhitespaces)\r
350         {\r
351         case 0:\r
352                 // Compare whitespaces\r
353                 // do nothing\r
354                 break;\r
355         case 1: \r
356                 // Ignore all whitespaces\r
357                 sLine.TrimLeft(_T(" \t"));\r
358                 sLine.TrimRight(_T(" \t"));\r
359                 break;\r
360         case 2:\r
361                 // Ignore leading whitespace\r
362                 sLine.TrimLeft(_T(" \t"));\r
363                 break;\r
364         case 3:\r
365                 // Ignore ending whitespace\r
366                 sLine.TrimRight(_T(" \t"));\r
367                 break;\r
368         }\r
369 }\r
370 \r
371 void CFileTextLines::StripAsciiWhiteSpace(CStringA& sLine,DWORD dwIgnoreWhitespaces, bool blame)\r
372 {\r
373         if (blame)\r
374         {\r
375                 if (sLine.GetLength() > 66)\r
376                         sLine = sLine.Mid(66);\r
377         }\r
378         switch (dwIgnoreWhitespaces)\r
379         {\r
380         case 0: // Compare whitespaces\r
381                 // do nothing\r
382                 break;\r
383         case 1:\r
384                 // Ignore all whitespaces\r
385                 StripAsciiWhiteSpace(sLine);\r
386                 break;\r
387         case 2:\r
388                 // Ignore leading whitespace\r
389                 sLine.TrimLeft(" \t");\r
390                 break;\r
391         case 3:\r
392                 // Ignore leading whitespace\r
393                 sLine.TrimRight(" \t");\r
394                 break;\r
395         }\r
396 }\r
397 \r
398 //\r
399 // Fast in-place removal of spaces and tabs from CStringA line\r
400 //\r
401 void CFileTextLines::StripAsciiWhiteSpace(CStringA& sLine)\r
402 {\r
403         int outputLen = 0;\r
404         char* pWriteChr = sLine.GetBuffer(sLine.GetLength());\r
405         const char* pReadChr = pWriteChr;\r
406         while(*pReadChr)\r
407         {\r
408                 if(*pReadChr != ' ' && *pReadChr != '\t')\r
409                 {\r
410                         *pWriteChr++ = *pReadChr;\r
411                         outputLen++;\r
412                 }\r
413                 ++pReadChr;\r
414         }\r
415         *pWriteChr = '\0';\r
416         sLine.ReleaseBuffer(outputLen);\r
417 }\r
418 \r
419 BOOL CFileTextLines::Save(const CString& sFilePath, bool bSaveAsUTF8, DWORD dwIgnoreWhitespaces /*=0*/, BOOL bIgnoreCase /*= FALSE*/, bool bBlame /*= false*/)\r
420 {\r
421         try\r
422         {\r
423                 CString destPath = sFilePath;\r
424                 // now make sure that the destination directory exists\r
425                 int ind = 0;\r
426                 while (destPath.Find('\\', ind)>=2)\r
427                 {\r
428                         if (!PathIsDirectory(destPath.Left(destPath.Find('\\', ind))))\r
429                         {\r
430                                 if (!CreateDirectory(destPath.Left(destPath.Find('\\', ind)), NULL))\r
431                                         return FALSE;\r
432                         }\r
433                         ind = destPath.Find('\\', ind)+1;\r
434                 }\r
435                 \r
436                 CStdioFile file;                        // Hugely faster than CFile for big file writes - because it uses buffering\r
437                 if (!file.Open(sFilePath, CFile::modeCreate | CFile::modeWrite | CFile::typeBinary))\r
438                 {\r
439                         m_sErrorString.Format(IDS_ERR_FILE_OPEN, (LPCTSTR)sFilePath);\r
440                         return FALSE;\r
441                 }\r
442                 if ((!bSaveAsUTF8)&&(m_UnicodeType == CFileTextLines::UNICODE_LE))\r
443                 {\r
444                         //first write the BOM\r
445                         UINT16 wBOM = 0xFEFF;\r
446                         file.Write(&wBOM, 2);\r
447                         for (int i=0; i<GetCount(); i++)\r
448                         {\r
449                                 CString sLine = GetAt(i);\r
450                                 EOL ending = GetLineEnding(i);\r
451                                 StripWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);\r
452                                 if (bIgnoreCase)\r
453                                         sLine = sLine.MakeLower();\r
454                                 file.Write((LPCTSTR)sLine, sLine.GetLength()*sizeof(TCHAR));\r
455                                 if ((ending == EOL_AUTOLINE)||(ending == EOL_NOENDING))\r
456                                         ending = m_LineEndings;\r
457                                 switch (ending)\r
458                                 {\r
459                                 case EOL_CR:\r
460                                         sLine = _T("\x0d");\r
461                                         break;\r
462                                 case EOL_CRLF:\r
463                                 case EOL_AUTOLINE:\r
464                                         sLine = _T("\x0d\x0a");\r
465                                         break;\r
466                                 case EOL_LF:\r
467                                         sLine = _T("\x0a");\r
468                                         break;\r
469                                 case EOL_LFCR:\r
470                                         sLine = _T("\x0a\x0d");\r
471                                         break;\r
472                                 }\r
473                                 if ((m_bReturnAtEnd)||(i != GetCount()-1))\r
474                                         file.Write((LPCTSTR)sLine, sLine.GetLength()*sizeof(TCHAR));\r
475                         }\r
476                 }\r
477                 else if ((!bSaveAsUTF8)&&((m_UnicodeType == CFileTextLines::ASCII)||(m_UnicodeType == CFileTextLines::AUTOTYPE)))\r
478                 {\r
479                         for (int i=0; i< GetCount(); i++)\r
480                         {\r
481                                 // Copy CString to 8 bit without conversion\r
482                                 CString sLineT = GetAt(i);\r
483                                 CStringA sLine = CStringA(sLineT);\r
484                                 EOL ending = GetLineEnding(i);\r
485 \r
486                                 StripAsciiWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);\r
487                                 if (bIgnoreCase)\r
488                                         sLine = sLine.MakeLower();\r
489                                 if ((m_bReturnAtEnd)||(i != GetCount()-1))\r
490                                 {\r
491                                         if ((ending == EOL_AUTOLINE)||(ending == EOL_NOENDING))\r
492                                                 ending = m_LineEndings;\r
493                                         switch (ending)\r
494                                         {\r
495                                         case EOL_CR:\r
496                                                 sLine += '\x0d';\r
497                                                 break;\r
498                                         case EOL_CRLF:\r
499                                         case EOL_AUTOLINE:\r
500                                                 sLine.Append("\x0d\x0a", 2);\r
501                                                 break;\r
502                                         case EOL_LF:\r
503                                                 sLine += '\x0a';\r
504                                                 break;\r
505                                         case EOL_LFCR:\r
506                                                 sLine.Append("\x0a\x0d", 2);\r
507                                                 break;\r
508                                         }\r
509                                 }\r
510                                 file.Write((LPCSTR)sLine, sLine.GetLength());\r
511                         }\r
512                 }\r
513                 else if ((bSaveAsUTF8)||((m_UnicodeType == CFileTextLines::UTF8BOM)||(m_UnicodeType == CFileTextLines::UTF8)))\r
514                 {\r
515                         if (m_UnicodeType == CFileTextLines::UTF8BOM)\r
516                         {\r
517                                 //first write the BOM\r
518                                 UINT16 wBOM = 0xBBEF;\r
519                                 file.Write(&wBOM, 2);\r
520                                 UINT8 uBOM = 0xBF;\r
521                                 file.Write(&uBOM, 1);\r
522                         }\r
523                         for (int i=0; i<GetCount(); i++)\r
524                         {\r
525                                 CStringA sLine = CUnicodeUtils::GetUTF8(GetAt(i));\r
526                                 EOL ending = GetLineEnding(i);\r
527                                 StripAsciiWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);\r
528                                 if (bIgnoreCase)\r
529                                         sLine = sLine.MakeLower();\r
530 \r
531                                 if ((m_bReturnAtEnd)||(i != GetCount()-1))\r
532                                 {\r
533                                         if ((ending == EOL_AUTOLINE)||(ending == EOL_NOENDING))\r
534                                                 ending = m_LineEndings;\r
535                                         switch (ending)\r
536                                         {\r
537                                         case EOL_CR:\r
538                                                 sLine += '\x0d';\r
539                                                 break;\r
540                                         case EOL_CRLF:\r
541                                         case EOL_AUTOLINE:\r
542                                                 sLine.Append("\x0d\x0a",2);\r
543                                                 break;\r
544                                         case EOL_LF:\r
545                                                 sLine += '\x0a';\r
546                                                 break;\r
547                                         case EOL_LFCR:\r
548                                                 sLine.Append("\x0a\x0d",2);\r
549                                                 break;\r
550                                         }\r
551                                 }\r
552                                 file.Write((LPCSTR)sLine, sLine.GetLength());\r
553                         }\r
554                 }\r
555                 file.Close();\r
556         }\r
557         catch (CException * e)\r
558         {\r
559                 e->GetErrorMessage(m_sErrorString.GetBuffer(4096), 4096);\r
560                 m_sErrorString.ReleaseBuffer();\r
561                 e->Delete();\r
562                 return FALSE;\r
563         }\r
564         return TRUE;\r
565 }\r
566 \r
567 void CFileTextLines::SetErrorString()\r
568 {\r
569                 LPVOID lpMsgBuf;\r
570                 FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | \r
571                         FORMAT_MESSAGE_FROM_SYSTEM | \r
572                         FORMAT_MESSAGE_IGNORE_INSERTS,\r
573                         NULL,\r
574                         ::GetLastError(),\r
575                         MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language\r
576                         (LPTSTR) &lpMsgBuf,\r
577                         0,\r
578                         NULL \r
579                         );\r
580                 m_sErrorString = (LPCTSTR)lpMsgBuf;\r
581                 LocalFree( lpMsgBuf );\r
582 }\r
583 \r
584 void CFileTextLines::CopySettings(CFileTextLines * pFileToCopySettingsTo)\r
585 {\r
586         if (pFileToCopySettingsTo)\r
587         {\r
588                 pFileToCopySettingsTo->m_UnicodeType = m_UnicodeType;\r
589                 pFileToCopySettingsTo->m_LineEndings = m_LineEndings;\r
590         }\r
591 }\r
592 \r
593 \r
594 \r
595 \r
596 \r