1 // TortoiseMerge - a Diff/Patch program
\r
3 // Copyright (C) 2007-2008 - TortoiseSVN
\r
5 // This program is free software; you can redistribute it and/or
\r
6 // modify it under the terms of the GNU General Public License
\r
7 // as published by the Free Software Foundation; either version 2
\r
8 // of the License, or (at your option) any later version.
\r
10 // This program is distributed in the hope that it will be useful,
\r
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 // GNU General Public License for more details.
\r
15 // You should have received a copy of the GNU General Public License
\r
16 // along with this program; if not, write to the Free Software Foundation,
\r
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\r
20 #include "Resource.h"
\r
21 #include "UnicodeUtils.h"
\r
22 #include "registry.h"
\r
23 #include ".\filetextlines.h"
\r
// Default constructor of CFileTextLines.
// NOTE(review): the initializer list and body are not visible in this extract.
26 CFileTextLines::CFileTextLines(void)
\r
// Destructor of CFileTextLines.
// NOTE(review): the body is not visible in this extract.
30 CFileTextLines::~CFileTextLines(void)
\r
// Guesses the encoding of a raw file buffer.
//   pBuffer  start of the raw file content
//   cb       size of that content in bytes
// The visible return statements yield ASCII, BINARY, UNICODE_LE, UTF8BOM or UTF8.
// NOTE(review): this extract is missing lines of the function (braces and some
// conditions/statements), so the comments below describe the visible lines only.
34 CFileTextLines::UnicodeType CFileTextLines::CheckUnicodeType(LPVOID pBuffer, int cb)
\r
// early ASCII result; the condition guarding it is not visible in this extract
37 return CFileTextLines::ASCII;
\r
// pVal views the buffer as a sequence of 16-bit words
38 UINT16 * pVal = (UINT16 *)pBuffer;
\r
// pVal2 points at the byte that follows the first 16-bit word
39 UINT8 * pVal2 = (UINT8 *)(pVal+1);
\r
40 // scan the whole buffer for a 0x0000 sequence
\r
41 // if found, we assume a binary file
\r
// the scan advances one 16-bit word (two bytes) per iteration
42 for (int i=0; i<(cb-2); i=i+2)
\r
44 if (0x0000 == *pVal++)
\r
45 return CFileTextLines::BINARY;
\r
// rewind to the start of the buffer to look for a byte order mark
47 pVal = (UINT16 *)pBuffer;
\r
// a first word of 0xFEFF (the UTF-16 byte order mark value) is reported as UNICODE_LE
48 if (*pVal == 0xFEFF)
\r
49 return CFileTextLines::UNICODE_LE;
\r
// a first word of 0xBBEF leads to the UTF8BOM result below
// NOTE(review): any further check between these two lines (e.g. of the byte
// pVal2 points at) is not visible in this extract
52 if (*pVal == 0xBBEF)
\r
55 return CFileTextLines::UTF8BOM;
\r
57 // check for illegal UTF8 chars
\r
58 pVal2 = (UINT8 *)pBuffer;
\r
59 for (int i=0; i<cb; ++i)
\r
// a byte of 0xC0, 0xC1 or >= 0xF5 makes the function report ASCII
// NOTE(review): the statement advancing pVal2 in this loop is not visible here
61 if ((*pVal2 == 0xC0)||(*pVal2 == 0xC1)||(*pVal2 >= 0xF5))
\r
62 return CFileTextLines::ASCII;
\r
// second pass from the start of the buffer: validate multi-byte sequences
65 pVal2 = (UINT8 *)pBuffer;
\r
// NOTE(review): the place where bNonANSI is set is not visible in this extract
67 bool bNonANSI = false;
\r
68 for (int i=0; i<(cb-3); ++i)
\r
// bit pattern 110xxxxx: lead byte of a two-byte UTF-8 sequence
72 if ((*pVal2 & 0xE0)==0xC0)
\r
// a byte that does not match 10xxxxxx (continuation byte) means: not UTF-8
// NOTE(review): the pointer advance before each of these checks is not visible here
75 if ((*pVal2 & 0xC0)!=0x80)
\r
76 return CFileTextLines::ASCII;
\r
// bit pattern 1110xxxx: lead byte of a three-byte sequence, two continuation checks follow
79 if ((*pVal2 & 0xF0)==0xE0)
\r
82 if ((*pVal2 & 0xC0)!=0x80)
\r
83 return CFileTextLines::ASCII;
\r
85 if ((*pVal2 & 0xC0)!=0x80)
\r
86 return CFileTextLines::ASCII;
\r
// bit pattern 11110xxx: lead byte of a four-byte sequence, three continuation checks follow
89 if ((*pVal2 & 0xF8)==0xF0)
\r
92 if ((*pVal2 & 0xC0)!=0x80)
\r
93 return CFileTextLines::ASCII;
\r
95 if ((*pVal2 & 0xC0)!=0x80)
\r
96 return CFileTextLines::ASCII;
\r
98 if ((*pVal2 & 0xC0)!=0x80)
\r
99 return CFileTextLines::ASCII;
\r
// UTF8 result; the condition guarding it is not visible in this extract
105 return CFileTextLines::UTF8;
\r
// no non-ANSI byte was flagged: report UTF8 only if the DWORD obtained from
// CRegDWORD for "Software\TortoiseMerge\UseUTF8" is non-zero
// (presumably a registry value with FALSE as its default - confirm in registry.h)
106 if ((!bNonANSI)&&(DWORD(CRegDWORD(_T("Software\\TortoiseMerge\\UseUTF8"), FALSE))))
\r
107 return CFileTextLines::UTF8;
\r
// fallback when none of the checks above decided otherwise
108 return CFileTextLines::ASCII;
\r
// Searches a raw buffer for the first line ending to determine the EOL style.
//   pBuffer  start of the buffer, examined byte by byte
//   cb       number of bytes to examine
// NOTE(review): the assignments to retval, the return statement and several
// conditions are not visible in this extract; comments cover visible lines only.
112 EOL CFileTextLines::CheckLineEndings(LPVOID pBuffer, int cb)
\r
// start value before any line ending has been examined
114 EOL retval = EOL_AUTOLINE;
\r
115 char * buf = (char *)pBuffer;
\r
116 for (int i=0; i<cb; i++)
\r
118 //now search the buffer for line endings
\r
// a line feed byte was found
119 if (buf[i] == 0x0a)
\r
// carriage return two bytes after the line feed
// (presumably the two-bytes-per-char case - the guarding condition is not visible)
// NOTE(review): no bounds check for i+1 / i+2 is visible in this extract;
// confirm the look-ahead cannot read past cb
128 if (buf[i+2] == 0x0d)
\r
// carriage return directly after the line feed
140 else if (buf[i+1] == 0x0d)
\r
// a carriage return byte was found; the checks below mirror the ones above
149 else if (buf[i] == 0x0d)
\r
158 if (buf[i+2] == 0x0a)
\r
170 else if (buf[i+1] == 0x0a)
\r
// Reads the file at sFilePath, detects its encoding and line ending style,
// converts UTF-8 / ANSI content to wide characters and adds the text line by line
// via Add().
//   sFilePath   path of the file to read
//   lengthHint  if non-zero, passed to Reserve() before reading
// NOTE(review): the return statements, braces and several short statements are
// not visible in this extract; comments below describe the visible lines only.
183 BOOL CFileTextLines::Load(const CString& sFilePath, int lengthHint /* = 0*/)
\r
// reset both settings so they get detected from the file content below
185 m_LineEndings = EOL_AUTOLINE;
\r
186 m_UnicodeType = CFileTextLines::AUTOTYPE;
\r
189 if(lengthHint != 0)
\r
191 Reserve(lengthHint);
\r
// a directory cannot be loaded: set the error text
194 if (PathIsDirectory(sFilePath))
\r
196 m_sErrorString.Format(IDS_ERR_FILE_NOTAFILE, (LPCTSTR)sFilePath);
\r
200 if (!PathFileExists(sFilePath))
\r
202 //file does not exist, so just return SUCCESS
\r
// open the existing file for reading; other readers are allowed (FILE_SHARE_READ)
206 HANDLE hFile = CreateFile(sFilePath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, NULL, NULL);
\r
// NOTE(review): the handling of a failed open is not visible in this extract
207 if (hFile == INVALID_HANDLE_VALUE)
\r
213 LARGE_INTEGER fsize;
\r
// if the size cannot be queried the handle is closed again
214 if (!GetFileSizeEx(hFile, &fsize))
\r
217 CloseHandle(hFile);
\r
// a non-zero HighPart means the size does not fit into 32 bits
220 if (fsize.HighPart)
\r
222 // file is way too big for us
\r
223 CloseHandle(hFile);
\r
224 m_sErrorString.LoadString(IDS_ERR_FILE_TOOBIG);
\r
// read the whole file into one heap buffer
// NOTE(review): pFileBuf is declared LPVOID, so every 'delete [] pFileBuf' below
// deletes through an untyped pointer, which standard C++ leaves undefined;
// a typed pointer would avoid that
228 LPVOID pFileBuf = new BYTE[fsize.LowPart];
\r
229 DWORD dwReadBytes = 0;
\r
// NOTE(review): on a failed read only CloseHandle is visible here; whether
// pFileBuf is released on that path cannot be told from this extract
230 if (!ReadFile(hFile, pFileBuf, fsize.LowPart, &dwReadBytes, NULL))
\r
233 CloseHandle(hFile);
\r
// detect the encoding from the bytes actually read
236 if (m_UnicodeType == CFileTextLines::AUTOTYPE)
\r
238 m_UnicodeType = this->CheckUnicodeType(pFileBuf, dwReadBytes);
\r
// detect the line ending style from at most the first 10000 bytes
240 if (m_LineEndings == EOL_AUTOLINE)
\r
242 m_LineEndings = CheckLineEndings(pFileBuf, min(10000, dwReadBytes));
\r
// the file content is in memory now, the handle is no longer needed
244 CloseHandle(hFile);
\r
// binary files are rejected: set the error text and free the buffer
246 if (m_UnicodeType == CFileTextLines::BINARY)
\r
248 m_sErrorString.Format(IDS_ERR_FILE_BINARY, (LPCTSTR)sFilePath);
\r
249 delete [] pFileBuf;
\r
253 // we may have to convert the file content
\r
254 if ((m_UnicodeType == UTF8)||(m_UnicodeType == UTF8BOM))
\r
// first call with no output buffer: asks for the required number of wide chars
// NOTE(review): no check of ret for zero (conversion failure) is visible here
256 int ret = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pFileBuf, dwReadBytes, NULL, 0);
\r
257 wchar_t * pWideBuf = new wchar_t[ret];
\r
// second call performs the conversion into the new buffer
258 int ret2 = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pFileBuf, dwReadBytes, pWideBuf, ret);
\r
// replace the byte buffer with the wide buffer
261 delete [] pFileBuf;
\r
262 pFileBuf = pWideBuf;
\r
// from here on dwReadBytes holds the count returned by the conversion
// (wide characters), not bytes
263 dwReadBytes = ret2;
\r
// content classified as ASCII is converted with the system ANSI code page (CP_ACP)
266 else if (m_UnicodeType == ASCII)
\r
268 int ret = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, (LPCSTR)pFileBuf, dwReadBytes, NULL, 0);
\r
269 wchar_t * pWideBuf = new wchar_t[ret];
\r
270 int ret2 = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, (LPCSTR)pFileBuf, dwReadBytes, pWideBuf, ret);
\r
273 delete [] pFileBuf;
\r
274 pFileBuf = pWideBuf;
\r
275 dwReadBytes = ret2;
\r
278 // fill in the lines into the array
\r
// pTextBuf is the scan position, pLineStart the start of the current line
279 wchar_t * pTextBuf = (wchar_t *)pFileBuf;
\r
280 wchar_t * pLineStart = (wchar_t *)pFileBuf;
\r
// NOTE(review): the statement under this condition is not visible; presumably it
// turns the byte count into a character count - confirm
281 if (m_UnicodeType == UNICODE_LE)
\r
283 // UTF16 have two bytes per char
\r
// NOTE(review): the statements under this condition are not visible; presumably
// they skip the byte order mark - confirm
286 if ((m_UnicodeType == UTF8BOM)||(m_UnicodeType == UNICODE_LE))
\r
// walk the text one wide character at a time
// NOTE(review): the statement advancing pTextBuf is not visible in this extract
294 for (DWORD i = 0; i<dwReadBytes; ++i)
\r
296 if (*pTextBuf == '\r')
\r
// only look at the next character if there is one
298 if ((i + 1) < dwReadBytes)
\r
300 if (*(pTextBuf+1) == '\n')
\r
302 // crlf line ending
\r
// the line is the text between pLineStart and the current position
303 CString line(pLineStart, pTextBuf-pLineStart);
\r
304 Add(line, EOL_CRLF);
\r
// the next line starts after both characters of the line ending
305 pLineStart = pTextBuf+2;
\r
// carriage return not followed by a line feed
// NOTE(review): the Add() call for this line is not visible in this extract
312 CString line(pLineStart, pTextBuf-pLineStart);
\r
314 pLineStart =pTextBuf+1;
\r
// line feed that is not part of a CR LF pair
// NOTE(review): the Add() call for this line is not visible in this extract
318 else if (*pTextBuf == '\n')
\r
321 CString line(pLineStart, pTextBuf-pLineStart);
\r
323 pLineStart =pTextBuf+1;
\r
// text left over after the last line ending becomes a final line without ending
327 if (pLineStart < pTextBuf)
\r
329 CString line(pLineStart, pTextBuf-pLineStart);
\r
330 Add(line, EOL_NOENDING);
\r
331 m_bReturnAtEnd = false;
\r
// presumably the else branch of the check above (the else line is not visible)
334 m_bReturnAtEnd = true;
\r
336 delete [] pFileBuf;
\r
// Modifies sLine in place according to the whitespace mode in dwIgnoreWhitespaces.
//   sLine                line to modify
//   dwIgnoreWhitespaces  selects the branch of the switch below
//   blame                NOTE(review): its use is not visible in this extract;
//                        presumably it guards the 66 character cut - confirm
// NOTE(review): the case labels and break statements are not visible here.
342 void CFileTextLines::StripWhiteSpace(CString& sLine,DWORD dwIgnoreWhitespaces, bool blame)
\r
// lines longer than 66 characters lose their first 66 characters
346 if (sLine.GetLength() > 66)
\r
347 sLine = sLine.Mid(66);
\r
349 switch (dwIgnoreWhitespaces)
\r
352 // Compare whitespaces
\r
356 // Ignore all whitespaces
\r
// only spaces and tabs at both ends of the line are removed here
357 sLine.TrimLeft(_T(" \t"));
\r
358 sLine.TrimRight(_T(" \t"));
\r
361 // Ignore leading whitespace
\r
362 sLine.TrimLeft(_T(" \t"));
\r
365 // Ignore ending whitespace
\r
366 sLine.TrimRight(_T(" \t"));
\r
// 8-bit string variant of the whitespace handling: modifies sLine in place
// according to the mode in dwIgnoreWhitespaces.
//   sLine                line to modify
//   dwIgnoreWhitespaces  selects the branch of the switch below
//   blame                NOTE(review): its use is not visible in this extract;
//                        presumably it guards the 66 character cut - confirm
// NOTE(review): most case labels and the break statements are not visible here.
371 void CFileTextLines::StripAsciiWhiteSpace(CStringA& sLine,DWORD dwIgnoreWhitespaces, bool blame)
\r
// lines longer than 66 characters lose their first 66 characters
375 if (sLine.GetLength() > 66)
\r
376 sLine = sLine.Mid(66);
\r
378 switch (dwIgnoreWhitespaces)
\r
380 case 0: // Compare whitespaces
\r
384 // Ignore all whitespaces
\r
// delegates to the single-argument overload
385 StripAsciiWhiteSpace(sLine);
\r
388 // Ignore leading whitespace
\r
389 sLine.TrimLeft(" \t");
\r
392 // Ignore ending whitespace
\r
393 sLine.TrimRight(" \t");
\r
399 // Fast in-place removal of spaces and tabs from CStringA line
\r
//   sLine  string that is compacted in its own buffer
// NOTE(review): the loop header, the declaration and update of outputLen and the
// advance of pReadChr are not visible in this extract.
401 void CFileTextLines::StripAsciiWhiteSpace(CStringA& sLine)
\r
// write position inside the string's own buffer
404 char* pWriteChr = sLine.GetBuffer(sLine.GetLength());
\r
// read position starts at the same place as the write position
405 const char* pReadChr = pWriteChr;
\r
// only characters other than space and tab are copied to the write position
408 if(*pReadChr != ' ' && *pReadChr != '\t')
\r
410 *pWriteChr++ = *pReadChr;
\r
// hand the buffer back to the string with outputLen as its new length
416 sLine.ReleaseBuffer(outputLen);
\r
// Writes all lines to the file at sFilePath. Three visible branches cover
// UTF-16 (UNICODE_LE), 8-bit (ASCII / AUTOTYPE) and UTF-8 output.
//   sFilePath            destination file; missing directories on the path are created
//   bSaveAsUTF8          when true, the first two branches are skipped and UTF-8 is written
//   dwIgnoreWhitespaces  passed to StripWhiteSpace / StripAsciiWhiteSpace for every line
//   bIgnoreCase          NOTE(review): its use is not visible in this extract;
//                        presumably it guards the MakeLower() calls - confirm
//   bBlame               passed through to the strip functions
// NOTE(review): the try statement, switch/case labels, return statements and
// several short statements are not visible in this extract.
419 BOOL CFileTextLines::Save(const CString& sFilePath, bool bSaveAsUTF8, DWORD dwIgnoreWhitespaces /*=0*/, BOOL bIgnoreCase /*= FALSE*/, bool bBlame /*= false*/)
\r
423 CString destPath = sFilePath;
\r
424 // now make sure that the destination directory exists
\r
// step through the backslashes of the path; ind is the search start
// (its declaration is not visible in this extract)
426 while (destPath.Find('\\', ind)>=2)
\r
// the path prefix up to the current backslash is created if it is not a directory yet
428 if (!PathIsDirectory(destPath.Left(destPath.Find('\\', ind))))
\r
430 if (!CreateDirectory(destPath.Left(destPath.Find('\\', ind)), NULL))
\r
// continue the search after the current backslash
433 ind = destPath.Find('\\', ind)+1;
\r
436 CStdioFile file; // Hugely faster than CFile for big file writes - because it uses buffering
\r
// create the file for writing in binary mode
437 if (!file.Open(sFilePath, CFile::modeCreate | CFile::modeWrite | CFile::typeBinary))
\r
439 m_sErrorString.Format(IDS_ERR_FILE_OPEN, (LPCTSTR)sFilePath);
\r
// branch 1: UTF-16 output
442 if ((!bSaveAsUTF8)&&(m_UnicodeType == CFileTextLines::UNICODE_LE))
\r
444 //first write the BOM
\r
445 UINT16 wBOM = 0xFEFF;
\r
446 file.Write(&wBOM, 2);
\r
447 for (int i=0; i<GetCount(); i++)
\r
449 CString sLine = GetAt(i);
\r
450 EOL ending = GetLineEnding(i);
\r
451 StripWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);
\r
453 sLine = sLine.MakeLower();
\r
// the line text is written as raw TCHARs
454 file.Write((LPCTSTR)sLine, sLine.GetLength()*sizeof(TCHAR));
\r
// lines without an own ending style use the style of the whole file
455 if ((ending == EOL_AUTOLINE)||(ending == EOL_NOENDING))
\r
456 ending = m_LineEndings;
\r
// sLine is reused to hold the line ending characters
// (the case labels selecting between these assignments are not visible)
460 sLine = _T("\x0d");
\r
464 sLine = _T("\x0d\x0a");
\r
467 sLine = _T("\x0a");
\r
470 sLine = _T("\x0a\x0d");
\r
// the ending is written for every line except a last line when m_bReturnAtEnd is false
473 if ((m_bReturnAtEnd)||(i != GetCount()-1))
\r
474 file.Write((LPCTSTR)sLine, sLine.GetLength()*sizeof(TCHAR));
\r
// branch 2: 8-bit output
477 else if ((!bSaveAsUTF8)&&((m_UnicodeType == CFileTextLines::ASCII)||(m_UnicodeType == CFileTextLines::AUTOTYPE)))
\r
479 for (int i=0; i< GetCount(); i++)
\r
481 // Copy CString to 8 bit without conversion
\r
482 CString sLineT = GetAt(i);
\r
483 CStringA sLine = CStringA(sLineT);
\r
484 EOL ending = GetLineEnding(i);
\r
486 StripAsciiWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);
\r
488 sLine = sLine.MakeLower();
\r
// a line ending is added for every line except a last line when m_bReturnAtEnd is false
489 if ((m_bReturnAtEnd)||(i != GetCount()-1))
\r
491 if ((ending == EOL_AUTOLINE)||(ending == EOL_NOENDING))
\r
492 ending = m_LineEndings;
\r
// in this branch the ending is appended to the line before a single Write
// (only the two-character endings are visible in this extract)
500 sLine.Append("\x0d\x0a", 2);
\r
506 sLine.Append("\x0a\x0d", 2);
\r
510 file.Write((LPCSTR)sLine, sLine.GetLength());
\r
// branch 3: UTF-8 output
513 else if ((bSaveAsUTF8)||((m_UnicodeType == CFileTextLines::UTF8BOM)||(m_UnicodeType == CFileTextLines::UTF8)))
\r
// a BOM is written only when the loaded type was UTF8BOM
515 if (m_UnicodeType == CFileTextLines::UTF8BOM)
\r
517 //first write the BOM
\r
// the BOM goes out as a 16-bit value followed by one more byte
// (the declaration of uBOM is not visible in this extract)
518 UINT16 wBOM = 0xBBEF;
\r
519 file.Write(&wBOM, 2);
\r
521 file.Write(&uBOM, 1);
\r
523 for (int i=0; i<GetCount(); i++)
\r
// each line is converted with CUnicodeUtils::GetUTF8 before writing
525 CStringA sLine = CUnicodeUtils::GetUTF8(GetAt(i));
\r
526 EOL ending = GetLineEnding(i);
\r
527 StripAsciiWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);
\r
529 sLine = sLine.MakeLower();
\r
531 if ((m_bReturnAtEnd)||(i != GetCount()-1))
\r
533 if ((ending == EOL_AUTOLINE)||(ending == EOL_NOENDING))
\r
534 ending = m_LineEndings;
\r
542 sLine.Append("\x0d\x0a",2);
\r
548 sLine.Append("\x0a\x0d",2);
\r
552 file.Write((LPCSTR)sLine, sLine.GetLength());
\r
// the text of a caught CException is copied into m_sErrorString
// NOTE(review): releasing the exception object (e->Delete()) is not visible in
// this extract - confirm it is done
557 catch (CException * e)
\r
559 e->GetErrorMessage(m_sErrorString.GetBuffer(4096), 4096);
\r
560 m_sErrorString.ReleaseBuffer();
\r
// Stores a system error message text in m_sErrorString.
// FormatMessage is asked to look the text up in the system message table
// (FORMAT_MESSAGE_FROM_SYSTEM), to allocate the output buffer itself
// (FORMAT_MESSAGE_ALLOCATE_BUFFER) and to leave insert sequences untouched
// (FORMAT_MESSAGE_IGNORE_INSERTS).
// NOTE(review): the declaration of lpMsgBuf and the message id argument are not
// visible in this extract; presumably the id comes from GetLastError() - confirm.
567 void CFileTextLines::SetErrorString()
\r
570 FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
\r
571 FORMAT_MESSAGE_FROM_SYSTEM |
\r
572 FORMAT_MESSAGE_IGNORE_INSERTS,
\r
575 MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
\r
576 (LPTSTR) &lpMsgBuf,
\r
// copy the text, then release the buffer with LocalFree
580 m_sErrorString = (LPCTSTR)lpMsgBuf;
\r
581 LocalFree( lpMsgBuf );
\r
// Copies the encoding type and the line ending style of this object to another
// CFileTextLines object.
//   pFileToCopySettingsTo  target object; nothing is copied when it is NULL
584 void CFileTextLines::CopySettings(CFileTextLines * pFileToCopySettingsTo)
\r
586 if (pFileToCopySettingsTo)
\r
588 pFileToCopySettingsTo->m_UnicodeType = m_UnicodeType;
\r
589 pFileToCopySettingsTo->m_LineEndings = m_LineEndings;
\r