1 /******************************************************************************
\r
4 Copyright (C) 2002 - 2006 Simon Large
\r
6 This program is free software; you can redistribute it and/or
\r
7 modify it under the terms of the GNU General Public License
\r
8 as published by the Free Software Foundation; either version 2
\r
9 of the License, or (at your option) any later version.
\r
11 This program is distributed in the hope that it will be useful,
\r
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
14 GNU General Public License for more details.
\r
16 You should have received a copy of the GNU General Public License
\r
17 along with this program; if not, write to the Free Software Foundation,
\r
18 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\r
21 This program checks text files for the presence of a byte-order-mark (BOM)
\r
22 and for a UTF-8 encoding indicator in the XML version tag. You can also
\r
23 opt to add either or both of these features.
\r
26 MakeUTF8 [ -b ] [ -x ] file [ file ... ]
\r
27 Wildcard filenames are supported. Subdirectory recursion is not supported at present.
\r
28 -b option adds/corrects BOM in file if not already present.
\r
29 -x option adds/corrects XML tag if not already present.
\r
30 With no options, the current state is reported but nothing is changed.
\r
33 MakeUTF8 -b *.xml tsvn_dug\*.xml
\r
34 Fixes BOMs (but not XML tags) in all .xml files in the current directory,
\r
35 and in the tsvn_dug subdirectory.
\r
37 This program has only been built using the Microsoft Visual C++ compiler.
\r
38 Library calls for finding files (_findfirst64) will probably need to be
\r
39 changed in other environments.
\r
41 No special compiler options were used. CL MakeUTF8.c works OK.
\r
42 ******************************************************************************/
\r
49 // Status flags returned from the file processor.
\r
50 #define ADD_BOM 1 // BOM is missing
\r
51 #define DOUBLE_BOM 2 // Double BOM found
\r
52 #define XML_TAG 4 // XML tag missing, or UTF-8 not included
\r
53 #define FIXED_BOM 64 // BOM has been added or fixed
\r
54 #define FIXED_TAG 128 // XML tag has been added or fixed
\r
57 "MakeUTF8 Version 1.1\n"
\r
58 "Add UTF-8 byte-order-mark and XML-tag to start of text file.\n\n"
\r
59 "Use: MakeUTF8 [ -b ] [ -x ] file [ file ... ]\n"
\r
60 " -b option adds/corrects BOM in file if not already present\n"
\r
61 " -x option adds/corrects XML tag if not already present\n"
\r
62 " With no options, just report current state\n\n";
\r
64 int ProcessFile(const char *FName, const char *TName, int Action);
\r
66 main(int argc, char *argv[])
\r
68 int n, Action = 0, Result = 0;
\r
69 char Path[_MAX_PATH], Temp[_MAX_PATH];
\r
71 struct __finddata64_t FileInfo;
\r
76 fprintf(stderr, "%s", help);
\r
80 for (n = 1; n < argc; n++)
\r
82 if (stricmp(argv[n], "-b") == 0)
\r
84 Action |= ADD_BOM | DOUBLE_BOM;
\r
87 if (stricmp(argv[n], "-x") == 0)
\r
92 // Unscramble wildcard filenames
\r
93 if ((hFile = _findfirst64(argv[n], &FileInfo)) != -1)
\r
95 printf("BOM\tXML-tag\tFile\n");
\r
96 printf("--------------------\n");
\r
97 // Extract path from original argument.
\r
98 strcpy(Path, argv[n]);
\r
99 // Set FName to point to filename portion of path
\r
100 FName = strrchr(Path, '\\');
\r
101 if (FName == NULL) FName = strrchr(Path, '/');
\r
102 if (FName == NULL) FName = strrchr(Path, ':');
\r
103 if (FName == NULL) FName = Path;
\r
106 // Process all matching files.
\r
109 if (!(FileInfo.attrib & _A_SUBDIR))
\r
111 // Append filename to path
\r
113 strcpy(FName, FileInfo.name);
\r
114 // Create temp filename by replacing extension with $$$
\r
115 strcpy(Temp, Path);
\r
116 p = strrchr(Temp, '.');
\r
117 if (p != NULL) *p = '\0'; // Trim off extension
\r
118 strcat(Temp, ".$$$");
\r
119 Result = ProcessFile(Path, Temp, Action);
\r
120 if (Result < 0) break; // Failed.
\r
121 // Show results of analysis / repair
\r
122 if (Result & ADD_BOM)
\r
124 if (Result & FIXED_BOM)
\r
129 else if (Result & DOUBLE_BOM)
\r
131 if (Result & FIXED_BOM)
\r
139 if (Result & XML_TAG)
\r
141 if (Result & FIXED_TAG)
\r
148 printf("%s\t%s\n", p, FileInfo.name);
\r
151 while (_findnext64(hFile, &FileInfo) == 0);
\r
155 exit((Result < 0) ? 1 : 0);
\r
158 // These 3 bytes are the BOM we want
\r
159 char BOMbuf[3] = { 0xef, 0xbb, 0xbf };
\r
161 // This is the XML tag we want
\r
162 char *UTFtag = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
\r
164 // Read this amount at start of file to check for BOM and tag
\r
165 #define BUFSIZE 2048
\r
167 int ProcessFile(const char *FName, const char *TName, int Action)
\r
170 char Buffer[BUFSIZE + 1024];
\r
173 int Changed = 0, Checked = 0;
\r
175 char *TagStart, *TagStop;
\r
176 char *AfterBOM = Buffer;
\r
178 if ((fp = fopen(FName, "r")) == NULL)
\r
181 // Check if output file exists already
\r
182 if ((fpout = fopen(TName, "r")) != NULL) {
\r
183 fprintf(stderr, "%s:\tTemp file already exists\n", TName);
\r
189 while ((NumRead = fread(Buffer, 1, BUFSIZE, fp)) > 0)
\r
194 // Check for no BOM or multiple BOM.
\r
195 if (memcmp(BOMbuf, Buffer, 3) == 0)
\r
197 // BOM already exists.
\r
198 AfterBOM = Buffer + 3;
\r
199 while (memcmp(BOMbuf, AfterBOM, 3) == 0)
\r
201 // Multiple BOM found.
\r
202 Changed |= DOUBLE_BOM;
\r
203 if (Action & DOUBLE_BOM)
\r
205 // Delete BOM from source
\r
207 memmove(Buffer, AfterBOM, NumRead);
\r
208 Buffer[NumRead] = '\0';
\r
217 Changed |= ADD_BOM;
\r
218 if (Action & ADD_BOM)
\r
220 // Add BOM to source
\r
221 AfterBOM = Buffer + 3;
\r
222 memmove(AfterBOM, Buffer, NumRead);
\r
223 memcpy(Buffer, BOMbuf, 3);
\r
228 // Check for XML tag <?xml version="1.0" encoding="UTF-8"?>
\r
229 Buffer[NumRead] = '\0'; // Add null terminator for string search.
\r
230 UTFtaglen = strlen(UTFtag);
\r
231 if (strstr(Buffer, "encoding=\"UTF-8\"") == NULL)
\r
233 // No XML tag found.
\r
234 Changed |= XML_TAG;
\r
235 if (Action & XML_TAG)
\r
237 TagStart = strstr(Buffer, "<?xml version");
\r
238 if (TagStart != NULL)
\r
240 TagStop = strstr(TagStart, "?>");
\r
241 if (TagStop != NULL)
\r
243 // Version tag present without UTF-8
\r
244 Len = UTFtaglen - (TagStop - TagStart + 2);
\r
247 // Expand/contract the space
\r
248 memmove(TagStop + Len, TagStop, NumRead - (TagStop - Buffer));
\r
251 memcpy(TagStart, UTFtag, UTFtaglen);
\r
255 // Version tag is not terminated. Cannot fix.
\r
256 Action &= ~XML_TAG;
\r
261 // No version tag found. Add one after BOM, with newline.
\r
262 memmove(AfterBOM + UTFtaglen + 1, AfterBOM, NumRead);
\r
263 memcpy(AfterBOM, UTFtag, UTFtaglen);
\r
264 AfterBOM[UTFtaglen] = '\n';
\r
265 NumRead += UTFtaglen + 1;
\r
270 if (!(Action & Changed))
\r
272 // If no problems marked for fixing, leave it here.
\r
275 // Changes made - open a temp file for the BOM'ed version
\r
276 if ((fpout = fopen(TName, "w")) == NULL)
\r
278 fprintf(stderr, "Cannot open temp file\n");
\r
283 if (fwrite(Buffer, 1, NumRead, fpout) != NumRead)
\r
285 fprintf(stderr, "Error writing to temp file\n");
\r
294 // If changes have been made, replace original file with temp file.
\r
295 if (Changed & Action)
\r
297 // Replace original with temp file
\r
299 if (remove(FName) != 0)
\r
301 fprintf(stderr, "Cannot delete original file\n");
\r
304 if (rename(TName, FName) != 0)
\r
306 fprintf(stderr, "Cannot replace original file with fixed version\n");
\r
309 // Add flags to indicate what we have actually fixed
\r
310 if (Changed & Action & (DOUBLE_BOM | ADD_BOM))
\r
311 Changed |= FIXED_BOM;
\r
312 if (Changed & Action & XML_TAG)
\r
313 Changed |= FIXED_TAG;
\r