--- /dev/null
+/******************************************************************************\r
+ MakeUTF8.c\r
+\r
+Copyright (C) 2002 - 2006 Simon Large\r
+\r
+This program is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU General Public License\r
+as published by the Free Software Foundation; either version 2\r
+of the License, or (at your option) any later version.\r
+\r
+This program is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+GNU General Public License for more details.\r
+\r
+You should have received a copy of the GNU General Public License\r
+along with this program; if not, write to the Free Software Foundation,\r
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\r
+\r
+Description:\r
+This program checks text files for the presence of a byte-order-mark (BOM)\r
+and for a UTF-8 encoding indicator in the XML version tag. You can also\r
+opt to add either or both of these features.\r
+\r
+Use:\r
+MakeUTF8 [ -b ] [ -x ] file [ file ... ]\r
+Wildcard filenames are supported. Subdirectory recursion is not at present.\r
+-b option adds/corrects BOM in file if not already present.\r
+-x option adds/corrects XML tag if not already present.\r
+With no options, the current state is reported but nothing is changed.\r
+\r
+Example:\r
+MakeUTF8 -b *.xml tsvn_dug\*.xml\r
+Fixes BOMs (but not XML tags) in all .xml files in the current directory,\r
+and in the tsvn_dug subdirectory.\r
+\r
+This program has only been built using the Microsoft Visual C++ compiler.\r
+Library calls for finding files (_findfirst64) will probably need to be\r
+changed in other environments.\r
+\r
+No special compiler options were used. CL MakeUTF8.c works OK.\r
+******************************************************************************/\r
+\r
+#include <stdio.h>\r
+#include <string.h>\r
+#include <stdlib.h>\r
+#include <io.h>\r
+\r
+// Bit flags used both in the Action argument of ProcessFile() and in its\r
+// return value. The low bits name a problem; the high bits report a repair.\r
+#define ADD_BOM    0x01 // File does not start with a BOM\r
+#define DOUBLE_BOM 0x02 // File starts with more than one BOM\r
+#define XML_TAG    0x04 // XML tag missing, or it lacks encoding="UTF-8"\r
+#define FIXED_BOM  0x40 // BOM was added or duplicates were removed\r
+#define FIXED_TAG  0x80 // XML tag was added or rewritten\r
+\r
+// Usage text written to stderr when the program is started without arguments.\r
+char *help =\r
+    "MakeUTF8 Version 1.1\n"\r
+    "Add UTF-8 byte-order-mark and XML-tag to start of text file.\n"\r
+    "\n"\r
+    "Use: MakeUTF8 [ -b ] [ -x ] file [ file ... ]\n"\r
+    " -b option adds/corrects BOM in file if not already present\n"\r
+    " -x option adds/corrects XML tag if not already present\n"\r
+    " With no options, just report current state\n"\r
+    "\n";\r
+\r
+// Examine one file for BOM / XML-tag problems and repair those selected in Action.\r
+// FName is the file to examine; TName is the temporary file used while rewriting.\r
+// Returns a combination of the ADD_BOM, DOUBLE_BOM, XML_TAG, FIXED_BOM and\r
+// FIXED_TAG flags, or -1 on failure.\r
+int ProcessFile(const char *FName, const char *TName, int Action);\r
+\r
+main(int argc, char *argv[])\r
+{\r
+ int n, Action = 0, Result = 0;\r
+ char Path[_MAX_PATH], Temp[_MAX_PATH];\r
+ char *FName;\r
+ struct __finddata64_t FileInfo;\r
+ intptr_t hFile;\r
+\r
+ if (argc < 2)\r
+ {\r
+ fprintf(stderr, "%s", help);\r
+ exit(0);\r
+ }\r
+\r
+ for (n = 1; n < argc; n++)\r
+ {\r
+ if (stricmp(argv[n], "-b") == 0)\r
+ {\r
+ Action |= ADD_BOM | DOUBLE_BOM;\r
+ continue;\r
+ }\r
+ if (stricmp(argv[n], "-x") == 0)\r
+ {\r
+ Action |= XML_TAG;\r
+ continue;\r
+ }\r
+ // Unscramble wildcard filenames\r
+ if ((hFile = _findfirst64(argv[n], &FileInfo)) != -1)\r
+ {\r
+ printf("BOM\tXML-tag\tFile\n");\r
+ printf("--------------------\n");\r
+ // Extract path from original argument.\r
+ strcpy(Path, argv[n]);\r
+ // Set FName to point to filename portion of path\r
+ FName = strrchr(Path, '\\');\r
+ if (FName == NULL) FName = strrchr(Path, '/');\r
+ if (FName == NULL) FName = strrchr(Path, ':');\r
+ if (FName == NULL) FName = Path;\r
+ else ++FName;\r
+ \r
+ // Process all matching files.\r
+ do\r
+ {\r
+ if (!(FileInfo.attrib & _A_SUBDIR))\r
+ {\r
+ // Append filename to path\r
+ char *p;\r
+ strcpy(FName, FileInfo.name);\r
+ // Create temp filename by replacing extension with $$$\r
+ strcpy(Temp, Path);\r
+ p = strrchr(Temp, '.');\r
+ if (p != NULL) *p = '\0'; // Trim off extension\r
+ strcat(Temp, ".$$$");\r
+ Result = ProcessFile(Path, Temp, Action);\r
+ if (Result < 0) break; // Failed.\r
+ // Show results of analysis / repair\r
+ if (Result & ADD_BOM)\r
+ {\r
+ if (Result & FIXED_BOM)\r
+ p = "Added";\r
+ else\r
+ p = "None";\r
+ }\r
+ else if (Result & DOUBLE_BOM)\r
+ {\r
+ if (Result & FIXED_BOM)\r
+ p = "Fixed";\r
+ else\r
+ p = "Multi";\r
+ }\r
+ else\r
+ p = "OK";\r
+ printf("%s\t", p);\r
+ if (Result & XML_TAG)\r
+ {\r
+ if (Result & FIXED_TAG)\r
+ p = "Fixed";\r
+ else\r
+ p = "None";\r
+ }\r
+ else\r
+ p = "OK";\r
+ printf("%s\t%s\n", p, FileInfo.name);\r
+ }\r
+ }\r
+ while (_findnext64(hFile, &FileInfo) == 0);\r
+ _findclose(hFile);\r
+ }\r
+ }\r
+ exit((Result < 0) ? 1 : 0);\r
+}\r
+\r
+// The UTF-8 byte-order-mark: the three bytes EF BB BF.\r
+char BOMbuf[3] = { '\xef', '\xbb', '\xbf' };\r
+\r
+// XML tag written to files that have no UTF-8 encoding indicator.\r
+char *UTFtag = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";\r
+\r
+// Bytes read per chunk. BOM and tag are only looked for in the first chunk.\r
+#define BUFSIZE 2048\r
+\r
+// Check one file for a missing/duplicated BOM and a missing UTF-8 XML tag,\r
+// and repair whichever of those problems are selected in Action.\r
+//\r
+// FName   File to examine.\r
+// TName   Temporary file. It must not exist yet; it is only created when a\r
+//         repair is actually being made.\r
+// Action  Any combination of ADD_BOM, DOUBLE_BOM and XML_TAG: the problems\r
+//         that should be fixed. 0 means report only.\r
+//\r
+// Only the first BUFSIZE bytes are inspected. When a repair is made the whole\r
+// file is copied to TName, FName is deleted and TName is renamed to FName.\r
+//\r
+// Returns the problems found (ADD_BOM, DOUBLE_BOM, XML_TAG), with FIXED_BOM /\r
+// FIXED_TAG added for those that were repaired, or -1 on any failure.\r
+//\r
+// NOTE(review): both files are opened in text mode, so on Windows line endings\r
+// are translated on the way through - confirm that this is intended.\r
+int ProcessFile(const char *FName, const char *TName, int Action)\r
+{\r
+    FILE *fp, *fpout;\r
+    char Buffer[BUFSIZE + 1024];    // Slack leaves room for an added BOM and tag\r
+    int Len;\r
+    size_t NumRead;\r
+    int Changed = 0, Checked = 0;\r
+    int UTFtaglen;\r
+    char *TagStart, *TagStop;\r
+    char *AfterBOM = Buffer;\r
+\r
+    if ((fp = fopen(FName, "r")) == NULL)\r
+        return -1;\r
+\r
+    // Check if output file exists already. On the normal path this fopen()\r
+    // fails, which leaves fpout == NULL until the temp file is created.\r
+    if ((fpout = fopen(TName, "r")) != NULL) {\r
+        fprintf(stderr, "%s:\tTemp file already exists\n", TName);\r
+        fclose(fpout);\r
+        fclose(fp);\r
+        return -1;\r
+    }\r
+\r
+    while ((NumRead = fread(Buffer, 1, BUFSIZE, fp)) > 0)\r
+    {\r
+        if (!Checked)\r
+        {\r
+            Checked = 1;\r
+            // Check for no BOM or multiple BOM. The length tests stop\r
+            // memcmp() from looking at bytes that were never read.\r
+            if (NumRead >= 3 && memcmp(BOMbuf, Buffer, 3) == 0)\r
+            {\r
+                // BOM already exists.\r
+                AfterBOM = Buffer + 3;\r
+                while (NumRead >= 6 && memcmp(BOMbuf, AfterBOM, 3) == 0)\r
+                {\r
+                    // Multiple BOM found.\r
+                    Changed |= DOUBLE_BOM;\r
+                    if (!(Action & DOUBLE_BOM))\r
+                        break;\r
+                    // Delete the leading BOM, then look again.\r
+                    NumRead -= 3;\r
+                    memmove(Buffer, AfterBOM, NumRead);\r
+                }\r
+            }\r
+            else\r
+            {\r
+                // No BOM found.\r
+                Changed |= ADD_BOM;\r
+                if (Action & ADD_BOM)\r
+                {\r
+                    // Add BOM to source\r
+                    AfterBOM = Buffer + 3;\r
+                    memmove(AfterBOM, Buffer, NumRead);\r
+                    memcpy(Buffer, BOMbuf, 3);\r
+                    NumRead += 3;\r
+                }\r
+            }\r
+\r
+            // Check for XML tag <?xml version="1.0" encoding="UTF-8"?>\r
+            Buffer[NumRead] = '\0'; // Add null terminator for string search.\r
+            UTFtaglen = (int)strlen(UTFtag);\r
+            if (strstr(Buffer, "encoding=\"UTF-8\"") == NULL)\r
+            {\r
+                // No XML tag found.\r
+                Changed |= XML_TAG;\r
+                if (Action & XML_TAG)\r
+                {\r
+                    TagStart = strstr(Buffer, "<?xml version");\r
+                    if (TagStart != NULL)\r
+                    {\r
+                        TagStop = strstr(TagStart, "?>");\r
+                        if (TagStop != NULL)\r
+                        {\r
+                            // Version tag present without UTF-8.\r
+                            // Len is how much the tag grows (or shrinks, if negative).\r
+                            Len = UTFtaglen - (int)(TagStop - TagStart + 2);\r
+                            if (Len != 0)\r
+                            {\r
+                                // Expand/contract the space\r
+                                memmove(TagStop + Len, TagStop, NumRead - (TagStop - Buffer));\r
+                                NumRead += Len;\r
+                            }\r
+                            memcpy(TagStart, UTFtag, UTFtaglen);\r
+                        }\r
+                        else\r
+                        {\r
+                            // Version tag is not terminated. Cannot fix.\r
+                            Action &= ~XML_TAG;\r
+                        }\r
+                    }\r
+                    else\r
+                    {\r
+                        // No version tag found. Add one after BOM, with newline.\r
+                        // Only the bytes that follow the BOM are shifted.\r
+                        memmove(AfterBOM + UTFtaglen + 1, AfterBOM,\r
+                                NumRead - (AfterBOM - Buffer));\r
+                        memcpy(AfterBOM, UTFtag, UTFtaglen);\r
+                        AfterBOM[UTFtaglen] = '\n';\r
+                        NumRead += UTFtaglen + 1;\r
+                    }\r
+                }\r
+            }\r
+\r
+            if (!(Action & Changed))\r
+            {\r
+                // If no problems marked for fixing, leave it here.\r
+                break;\r
+            }\r
+            // Changes made - open a temp file for the BOM'ed version\r
+            if ((fpout = fopen(TName, "w")) == NULL)\r
+            {\r
+                fprintf(stderr, "Cannot open temp file\n");\r
+                fclose(fp);\r
+                return -1;\r
+            }\r
+        }\r
+        if (fwrite(Buffer, 1, NumRead, fpout) != NumRead)\r
+        {\r
+            fprintf(stderr, "Error writing to temp file\n");\r
+            fclose(fpout);\r
+            fclose(fp);\r
+            remove(TName);  // Do not leave a partial temp file behind\r
+            return -1;\r
+        }\r
+    }\r
+\r
+    // fread() returns 0 for end-of-file and for errors alike. A read error\r
+    // must not end with a truncated copy replacing the original.\r
+    if (ferror(fp))\r
+    {\r
+        fprintf(stderr, "%s:\tError reading file\n", FName);\r
+        fclose(fp);\r
+        if (fpout != NULL)\r
+        {\r
+            fclose(fpout);\r
+            remove(TName);\r
+        }\r
+        return -1;\r
+    }\r
+\r
+    fclose(fp);\r
+\r
+    // If changes have been made, replace original file with temp file.\r
+    if (Changed & Action)\r
+    {\r
+        // fclose() flushes the last buffered data, so it has to succeed\r
+        // before the original may be deleted.\r
+        if (fclose(fpout) != 0)\r
+        {\r
+            fprintf(stderr, "Error writing to temp file\n");\r
+            remove(TName);\r
+            return -1;\r
+        }\r
+        if (remove(FName) != 0)\r
+        {\r
+            fprintf(stderr, "Cannot delete original file\n");\r
+            remove(TName);  // Original is untouched; discard the copy\r
+            return -1;\r
+        }\r
+        if (rename(TName, FName) != 0)\r
+        {\r
+            // The temp file now holds the only copy, so it is kept.\r
+            fprintf(stderr, "Cannot replace original file with fixed version\n");\r
+            return -1;\r
+        }\r
+        // Add flags to indicate what we have actually fixed\r
+        if (Changed & Action & (DOUBLE_BOM | ADD_BOM))\r
+            Changed |= FIXED_BOM;\r
+        if (Changed & Action & XML_TAG)\r
+            Changed |= FIXED_TAG;\r
+    }\r
+\r
+    return Changed;\r
+}\r