OSDN Git Service

add contrib dir
[tortoisegit/TortoiseGitJp.git] / contrib / Utils / MakeUTF8.c
diff --git a/contrib/Utils/MakeUTF8.c b/contrib/Utils/MakeUTF8.c
new file mode 100644 (file)
index 0000000..46879ca
--- /dev/null
@@ -0,0 +1,317 @@
+/******************************************************************************\r
+    MakeUTF8.c\r
+\r
+Copyright (C) 2002 - 2006 Simon Large\r
+\r
+This program is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU General Public License\r
+as published by the Free Software Foundation; either version 2\r
+of the License, or (at your option) any later version.\r
+\r
+This program is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
+GNU General Public License for more details.\r
+\r
+You should have received a copy of the GNU General Public License\r
+along with this program; if not, write to the Free Software Foundation,\r
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\r
+\r
+Description:\r
+This program checks text files for the presence of a byte-order-mark (BOM)\r
+and for a UTF-8 encoding indicator in the XML version tag. You can also\r
+opt to add either or both of these features.\r
+\r
+Use:\r
+MakeUTF8 [ -b ] [ -x ] file [ file ... ]\r
+Wildcard filenames are supported. Subdirectory recursion is not at present.\r
+-b option adds/corrects BOM in file if not already present.\r
+-x option adds/corrects XML tag if not already present.\r
+With no options, the current state is reported but nothing is changed.\r
+\r
+Example:\r
+MakeUTF8 -b *.xml tsvn_dug\*.xml\r
+Fixes BOMs (but not XML tags) in all .xml files in the current directory,\r
+and in the tsvn_dug subdirectory.\r
+\r
+This program has only been built using the Microsoft Visual C++ compiler.\r
+Library calls for finding files (_findfirst64) will probably need to be\r
+changed in other environments.\r
+\r
+No special compiler options were used. CL MakeUTF8.c works OK.\r
+******************************************************************************/\r
+\r
+#include <stdio.h>\r
+#include <string.h>\r
+#include <stdlib.h>\r
+#include <io.h>\r
+\r
+// Status flags returned from the file processor.\r
+#define ADD_BOM     1       // BOM is missing\r
+#define DOUBLE_BOM  2       // Double BOM found\r
+#define XML_TAG     4       // XML tag missing, or UTF-8 not included\r
+#define FIXED_BOM   64      // BOM has been added or fixed\r
+#define FIXED_TAG   128     // XML tag has been added or fixed\r
+\r
+char *help =\r
+"MakeUTF8     Version 1.1\n"\r
+"Add UTF-8 byte-order-mark and XML-tag to start of text file.\n\n"\r
+"Use: MakeUTF8 [ -b ] [ -x ] file [ file ... ]\n"\r
+"     -b option adds/corrects BOM in file if not already present\n"\r
+"     -x option adds/corrects XML tag if not already present\n" \r
+"     With no options, just report current state\n\n";\r
+\r
+int ProcessFile(const char *FName, const char *TName, int Action);\r
+\r
+main(int argc, char *argv[])\r
+{\r
+    int n, Action = 0, Result = 0;\r
+    char Path[_MAX_PATH], Temp[_MAX_PATH];\r
+    char *FName;\r
+    struct __finddata64_t FileInfo;\r
+    intptr_t hFile;\r
+\r
+    if (argc < 2)\r
+    {\r
+        fprintf(stderr, "%s", help);\r
+        exit(0);\r
+    }\r
+\r
+    for (n = 1; n < argc; n++)\r
+    {\r
+        if (stricmp(argv[n], "-b") == 0)\r
+        {\r
+            Action |= ADD_BOM | DOUBLE_BOM;\r
+            continue;\r
+        }\r
+        if (stricmp(argv[n], "-x") == 0)\r
+        {\r
+            Action |= XML_TAG;\r
+            continue;\r
+        }\r
+        // Unscramble wildcard filenames\r
+        if ((hFile = _findfirst64(argv[n], &FileInfo)) != -1)\r
+        {\r
+            printf("BOM\tXML-tag\tFile\n");\r
+            printf("--------------------\n");\r
+            // Extract path from original argument.\r
+            strcpy(Path, argv[n]);\r
+            // Set FName to point to filename portion of path\r
+            FName = strrchr(Path, '\\');\r
+            if (FName == NULL) FName = strrchr(Path, '/');\r
+            if (FName == NULL) FName = strrchr(Path, ':');\r
+            if (FName == NULL) FName = Path;\r
+            else ++FName;\r
+            \r
+            // Process all matching files.\r
+            do\r
+            {\r
+                if (!(FileInfo.attrib & _A_SUBDIR))\r
+                {\r
+                    // Append filename to path\r
+                    char *p;\r
+                    strcpy(FName, FileInfo.name);\r
+                    // Create temp filename by replacing extension with $$$\r
+                    strcpy(Temp, Path);\r
+                    p = strrchr(Temp, '.');\r
+                    if (p != NULL) *p = '\0';       // Trim off extension\r
+                    strcat(Temp, ".$$$");\r
+                    Result = ProcessFile(Path, Temp, Action);\r
+                    if (Result < 0) break;          // Failed.\r
+                    // Show results of analysis / repair\r
+                    if (Result & ADD_BOM)\r
+                    {\r
+                        if (Result & FIXED_BOM)\r
+                            p = "Added";\r
+                        else\r
+                            p = "None";\r
+                    }\r
+                    else if (Result & DOUBLE_BOM)\r
+                    {\r
+                        if (Result & FIXED_BOM)\r
+                            p = "Fixed";\r
+                        else\r
+                            p = "Multi";\r
+                    }\r
+                    else\r
+                        p = "OK";\r
+                    printf("%s\t", p);\r
+                    if (Result & XML_TAG)\r
+                    {\r
+                        if (Result & FIXED_TAG)\r
+                            p = "Fixed";\r
+                        else\r
+                            p = "None";\r
+                    }\r
+                    else\r
+                        p = "OK";\r
+                    printf("%s\t%s\n", p, FileInfo.name);\r
+                }\r
+            }\r
+            while (_findnext64(hFile, &FileInfo) == 0);\r
+            _findclose(hFile);\r
+        }\r
+    }\r
+    exit((Result < 0) ? 1 : 0);\r
+}\r
+\r
// These 3 bytes are the UTF-8 BOM we want (EF BB BF). They are written as
// character constants rather than integer constants so that each value is
// representable in a plain char even where char is a signed type.
char BOMbuf[3] = { '\xef', '\xbb', '\xbf' };

// This is the XML tag we want
char *UTFtag = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";

// Read this amount at start of file to check for BOM and tag
#define BUFSIZE 2048
+\r
+int ProcessFile(const char *FName, const char *TName, int Action)\r
+{\r
+    FILE *fp, *fpout;\r
+    char Buffer[BUFSIZE + 1024];\r
+    int Len;\r
+    size_t NumRead;\r
+    int Changed = 0, Checked = 0;\r
+    int UTFtaglen;\r
+    char *TagStart, *TagStop;\r
+    char *AfterBOM = Buffer;\r
+    \r
+    if ((fp = fopen(FName, "r")) == NULL)\r
+        return -1;\r
+        \r
+    // Check if output file exists already\r
+    if ((fpout = fopen(TName, "r")) != NULL) {\r
+        fprintf(stderr, "%s:\tTemp file already exists\n", TName);\r
+        fclose(fpout);\r
+        fclose(fp);\r
+        return -1;\r
+    }\r
+    \r
+    while ((NumRead = fread(Buffer, 1, BUFSIZE, fp)) > 0)\r
+    {\r
+        if (!Checked)\r
+        {\r
+            Checked = 1;\r
+            // Check for no BOM or multiple BOM.\r
+            if (memcmp(BOMbuf, Buffer, 3) == 0)\r
+            {\r
+                // BOM already exists.\r
+                AfterBOM = Buffer + 3;\r
+                while (memcmp(BOMbuf, AfterBOM, 3) == 0)\r
+                {\r
+                    // Multiple BOM found.\r
+                    Changed |= DOUBLE_BOM;\r
+                    if (Action & DOUBLE_BOM)\r
+                    {\r
+                        // Delete BOM from source\r
+                        NumRead -= 3;\r
+                        memmove(Buffer, AfterBOM, NumRead);\r
+                        Buffer[NumRead] = '\0';\r
+                    }\r
+                    else\r
+                        break;\r
+                }\r
+            }\r
+            else\r
+            {\r
+                // No BOM found.\r
+                Changed |= ADD_BOM;\r
+                if (Action & ADD_BOM)\r
+                {\r
+                    // Add BOM to source\r
+                    AfterBOM = Buffer + 3;\r
+                    memmove(AfterBOM, Buffer, NumRead);\r
+                    memcpy(Buffer, BOMbuf, 3);\r
+                    NumRead += 3;\r
+                }\r
+            }\r
+            \r
+            // Check for XML tag <?xml version="1.0" encoding="UTF-8"?>\r
+            Buffer[NumRead] = '\0';     // Add null terminator for string search.\r
+            UTFtaglen = strlen(UTFtag);\r
+            if (strstr(Buffer, "encoding=\"UTF-8\"") == NULL)\r
+            {\r
+                // No XML tag found.\r
+                Changed |= XML_TAG;\r
+                if (Action & XML_TAG)\r
+                {\r
+                    TagStart = strstr(Buffer, "<?xml version");\r
+                    if (TagStart != NULL)\r
+                    {\r
+                        TagStop = strstr(TagStart, "?>");\r
+                        if (TagStop != NULL)\r
+                        {\r
+                            // Version tag present without UTF-8\r
+                            Len = UTFtaglen - (TagStop - TagStart + 2);\r
+                            if (Len != 0)\r
+                            {\r
+                                // Expand/contract the space\r
+                                memmove(TagStop + Len, TagStop, NumRead - (TagStop - Buffer));\r
+                                NumRead += Len;\r
+                            }\r
+                            memcpy(TagStart, UTFtag, UTFtaglen);\r
+                        }\r
+                        else\r
+                        {\r
+                            // Version tag is not terminated. Cannot fix.\r
+                            Action &= ~XML_TAG;\r
+                        }\r
+                    }\r
+                    else\r
+                    {\r
+                        // No version tag found. Add one after BOM, with newline.\r
+                        memmove(AfterBOM + UTFtaglen + 1, AfterBOM, NumRead);\r
+                        memcpy(AfterBOM, UTFtag, UTFtaglen);\r
+                        AfterBOM[UTFtaglen] = '\n';\r
+                        NumRead += UTFtaglen + 1;\r
+                    }\r
+                }\r
+            }\r
+\r
+            if (!(Action & Changed))\r
+            {\r
+                // If no problems marked for fixing, leave it here.\r
+                break;\r
+            }\r
+            // Changes made - open a temp file for the BOM'ed version\r
+            if ((fpout = fopen(TName, "w")) == NULL)\r
+            {\r
+                fprintf(stderr, "Cannot open temp file\n");\r
+                fclose(fp);\r
+                return -1;\r
+            }\r
+        }\r
+        if (fwrite(Buffer, 1, NumRead, fpout) != NumRead)\r
+        {\r
+            fprintf(stderr, "Error writing to temp file\n");\r
+            fclose(fpout);\r
+            fclose(fp);\r
+            return -1;\r
+        }\r
+    }\r
+\r
+    fclose(fp);\r
+\r
+    // If changes have been made, replace original file with temp file.\r
+    if (Changed & Action)\r
+    {\r
+        // Replace original with temp file\r
+        fclose(fpout);\r
+        if (remove(FName) != 0)\r
+        {\r
+            fprintf(stderr, "Cannot delete original file\n");\r
+            return -1;\r
+        }\r
+        if (rename(TName, FName) != 0)\r
+        {\r
+            fprintf(stderr, "Cannot replace original file with fixed version\n");\r
+            return -1;\r
+        }\r
+        // Add flags to indicate what we have actually fixed\r
+        if (Changed & Action & (DOUBLE_BOM | ADD_BOM))\r
+            Changed |= FIXED_BOM;\r
+        if (Changed & Action & XML_TAG)\r
+            Changed |= FIXED_TAG;\r
+    }\r
+\r
+    return Changed;\r
+}\r