OSDN Git Service

first commit
[slackware/slackbuild.git] / stardict-tools / stardict-tools-3.0.1 / src / cedict.cpp
1 #include "stdio.h"
2 #include "stdlib.h"
3 #include <string.h>
4 #include <sys/stat.h>
5
6
7 #include <gtk/gtk.h>
8 #include <glib.h>
9
10 struct _worditem
11 {
12         gchar *word;
13         gchar *pinyin;
14         gchar *definition;
15 };
16
17 gint stardict_strcmp(const gchar *s1, const gchar *s2)
18 {
19         gint a;
20         a = g_ascii_strcasecmp(s1, s2);
21         if (a == 0)
22                 return strcmp(s1, s2);
23         else
24                 return a;
25 }
26
27 gint comparefunc(gconstpointer a,gconstpointer b)
28 {
29         gint x;
30         x = stardict_strcmp(((struct _worditem *)a)->word,((struct _worditem *)b)->word);
31         if (x == 0)
32                 return ((struct _worditem *)a)->definition - ((struct _worditem *)b)->definition;
33         else
34                 return x;
35 }
36
37 void to_pinyin(gchar *str)
38 {
39         gchar *p1 = str;
40         gchar *p2 = str;
41         while (*p1) {
42                 if (*p1 == 'u' && *(p1+1) == ':') {
43                         *p2 = 'v';
44                         p2++;
45                         p1+=2;
46                         continue;
47                 } else {
48                         *p2 = *p1;
49                         p2++;
50                         p1++;
51                         continue;
52                 }
53         }
54         *p2 = '\0';
55 }
56
57 void to_definition(gchar *str)
58 {
59         while (*str) {
60                 if (*str=='/')
61                         *str='\n';
62                 str++;
63         }
64 }
65
66 void convert(char *filename)
67 {                       
68         struct stat stats;
69         if (stat (filename, &stats) == -1)
70         {
71                 printf("file not exist!\n");
72                 return;
73         }
74         gchar *basefilename = g_path_get_basename(filename);
75         FILE *tabfile;
76         tabfile = fopen(filename,"r");
77
78         gchar *buffer = (gchar *)g_malloc (stats.st_size + 1);
79         fread (buffer, 1, stats.st_size, tabfile);
80         fclose (tabfile);
81         buffer[stats.st_size] = '\0';   
82         
83         GArray *array = g_array_sized_new(FALSE,FALSE, sizeof(struct _worditem),20000);
84                 
85         gchar *p, *p1, *p2, *p3;
86         p = buffer;
87         if ((guchar)*p==0xEF && (guchar)*(p+1)==0xBB && (guchar)*(p+2)==0xBF) // UTF-8 order characters.
88                 p+=3;
89         struct _worditem worditem;
90         glong linenum=1;
91         while (1) {
92                 if (*p == '\0') {
93                         g_print("over\n");
94                         break;
95                 }
96                 p1 = strchr(p,'\n');
97                 if (!p1) {
98                         g_print("error, no end line\n");
99                         return;
100                 }
101                 *p1 = '\0';
102                 p1++;
103                 p2 = strchr(p,'[');
104                 if (!p2) {
105                         g_print("error, no [, %ld\n", linenum);
106                         return;
107                 }
108                 *p2 = '\0';
109                 p2++;
110                 p3 = strchr(p2, ']');
111                 if (!p3) {
112                         g_print("error, no ], %ld\n", linenum);
113                         return;
114                 }
115                 *p3 = '\0';
116                 p3++;
117                 worditem.word = p;
118                 to_pinyin(p2);
119                 worditem.pinyin = p2;
120                 to_definition(p3);
121                 worditem.definition = p3;
122                 g_strstrip(worditem.word);
123                 g_strstrip(worditem.pinyin);
124                 g_strstrip(worditem.definition);
125                 if (!worditem.word[0]) {
126                         g_print("%s-%ld, bad word!!!\n", basefilename, linenum);
127                         p= p1;
128                         linenum++;
129                         continue;
130                 }
131                 if (!worditem.pinyin[0]) {
132                         g_print("%s-%ld, bad pinyin!!!\n", basefilename, linenum);
133                 }
134                 if (!worditem.definition[0]) {
135                         g_print("%s-%ld, bad definition!!!\n", basefilename, linenum);
136                 }
137                 if (!worditem.pinyin[0] && !worditem.definition[0]) {
138                         g_print("%s-%ld, bad pinyin and definition!!!\n", basefilename, linenum);
139                         p= p1;
140                         linenum++;
141                         continue;
142                 }
143                 g_array_append_val(array, worditem);                    
144                 p= p1;                          
145                 linenum++;
146         }               
147         g_array_sort(array,comparefunc);
148                 
149         gchar idxfilename[256];
150         gchar dicfilename[256];
151         sprintf(idxfilename, "%s.idx", basefilename);
152         sprintf(dicfilename, "%s.dict", basefilename);
153         FILE *idxfile = fopen(idxfilename,"w");
154         FILE *dicfile = fopen(dicfilename,"w");
155
156         guint32 offset_old;
157         guint32 tmpglong;
158         struct _worditem *pworditem;
159         gint pinyin_len;
160         gint definition_len;
161         gulong i;
162         for (i=0; i< array->len; i++) {
163                 offset_old = ftell(dicfile);
164                 pworditem = &g_array_index(array, struct _worditem, i);
165                 pinyin_len = strlen(pworditem->pinyin);
166                 fwrite(pworditem->pinyin, 1 , pinyin_len+1,dicfile);
167                 definition_len = strlen(pworditem->definition);
168                 fwrite(pworditem->definition, 1 ,definition_len,dicfile);
169                 fwrite(pworditem->word,sizeof(gchar),strlen(pworditem->word)+1,idxfile);
170                 tmpglong = g_htonl(offset_old);
171                 fwrite(&(tmpglong),sizeof(guint32),1,idxfile);
172                 tmpglong = g_htonl(pinyin_len+1+ definition_len);
173                 fwrite(&(tmpglong),sizeof(guint32),1,idxfile);
174         }
175         fclose(idxfile);
176         fclose(dicfile);
177         g_print("%s wordcount: %d\n", basefilename, array->len);
178
179         g_free(buffer);
180         g_array_free(array,TRUE);
181         
182         gchar command[256];
183         sprintf(command, "dictzip %s.dict", basefilename);
184         system(command);
185
186         g_free(basefilename);
187 }
188
189 int main(int argc,char * argv [])
190 {
191         if (argc<2) {
192                 printf("please type this:\n./cedict cedict.gb.utf8\n");
193                 return FALSE;
194         }
195
196         gtk_set_locale ();
197         g_type_init ();
198         for (int i=1; i< argc; i++)
199                 convert (argv[i]);
200         return FALSE;   
201 }
202