4 #include "recognize_stroke.h"
5 #include "recognize_kanji.h"
6 #include "recognize_extra.h"
9 void print_stroke(GList *s)
11 g_print("stroke len = %d\n", g_list_length(s));
12 for(;s; s = g_list_next(s))
14 gint16 x = ((GdkPoint *)s->data)->x;
15 gint16 y = ((GdkPoint *)s->data)->y;
16 g_printf("%d %d\n", x, y);
20 static void print_gunichar2(gunichar2 *entry, gint len)
23 for(i = 0; i < len; i++)
25 gchar *tmp = (gchar*)entry;
28 gchar *utfstr = g_convert((gchar*)(entry++), 2, "UTF-8", "UTF-16LE", NULL, NULL, NULL);
29 g_printf("%s", utfstr);
36 typedef struct _kanji_result
42 gint LevenshteinDistance(gint l1, gchar *s1, gint l2, gchar *s2)
44 gchar *d = calloc((l1+1)*(l2+1)+1, sizeof(gchar));
47 for(i = 0; i <= l1; i++)
49 for(j = 0; j <= l2; j++)
52 for(j = 1; j <= l2; j++)
54 for(i = 1; i <= l1; i++)
56 if(s1[i-1] == s2[j-1])
57 d[i*(l2 + 1) + j] = d[(i-1)*(l2 + 1) + j - 1];
60 gint a = d[(i-1)*(l2 + 1) + j] + 1;
61 gint b = d[i*(l2 + 1) + j - 1] + 1;
62 gint c = d[(i-1)*(l2 + 1) + j - 1] + 1;
63 d[i*(l2 + 1) + j] = (a > b) ? ((b > c) ? c : b) : ((a > c) ? c : a);
67 gint ret = d[(l1+1)*(l2+1) - 1];
72 static kanji_result* rate_next_kanji(GList *strokes, gchar **sdata, gunichar2 *entry)
74 kanji_result *res = calloc(1, sizeof(kanji_result));
76 gunichar2 *bakptr = entry;
77 gint i, j, l, l1 = 0, l2 = 0;
78 for(i = 0; i < g_strv_length(sdata); i++)
79 l1 += strlen(sdata[i]);
80 gchar *s1 = calloc(l1, sizeof(gchar));
81 for(i = 0, j = 0; i < g_strv_length(sdata); i++)
84 g_memmove(&(s1[j]), sdata[i], l);
87 for(l2 = 0; g_ascii_isalpha((gchar)*++entry); l2++);
88 gchar *s2 = calloc(l2, sizeof(gchar));
90 for(i = 0; i < l2; s2[i++] = (gchar)(*++entry));
91 res->dist += LevenshteinDistance(l1, s1, l2, s2);
97 res->dist += pass_extra_filters(strokes, entry);
102 static gint kanji_results_compare(gpointer *ptr1, gpointer *ptr2)
104 kanji_result *kr1 = (kanji_result*)*ptr1, *kr2 = (kanji_result*)*ptr2;
105 if(kr1->dist > kr2->dist)
107 if(kr1->dist < kr2->dist)
112 static gunichar2* find_next_entry(gchar *allkanji, gunichar2 *entry, gint allkanjilen, gunichar2 key)
114 if(allkanji == (gchar*)entry)
118 return find_next_entry(allkanji, entry, allkanjilen, key);
122 if((gchar*)entry - allkanji >= allkanjilen)
124 while((gchar*)entry - allkanji < allkanjilen)
140 static gunichar2* pick_kanji(GList *strokes, gchar **sdata, gchar *allkanji, gint allkanjilen)
142 const gint MAX_DISTANCE = 5;
143 gint datalen = g_strv_length(sdata), i;
144 gunichar2 key = 'A' + datalen - 1;
145 gunichar2 *entry = (gunichar2*)allkanji;
149 entry = find_next_entry(allkanji, entry, allkanjilen, key);
152 GPtrArray *arr = g_ptr_array_new();
153 g_ptr_array_set_free_func(arr, g_free);
156 kanji_result *res = rate_next_kanji(strokes, sdata, entry);
157 g_ptr_array_add(arr, res);
158 g_ptr_array_sort(arr, (GCompareFunc)kanji_results_compare);
159 for(i = arr->len - 1; i >= 0; i--)
161 kanji_result *res = g_ptr_array_index(arr, i);
162 if(res->dist > MAX_DISTANCE)
163 g_ptr_array_remove_index(arr, i);
167 entry = find_next_entry(allkanji, entry, allkanjilen, key);
171 gunichar2 *ret = calloc(arr->len + 1, sizeof(gunichar2));
172 for(i = 0; i < arr->len; i++)
174 kanji_result *res = (kanji_result*)g_ptr_array_index(arr, i);
177 g_ptr_array_free(arr, TRUE);
181 gunichar2* recognize_kanji(GList *strokes)
183 static gchar **sdata = NULL;
184 static gint sdata_len = 0;
185 static GList *tmp = NULL;
186 gint strokes_len = g_list_length(strokes);
190 for(i = 0; i < sdata_len; g_free(sdata[i++]));
198 if(strokes_len == sdata_len - 1)
200 g_free(sdata[sdata_len - 1]);
201 tmp = g_list_remove(tmp, g_list_last(tmp)->data);
203 sdata = g_realloc(sdata, (strokes_len + 1)*sizeof(gchar*));
204 if(strokes_len == sdata_len + 1)
207 sdata[strokes_len - 1] = recognize_stroke(g_list_first(g_list_last(strokes)->data), &s);
208 tmp = g_list_append(tmp, s);
210 sdata[strokes_len] = 0;
211 sdata_len = strokes_len;
213 gchar *dir = g_strdup(KP123_DATADIR);
214 g_printf("loading strokes from %s\n", dir);
218 gchar *fname = g_build_filename(dir, "strokes.txt", NULL);
219 GMappedFile *file = g_mapped_file_new(fname, FALSE, NULL);
224 g_printf("failed to open strokes.txt\n");
227 gint allkanjilen = g_mapped_file_get_length(file);
228 gchar *allkanji = g_mapped_file_get_contents(file);
229 gunichar2 *result = pick_kanji(tmp, sdata, allkanji, allkanjilen);
230 g_mapped_file_unref(file);