4 #include "recognize_stroke.h"
5 #include "recognize_kanji.h"
6 #include "recognize_extra.h"
9 void print_stroke(GList *s)
11 g_print("stroke len = %d\n", g_list_length(s));
12 for(;s; s = g_list_next(s))
14 gint16 x = ((GdkPoint *)s->data)->x;
15 gint16 y = ((GdkPoint *)s->data)->y;
16 g_printf("%d %d\n", x, y);
20 static void print_gunichar2(gunichar2 *entry, gint len)
23 for(i = 0; i < len; i++)
25 gchar *tmp = (gchar*)entry;
28 gchar *utfstr = g_convert((gchar*)(entry++), 2, "UTF-8", "UTF-16LE", NULL, NULL, NULL);
29 g_printf("%s", utfstr);
36 typedef struct _kanji_result
42 gint LevenshteinDistance(gint l1, gchar *s1, gint l2, gchar *s2)
44 gchar *d = calloc((l1+1)*(l2+1)+1, sizeof(gchar));
47 for(i = 0; i <= l1; i++)
49 for(j = 0; j <= l2; j++)
52 for(j = 1; j <= l2; j++)
54 for(i = 1; i <= l1; i++)
56 if(s1[i-1] == s2[j-1])
57 d[i*(l2 + 1) + j] = d[(i-1)*(l2 + 1) + j - 1];
60 gint a = d[(i-1)*(l2 + 1) + j] + 1;
61 gint b = d[i*(l2 + 1) + j - 1] + 1;
62 gint c = d[(i-1)*(l2 + 1) + j - 1] + 1;
63 d[i*(l2 + 1) + j] = (a > b) ? ((b > c) ? c : b) : ((a > c) ? c : a);
67 gint ret = d[(l1+1)*(l2+1) - 1];
72 static gchar* merge_strokes(gchar **sdata, gint *l1)
76 for(i = 0; i < g_strv_length(sdata); i++)
77 *l1 += strlen(sdata[i]);
78 gchar *s1 = calloc(*l1, sizeof(gchar));
79 for(i = 0, j = 0; i < g_strv_length(sdata); i++)
82 g_memmove(&(s1[j]), sdata[i], l);
88 static gchar* unichar_to_char(gunichar2 *entry, gint *l2)
91 gunichar2 *ptr = entry;
92 for(*l2 = 0; g_ascii_isalpha((gchar)*++ptr); (*l2)++);
93 gchar *s2 = calloc(*l2, sizeof(gchar));
95 for(i = 0; i < *l2; s2[i++] = (gchar)(*++entry));
99 static kanji_result* rate_next_kanji(GList *strokes, gchar **sdata, gunichar2 *entry)
101 kanji_result *res = calloc(1, sizeof(kanji_result));
102 gint n1 = g_strv_length(sdata);
103 gint n2 = *entry - 'A' + 1;
106 gchar *s1 = merge_strokes(sdata, &l1);
107 gchar *s2 = unichar_to_char(entry, &l2);
108 res->dist += LevenshteinDistance(l1, s1, l2, s2);
111 entry += strlen(s2)*sizeof(gunichar2);
116 res->dist += pass_extra_filters(strokes, entry);
121 static gint kanji_results_compare(gpointer *ptr1, gpointer *ptr2)
123 kanji_result *kr1 = (kanji_result*)*ptr1, *kr2 = (kanji_result*)*ptr2;
124 if(kr1->dist > kr2->dist)
126 if(kr1->dist < kr2->dist)
131 static gunichar2* find_next_entry(gchar *allkanji, gunichar2 *entry, gint allkanjilen, gunichar2 key1, gunichar2 key2)
133 if(allkanji == (gchar*)entry)
137 return find_next_entry(allkanji, entry, allkanjilen, key1, key2);
141 if((gchar*)entry - allkanji >= allkanjilen)
143 while((gchar*)entry - allkanji < allkanjilen)
159 static gunichar2* pick_kanji(GList *strokes, gchar **sdata, gchar *allkanji, gint allkanjilen)
161 const gint MAX_COUNT = 25;
162 gint datalen = g_strv_length(sdata), i;
163 gint delta = 1 + datalen/8;
164 gunichar2 key = 'A' + datalen - 1;
165 gunichar2 key1 = key - delta, key2 = key + delta;
166 if(key1 < 'A') key1 = 'A';
167 gunichar2 *entry = (gunichar2*)allkanji;
171 entry = find_next_entry(allkanji, entry, allkanjilen, key1, key2);
174 GPtrArray *arr = g_ptr_array_new();
175 g_ptr_array_set_free_func(arr, g_free);
178 kanji_result *res = rate_next_kanji(strokes, sdata, entry);
179 g_ptr_array_add(arr, res);
180 entry = find_next_entry(allkanji, entry, allkanjilen, key1, key2);
184 g_ptr_array_sort(arr, (GCompareFunc)kanji_results_compare);
185 if(arr->len > MAX_COUNT)
187 kanji_result *res = g_ptr_array_index(arr, MAX_COUNT-1);
188 gint max_dist = res->dist;
189 for(i = arr->len - 1; i >= MAX_COUNT; i--)
191 kanji_result *res = g_ptr_array_index(arr, i);
192 if(res->dist > max_dist)
193 g_ptr_array_remove_index(arr, i);
198 gunichar2 *ret = calloc(arr->len + 1, sizeof(gunichar2));
199 for(i = 0; i < arr->len; i++)
201 kanji_result *res = (kanji_result*)g_ptr_array_index(arr, i);
204 g_ptr_array_free(arr, TRUE);
208 static GMappedFile* load_strokes_txt(gchar *dir)
210 gchar *fname = g_build_filename(dir, "strokes.txt", NULL);
211 GMappedFile *file = g_mapped_file_new(fname, FALSE, NULL);
216 gunichar2* recognize_kanji(GList *strokes)
218 static gchar **sdata = NULL;
219 static gint sdata_len = 0;
220 static GList *tmp = NULL;
221 gint strokes_len = g_list_length(strokes);
225 for(i = 0; i < sdata_len; g_free(sdata[i++]));
233 if(strokes_len == sdata_len - 1)
235 g_free(sdata[sdata_len - 1]);
236 tmp = g_list_remove(tmp, g_list_last(tmp)->data);
238 sdata = g_realloc(sdata, (strokes_len + 1)*sizeof(gchar*));
239 if(strokes_len == sdata_len + 1)
242 sdata[strokes_len - 1] = recognize_stroke(g_list_first(g_list_last(strokes)->data), &s);
243 tmp = g_list_append(tmp, s);
245 sdata[strokes_len] = 0;
246 sdata_len = strokes_len;
247 GMappedFile *file = load_strokes_txt("");
251 file = load_strokes_txt(KP123_DATADIR);
256 g_printf("failed to load strokes.txt\n");
259 gint allkanjilen = g_mapped_file_get_length(file);
260 gchar *allkanji = g_mapped_file_get_contents(file);
261 gunichar2 *result = pick_kanji(tmp, sdata, allkanji, allkanjilen);
262 g_mapped_file_unref(file);