return ret;
}
-static kanji_result* rate_next_kanji(GList *strokes, gchar **sdata, gunichar2 *entry)
+static gchar* merge_strokes(gchar **sdata, gint *l1) /* Concatenates every string of sdata into one heap buffer; *l1 receives the total byte count. */
{
- kanji_result *res = calloc(1, sizeof(kanji_result));
- res->uc = *++entry;
- gunichar2 *bakptr = entry;
- gint i, j, l, l1 = 0, l2 = 0;
+ gint i, j, l; /* i: string index, j: write offset into the result, l: length of the current string */
+ *l1 = 0; /* the first pass below accumulates the combined length here */
for(i = 0; i < g_strv_length(sdata); i++)
- l1 += strlen(sdata[i]);
- gchar *s1 = calloc(l1, sizeof(gchar));
+ *l1 += strlen(sdata[i]);
+ gchar *s1 = calloc(*l1, sizeof(gchar)); /* exactly *l1 bytes: no room for a NUL, so the result is NOT a C string */
for(i = 0, j = 0; i < g_strv_length(sdata); i++)
{
l = strlen(sdata[i]);
g_memmove(&(s1[j]), sdata[i], l);
j += l;
}
- for(l2 = 0; g_ascii_isalpha((gchar)*++entry); l2++);
- gchar *s2 = calloc(l2, sizeof(gchar));
- entry = bakptr;
- for(i = 0; i < l2; s2[i++] = (gchar)(*++entry));
+ return s1; /* returned without being freed here: releasing it is left to the caller */
+}
+
+/*
+ * Copies the run of ASCII letters that FOLLOWS *entry (entry[1], entry[2], ...)
+ * into a newly calloc'ed gchar buffer, narrowing each UTF-16 unit to one byte.
+ *
+ * entry: points at the code unit just before the letters; it is not copied.
+ * l2:    out parameter, receives the number of letters copied.
+ *
+ * Returns the buffer, which holds *l2 letters followed by a NUL terminator.
+ * The buffer is not freed here; releasing it is left to the caller.
+ */
+static gchar* unichar_to_char(gunichar2 *entry, gint *l2)
+{
+ gunichar2 *ptr = entry;
+ /* Range-check BEFORE narrowing: casting first would make a non-ASCII unit
+  * such as 0x4E41 look like the letter 'A' and extend the run past its end. */
+ for(*l2 = 0; *++ptr < 0x80 && g_ascii_isalpha((gchar)*ptr); (*l2)++);
+ /* One extra zeroed byte keeps the result NUL-terminated (so strlen() on it
+  * is safe) and avoids a zero-size allocation when the run is empty. */
+ gchar *s2 = calloc(*l2 + 1, sizeof(gchar));
+ gint i;
+ for(i = 0; i < *l2; s2[i++] = (gchar)(*++entry));
+ return s2;
+}
+
+/*
+ * Rates the dictionary entry at `entry` against the strokes the user drew.
+ *
+ * strokes: stroke list, only forwarded to pass_extra_filters().
+ * sdata:   NULL-terminated array of per-stroke strings.
+ * entry:   start of the entry: one unit holding 'A' + stroke count - 1,
+ *          then the kanji code unit, then a run of ASCII letters,
+ *          optionally followed by a '|' section.
+ *
+ * Returns a calloc'ed kanji_result: uc is the kanji code unit and dist is
+ * the Levenshtein distance between the merged sdata and the entry's letters,
+ * plus the pass_extra_filters() result when the entry has a '|' section and
+ * the input has at least as many strokes as the entry. The result is not
+ * freed here; releasing it is left to the caller.
+ */
+static kanji_result* rate_next_kanji(GList *strokes, gchar **sdata, gunichar2 *entry)
+{
+ kanji_result *res = calloc(1, sizeof(kanji_result));
+ gint n1 = g_strv_length(sdata);
+ gint n2 = *entry - 'A' + 1;
+ res->uc = *++entry;
+ gint l1, l2;
+ gchar *s1 = merge_strokes(sdata, &l1);
+ gchar *s2 = unichar_to_char(entry, &l2);
res->dist += LevenshteinDistance(l1, s1, l2, s2);
g_free(s1);
g_free(s2);
+ /* Step over the l2 letters just compared. s2 is already freed and was never
+  * a C string, so strlen(s2) must not be used here; pointer arithmetic on a
+  * gunichar2* is already in units, so no sizeof() scaling is needed either. */
+ entry += l2;
entry++;
if(*entry == '|')
{
- res->dist += pass_extra_filters(strokes, entry);
+ if(n1 >= n2)
+ res->dist += pass_extra_filters(strokes, entry);
}
return res;
}
return 0;
}
-static gunichar2* find_next_entry(gchar *allkanji, gunichar2 *entry, gint allkanjilen, gunichar2 key)
+static gunichar2* find_next_entry(gchar *allkanji, gunichar2 *entry, gint allkanjilen, gunichar2 key1, gunichar2 key2)
{
if(allkanji == (gchar*)entry)
{
++entry;
- if(*entry != key)
- return find_next_entry(allkanji, entry, allkanjilen, key);
+ if(*entry != key1)
+ return find_next_entry(allkanji, entry, allkanjilen, key1, key2);
}
else
{
if(*++entry == '\n')
{
entry++;
- if(*entry == key)
- break;
- if(*entry > key)
+ if(*entry > key2)
return 0;
+ if(*entry >= key1)
+ break;
++entry;
}
}
static gunichar2* pick_kanji(GList *strokes, gchar **sdata, gchar *allkanji, gint allkanjilen)
{
- const gint MAX_DISTANCE = 5;
+ const gint MAX_COUNT = 25;
gint datalen = g_strv_length(sdata), i;
+ gint delta = 1 + datalen/8;
gunichar2 key = 'A' + datalen - 1;
+ gunichar2 key1 = key - delta, key2 = key + delta;
+ if(key1 < 'A') key1 = 'A';
gunichar2 *entry = (gunichar2*)allkanji;
if(key > 'Z')
return 0;
- entry = find_next_entry(allkanji, entry, allkanjilen, key);
+ entry = find_next_entry(allkanji, entry, allkanjilen, key1, key2);
if(!entry)
return 0;
GPtrArray *arr = g_ptr_array_new();
{
kanji_result *res = rate_next_kanji(strokes, sdata, entry);
g_ptr_array_add(arr, res);
- g_ptr_array_sort(arr, (GCompareFunc)kanji_results_compare);
- for(i = arr->len - 1; i >= 0; i--)
+ entry = find_next_entry(allkanji, entry, allkanjilen, key1, key2);
+ if(!entry)
+ break;
+ }
+ g_ptr_array_sort(arr, (GCompareFunc)kanji_results_compare);
+ if(arr->len > MAX_COUNT)
+ {
+ kanji_result *res = g_ptr_array_index(arr, MAX_COUNT-1);
+ gint max_dist = res->dist;
+ for(i = arr->len - 1; i >= MAX_COUNT; i--)
{
kanji_result *res = g_ptr_array_index(arr, i);
- if(res->dist > MAX_DISTANCE)
+ if(res->dist > max_dist)
g_ptr_array_remove_index(arr, i);
else
break;
}
- entry = find_next_entry(allkanji, entry, allkanjilen, key);
- if(!entry)
- break;
}
gunichar2 *ret = calloc(arr->len + 1, sizeof(gunichar2));
for(i = 0; i < arr->len; i++)