OSDN Git Service

modified: src/recognize_kanji.c
author evgeny <evgeny@users.sourceforge.jp>
Mon, 23 May 2011 16:06:32 +0000 (20:06 +0400)
committer evgeny <evgeny@users.sourceforge.jp>
Mon, 23 May 2011 16:06:32 +0000 (20:06 +0400)
src/Makefile.am
src/Makefile.in
src/recognize_kanji.c
src/recognize_stroke.c
src/recognize_stroke.h

index 4a9a038..364b594 100644 (file)
@@ -4,7 +4,8 @@ bin_PROGRAMS = kp123
 kp123_SOURCES =        kp123.c \
                padarea.c \
                recognize_stroke.c \
-               recognize_kanji.c
+               recognize_kanji.c \
+               recognize_extra.c
 
 AM_CFLAGS = @GTK_CFLAGS@ -Wall -funsigned-char
 
index d131dc9..117f346 100644 (file)
@@ -51,7 +51,8 @@ CONFIG_CLEAN_VPATH_FILES =
 am__installdirs = "$(DESTDIR)$(bindir)"
 PROGRAMS = $(bin_PROGRAMS)
 am_kp123_OBJECTS = kp123.$(OBJEXT) padarea.$(OBJEXT) \
-       recognize_stroke.$(OBJEXT) recognize_kanji.$(OBJEXT)
+       recognize_stroke.$(OBJEXT) recognize_kanji.$(OBJEXT) \
+       recognize_extra.$(OBJEXT)
 kp123_OBJECTS = $(am_kp123_OBJECTS)
 kp123_DEPENDENCIES =
 DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
@@ -191,7 +192,8 @@ top_srcdir = @top_srcdir@
 kp123_SOURCES = kp123.c \
                padarea.c \
                recognize_stroke.c \
-               recognize_kanji.c
+               recognize_kanji.c \
+               recognize_extra.c
 
 AM_CFLAGS = @GTK_CFLAGS@ -Wall -funsigned-char
 kp123_LDADD = @GTK_LIBS@
@@ -284,6 +286,7 @@ distclean-compile:
 
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kp123.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/padarea.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/recognize_extra.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/recognize_kanji.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/recognize_stroke.Po@am__quote@
 
index 754cc4e..5bde148 100644 (file)
@@ -3,11 +3,12 @@
 
 #include "recognize_stroke.h"
 #include "recognize_kanji.h"
+#include "recognize_extra.h"
 
 #if 0
-static void print_stroke(GList *s)
+void print_stroke(GList *s)
 {
-    g_printf("stroke len = %d\n", g_list_length(s));
+    g_print("stroke len = %d\n", g_list_length(s));
     for(;s; s = g_list_next(s))
     {
        gint16 x = ((GdkPoint *)s->data)->x;
@@ -15,6 +16,21 @@ static void print_stroke(GList *s)
        g_printf("%d %d\n", x, y);
     }
 }
+
+static void print_gunichar2(gunichar2 *entry, gint len)
+{
+    gint i;
+    for(i = 0; i < len; i++)
+    {
+       gchar *tmp = (gchar*)entry;
+       if(tmp[0] == 0xD)
+           break;
+       gchar *utfstr = g_convert((gchar*)(entry++), 2, "UTF-8", "UTF-16LE", NULL, NULL, NULL);
+       g_printf("%s", utfstr);
+       g_free(utfstr);
+    }
+    g_printf("\n");
+}
 #endif
 
 typedef struct _kanji_result
@@ -53,7 +69,7 @@ gint LevenshteinDistance(gint l1, gchar *s1, gint l2, gchar *s2)
     return ret;
 }
 
-static kanji_result* rate_next_kanji(gchar **sdata, gunichar2 *entry)
+static kanji_result* rate_next_kanji(GList *strokes, gchar **sdata, gunichar2 *entry)
 {
     kanji_result *res = calloc(1, sizeof(kanji_result));
     res->uc = *++entry;
@@ -75,6 +91,11 @@ static kanji_result* rate_next_kanji(gchar **sdata, gunichar2 *entry)
     res->dist += LevenshteinDistance(l1, s1, l2, s2);
     g_free(s1);
     g_free(s2);
+    entry++;
+    if(*entry == '|')
+    {
+       res->dist += pass_extra_filters(strokes, entry);
+    }
     return res;
 }
 
@@ -90,10 +111,16 @@ static gint kanji_results_compare(gpointer *ptr1, gpointer *ptr2)
 
 static gunichar2* find_next_entry(gchar *allkanji, gunichar2 *entry, gint allkanjilen, gunichar2 key)
 {
-    if(allkanji == (gchar*)entry && key == 'A')
+    if(allkanji == (gchar*)entry)
+    {
        ++entry;
+       if(*entry != key)
+           return find_next_entry(allkanji, entry, allkanjilen, key);
+    }
     else
     {
+       if((gchar*)entry - allkanji >= allkanjilen)
+           return 0;
        while((gchar*)entry - allkanji < allkanjilen)
        {
            if(*++entry == '\n')
@@ -110,9 +137,9 @@ static gunichar2* find_next_entry(gchar *allkanji, gunichar2 *entry, gint allkan
     return entry;
 }
 
-static gunichar2* pick_kanji(gchar **sdata, gchar *allkanji, gint allkanjilen)
+static gunichar2* pick_kanji(GList *strokes, gchar **sdata, gchar *allkanji, gint allkanjilen)
 {
-    const gint MAX_DISTANCE = 3;
+    const gint MAX_DISTANCE = 5;
     gint datalen = g_strv_length(sdata), i;
     gunichar2 key = 'A' + datalen - 1;
     gunichar2 *entry = (gunichar2*)allkanji;
@@ -126,7 +153,7 @@ static gunichar2* pick_kanji(gchar **sdata, gchar *allkanji, gint allkanjilen)
     g_ptr_array_set_free_func(arr, g_free);
     for(;;)
     {
-       kanji_result *res = rate_next_kanji(sdata, entry);
+       kanji_result *res = rate_next_kanji(strokes, sdata, entry);
        g_ptr_array_add(arr, res);
        g_ptr_array_sort(arr, (GCompareFunc)kanji_results_compare);
        for(i = arr->len - 1; i >= 0; i--)
@@ -155,6 +182,7 @@ gunichar2* recognize_kanji(GList *strokes)
 {
     static gchar **sdata = NULL;
     static gint sdata_len = 0;
+    static GList *tmp = NULL;
     gint strokes_len = g_list_length(strokes);
     if(!strokes_len)
     {
@@ -163,13 +191,22 @@ gunichar2* recognize_kanji(GList *strokes)
        g_free(sdata);
        sdata = NULL;
        sdata_len = 0;
+       g_list_free(tmp);
+       tmp = NULL;
        return 0;
     }
     if(strokes_len == sdata_len - 1)
+    {
        g_free(sdata[sdata_len - 1]);
+       tmp = g_list_remove(tmp, g_list_last(tmp)->data);
+    }
     sdata = g_realloc(sdata, (strokes_len + 1)*sizeof(gchar*));
     if(strokes_len == sdata_len + 1)
-       sdata[strokes_len - 1] = recognize_stroke(g_list_first(g_list_last(strokes)->data));
+    {
+       GList *s = NULL;
+       sdata[strokes_len - 1] = recognize_stroke(g_list_first(g_list_last(strokes)->data), &s);
+       tmp = g_list_append(tmp, s);
+    }
     sdata[strokes_len] = 0;
     sdata_len = strokes_len;
 #ifdef KP_LIBDIR
@@ -188,7 +225,7 @@ gunichar2* recognize_kanji(GList *strokes)
     }
     gint allkanjilen = g_mapped_file_get_length(file);
     gchar *allkanji = g_mapped_file_get_contents(file);
-    gunichar2 *result = pick_kanji(sdata, allkanji, allkanjilen);
+    gunichar2 *result = pick_kanji(tmp, sdata, allkanji, allkanjilen);
     g_mapped_file_unref(file);
     return result;
 }
index 756370e..fd932df 100644 (file)
@@ -142,7 +142,7 @@ static float find_average_and_smallest_dist(GList *s, GList **pt0, GList **pt1,
 /* let A, B be consecutive points
  * if dist(A,B) less than a certain percentage of average dist
  * then remove B */
-static int filter_points_by_dist(GList *s)
+static GList* filter_points_by_dist(GList *s)
 {
     const float MAX_MIN_DIST = 0.3;
     float d0, dmin;
@@ -150,7 +150,7 @@ static int filter_points_by_dist(GList *s)
     while(1)
     {
        if(g_list_length(s) < 3)
-           return 0;
+           return g_list_first(s);
        d0 = MAX_MIN_DIST*find_average_and_smallest_dist(s, &pt0, &pt1, &dmin);
        if(dmin < d0)
        {
@@ -177,7 +177,7 @@ static int filter_points_by_dist(GList *s)
        else
            break;
     }
-    return 0;
+    return s;
 }
 
 gchar *get_stroke_str(GList *s)
@@ -203,12 +203,12 @@ gchar *get_stroke_str(GList *s)
     return ret;
 }
 
-gchar* recognize_stroke(GList *stroke)
+gchar* recognize_stroke(GList *stroke, GList **out)
 {
-    GList *tmp = g_list_copy(stroke);
-    tmp = filter_equal_points(tmp);
-    tmp = filter_points_by_angle(tmp);
-    filter_points_by_dist(tmp);
-    return get_stroke_str(tmp);
+    *out = g_list_copy(stroke);
+    *out = filter_equal_points(*out);
+    *out = filter_points_by_angle(*out);
+    *out = filter_points_by_dist(*out);
+    return get_stroke_str(*out);
 }
 
index 6e29650..0916936 100644 (file)
@@ -2,6 +2,6 @@
 #ifndef __KP_RECOGNIZE_STROKE__
 #define __KP_RECOGNIZE_STROKE__
 
-gchar* recognize_stroke(GList *stroke);
+gchar* recognize_stroke(GList *stroke, GList **out);
 
 #endif