OSDN Git Service

fixed tests and updated test.py
[nvdajp/nvdajpmiscdep.git] / jptools / mecabRunner.py
1 # mecabRunner.py \r
2 # -*- coding: utf-8 -*-\r
3 # Japanese text processor test module\r
4 # by Takuya Nishimoto\r
5 \r
6 from __future__ import unicode_literals\r
7 import os\r
8 import sys\r
9 from glob import glob\r
# The jtalk synth driver directory is resolved relative to the current
# working directory (not relative to this file), then appended to sys.path
# so the imports that follow can find the modules it contains.
jt_dir = os.path.normpath(os.path.join(
    os.getcwdu(), '..', 'source', 'synthDrivers', 'jtalk'))
sys.path.append(jt_dir)
14 from mecab import *\r
15 from mecabHarness import tasks\r
16 import jtalkDir\r
17 \r
# System dictionary directory inside the jtalk driver directory, plus the
# user dictionary settings re-exported from jtalkDir.
dic = os.path.join(jt_dir, 'dic')
user_dics_org, user_dics = jtalkDir.user_dics_org, jtalkDir.user_dics
21 \r
def __print(s):
    """Write *s* to standard output as UTF-8 encoded bytes.

    Characters that cannot be encoded are dropped ('ignore' error handler).
    """
    encoded = s.encode('utf-8', 'ignore')
    print(encoded)
24 \r
# Module-level accumulator filled by __print_dummy().
buffer = ''


def __print_dummy(s):
    """Capture *s* instead of printing it.

    The text, followed by a newline, is appended to the module-level
    ``buffer``.
    """
    global buffer
    buffer = ''.join((buffer, s, '\n'))
30 \r
def Mecab_get_reading(mf, CODE_=CODE):
    """Build the reading and braille strings for the analysed features.

    For every position in ``mf`` the feature string returned by
    ``Mecab_getFeature`` is split on commas:

    * With more than 9 fields, field 9 is the reading, with U+3000
      (ideographic space) replaced by an ASCII space.
    * Otherwise field 0 is the reading, unless it is the long-vowel mark
      'ー', which contributes nothing.
    * With more than 12 fields, field 12 is the braille segment; otherwise
      the reading is reused as the braille segment.

    Returns a ``(reading, braille)`` tuple. Braille segments are separated
    by "/" and trailing spaces/slashes are stripped from the result.
    """
    readings = []
    segments = []
    for index in xrange(mf.size):
        fields = Mecab_getFeature(mf, index, CODE_=CODE_).split(',')
        if len(fields) > 9:
            yomi = fields[9].replace('\u3000', ' ')
        elif fields[0] == 'ー':
            yomi = ''
        else:
            yomi = fields[0]
        readings.append(yomi)
        segments.append(fields[12] if len(fields) > 12 else yomi)
    reading = ''.join(readings)
    braille = ''.join(segment + r"/" for segment in segments)
    return (reading, braille.rstrip(r" /"))
47 \r
def get_reading(msg):
    """Analyse *msg* and return its ``(reading, braille)`` pair.

    The text is converted with ``text2mecab``, analysed into a fresh
    ``MecabFeatures`` object and corrected with ``Mecab_correctFeatures``.
    The feature dump produced by ``Mecab_print`` is routed to
    ``__print_dummy`` (captured in the module buffer rather than printed).
    """
    prepared = text2mecab(msg)
    features = MecabFeatures()
    Mecab_analysis(prepared, features)
    Mecab_correctFeatures(features)
    Mecab_print(features, __print_dummy)
    # The local reference to the features object is released on return.
    return Mecab_get_reading(features)
57 \r
58 def runTasks(enableUserDic=False):\r
59         print jt_dir, dic, user_dics_org, user_dics\r
60         if enableUserDic:\r
61                 Mecab_initialize(__print, jt_dir, dic, user_dics)\r
62         else:\r
63                 Mecab_initialize(__print, jt_dir, dic)\r
64         count = 0\r
65         for i in tasks:\r
66                 if isinstance(i, dict):\r
67                         if 'braille' in i:\r
68                                 if 'speech' in i:\r
69                                         item = [ i['text'], i['speech'], i['braille'] ]\r
70                                 else:\r
71                                         s = i['braille'].replace(' ', '').replace('/', '')\r
72                                         item = [ i['text'], s, i['braille'] ]\r
73                         elif 'input' in i:\r
74                                 if 'speech' in i:\r
75                                         item = [ i['text'], i['speech'], i['input'] ]\r
76                                 else:\r
77                                         s = i['input'].replace(' ', '').replace('/', '')\r
78                                         item = [ i['text'], s, i['input'] ]\r
79                         else:\r
80                                 item = [ i['text'], i['speech'] ]\r
81                 else:\r
82                         item = i\r
83                 buffer = ''\r
84                 result = get_reading(item[0])\r
85                 if item[1] is not None and result[0] != item[1]:\r
86                         __print('')\r
87                         __print('')\r
88                         __print(buffer)\r
89                         __print('input:    ' + item[0])\r
90                         __print('reading expected: ' + item[1])\r
91                         __print('reading result:   ' + result[0])\r
92                         count += 1\r
93                 if len(item) > 2 and result[1] != item[2]:\r
94                         __print('')\r
95                         __print('')\r
96                         __print(buffer)\r
97                         __print('input:            ' + item[0])\r
98                         __print('braille expected: ' + item[2])\r
99                         __print('braille result:   ' + result[1])\r
100                         count += 1\r
101 \r
102         return count\r
103 \r
# Script entry point: run the whole harness with user dictionaries enabled.
if __name__ == '__main__':
    runTasks(True)