+++ /dev/null
-# _jtalk_core.py
-# -*- coding: utf-8 -*-
-#A part of NonVisual Desktop Access (NVDA)
-#Copyright (C) 2010-2012 Takuya Nishimoto (NVDA Japanese Team)
-#This file is covered by the GNU General Public License.
-#See the file COPYING for more details.
-
-# Japanese speech engine wrapper for Open JTalk
-# http://ja.nishimotz.com/project:libopenjtalk
-
-import codecs
-import re
-import string
-import os
-import struct
-import sys
-from mecab import *
-
-############################################
-
-# htsengineapi/include/HTS_engine.h
-
-# size of structure:
-# HTS_Global 56
-# HTS_ModelSet 76
-# HTS_Label 24
-# HTS_SStreamSet 24
-# HTS_PStreamSet 12
-# HTS_GStreamSet 20
-
-# Opaque stand-ins for HTS structures that Python never inspects: each class
-# only reserves a run of bytes so that the members HTS_Engine embeds after it
-# are placed at the intended offsets.
-# NOTE(review): the size table above lists HTS_ModelSet as 76 bytes and
-# HTS_Label as 24, while the dummies below reserve 56 and 76 -- confirm
-# against HTS_engine.h of the DLL that is actually shipped.
-class HTS_ModelSet(Structure):
- _fields_ = [
- ("_dummy", c_byte * 56),
- ]
-
-class HTS_Label(Structure):
- _fields_ = [
- ("_dummy", c_byte * 76),
- ]
-HTS_Label_ptr = POINTER(HTS_Label)
-
-class HTS_SStreamSet(Structure):
- _fields_ = [
- ("_dummy", c_byte * 24),
- ]
-
-class HTS_PStreamSet(Structure):
- _fields_ = [
- ("_dummy", c_byte * 12),
- ]
-
-# ctypes mirror of one generated parameter stream; the trailing comments on
-# the fields quote the C declarations.
-class HTS_GStream(Structure):
- _fields_ = [
- ("static_length", c_int), # int static_length; /* static features length */
- ("par", c_double_p_p), # double **par; /* generated parameter */
- ]
-
-HTS_GStream_ptr = POINTER(HTS_GStream)
-
-# FIXME: engine.gss.total_nsample is always 0
-# NOTE(review): libjt_synthesis() does not read this field; it takes the
-# sample count from jt_trim_silence() and the samples from jt_speech_ptr().
-# A layout mismatch in the members embedded before "gss" in HTS_Engine is one
-# possible cause of the FIXME -- unconfirmed.
-class HTS_GStreamSet(Structure):
- _fields_ = [
- ("total_nsample", c_int), # int total_nsample; /* total sample */
- ("total_frame", c_int), # int total_frame; /* total frame */
- ("nstream", c_int), # int nstream; /* # of streams */
- ("gstream", HTS_GStream_ptr), # HTS_GStream *gstream; /* generated parameter streams */
- ("gspeech", c_short_p), # short *gspeech; /* generated speech */
- ]
-HTS_GStreamSet_ptr = POINTER(HTS_GStreamSet)
-
-# ctypes mirror of the engine's global settings block; it is embedded by value
-# as the first member of HTS_Engine, so field order and types fix the layout.
-# NOTE(review): the first field is named "state" while its comment talks about
-# "stage" -- only the position matters to ctypes, but confirm the intended name.
-class HTS_Global(Structure):
- _fields_ = [
- ("state", c_int), # /* Gamma=-1/stage : if stage=0 then Gamma=0 */
- ("use_log_gain", c_int), # HTS_Boolean (TRUE=1) /* log gain flag (for LSP) */
- ("sampling_rate", c_int), # /* sampling rate */
- ("fperiod", c_int), # /* frame period */
- ("alpha", c_double), # /* all-pass constant */
- ("beta", c_double), # /* postfiltering coefficient */
- ("audio_buff_size", c_int), # /* audio buffer size (for audio device) */
- ("msd_threshold", c_double_p), # /* MSD thresholds */
- ("duration_iw", c_double_p), # /* weights for duration interpolation */
- ("parameter_iw", c_double_p_p), # /* weights for parameter interpolation */
- ("gv_iw", c_double_p_p), # /* weights for GV interpolation */
- ("gv_weight", c_double_p), # /* GV weights */
- ]
-HTS_Global_ptr = POINTER(HTS_Global)
-
-# ctypes mirror of the top-level engine object handed to every HTS_Engine_*
-# and jt_* call. All sub-structures are embedded by value, so the byte sizes
-# of the opaque ones determine where "gss" and the lf0 fields are read from.
-class HTS_Engine(Structure):
- _fields_ = [
- ("global", HTS_Global),
- ("audio", c_void_p), # HTS_Audio (requires nvdajp miscdep 86 or later)
- ("ms", HTS_ModelSet),
- ("label", HTS_Label),
- ("sss", HTS_SStreamSet),
- ("pss", HTS_PStreamSet),
- ("gss", HTS_GStreamSet),
- ("lf0_offset", c_double),
- ("lf0_amp", c_double),
- ]
-HTS_Engine_ptr = POINTER(HTS_Engine)
-
-############################################
-
-# Opaque 8-byte placeholder for the NJD object; it is only ever passed to the
-# DLL by pointer.
-class NJD(Structure):
- _fields_ = [
- ("_dummy", c_byte * 8),
- ]
-NJD_ptr = POINTER(NJD)
-
-# Doubly linked list node (prev/next). The class is declared empty first so
-# that the pointer type exists before _fields_ refers to it.
-class JPCommonNode(Structure):
- pass
-JPCommonNode_ptr = POINTER(JPCommonNode)
-JPCommonNode._fields_ = [
- ('pron', c_char_p),
- ('pos', c_char_p),
- ('ctype', c_char_p),
- ('cform', c_char_p),
- ('acc', c_int),
- ('chain_flag', c_int),
- ('prev', JPCommonNode_ptr),
- ('next', JPCommonNode_ptr),
- ]
-
-# The five JPCommonLabel* classes below are declared without fields: this
-# module only needs pointer types to them for the head/tail members of
-# JPCommonLabel.
-class JPCommonLabelBreathGroup(Structure):
- pass
-JPCommonLabelBreathGroup_ptr = POINTER(JPCommonLabelBreathGroup)
-
-class JPCommonLabelAccentPhrase(Structure):
- pass
-JPCommonLabelAccentPhrase_ptr = POINTER(JPCommonLabelAccentPhrase)
-
-class JPCommonLabelWord(Structure):
- pass
-JPCommonLabelWord_ptr = POINTER(JPCommonLabelWord)
-
-class JPCommonLabelMora(Structure):
- pass
-JPCommonLabelMora_ptr = POINTER(JPCommonLabelMora)
-
-class JPCommonLabelPhoneme(Structure):
- pass
-JPCommonLabelPhoneme_ptr = POINTER(JPCommonLabelPhoneme)
-
-# jpcommon/jpcommon.h
-# Label container: "size" and "feature" plus head/tail pointers for each
-# level (breath group, accent phrase, word, mora, phoneme).
-class JPCommonLabel(Structure):
- _fields_ = [
- ('size', c_int),
- ('feature', c_char_p_p),
- ('breath_head', JPCommonLabelBreathGroup_ptr),
- ('breath_tail', JPCommonLabelBreathGroup_ptr),
- ('accent_head', JPCommonLabelAccentPhrase_ptr),
- ('accent_tail', JPCommonLabelAccentPhrase_ptr),
- ('word_head', JPCommonLabelWord_ptr),
- ('word_tail', JPCommonLabelWord_ptr),
- ('mora_head', JPCommonLabelMora_ptr),
- ('mora_tail', JPCommonLabelMora_ptr),
- ('phoneme_head', JPCommonLabelPhoneme_ptr),
- ('phoneme_tail', JPCommonLabelPhoneme_ptr),
- ('short_pause_flag', c_int),
- ]
-JPCommonLabel_ptr = POINTER(JPCommonLabel)
-
-# Top-level JPCommon object: head/tail of the JPCommonNode list and a pointer
-# to the label built from it.
-class JPCommon(Structure):
- _fields_ = [
- ("head", JPCommonNode_ptr),
- ("tail", JPCommonNode_ptr),
- ("label", JPCommonLabel_ptr),
- ]
-JPCommon_ptr = POINTER(JPCommon)
-
-# for debug
-def JPC_label_print(feature, size, logwrite_):
-    """Debug helper: send the first `size` label strings to `logwrite_` as one message.
-
-    feature   -- indexable collection of C strings (read with string_at), or None.
-    size      -- number of entries to dump, or None.
-    logwrite_ -- callable taking one string; nothing happens when it is None.
-    """
-    if logwrite_ is None:
-        return
-    if feature is None or size is None:
-        logwrite_( "JPC_label_print size: 0" )
-        return
-    pieces = ["JPC_label_print size: %d\n" % size]
-    for idx in xrange(0, size):
-        text = string_at(feature[idx])
-        # empty entries are marked without a trailing newline
-        pieces.append(("%s\n" % text) if text else "[None]")
-    logwrite_("".join(pieces))
-
-#############################################
-
-# ctypes types used to pass file names to the HTS_Engine_load_*_from_fn calls:
-# a fixed char array, pointers to it, and a 3-slot array of such pointers.
-FNLEN = 1000
-FILENAME = c_char * FNLEN
-FILENAME_ptr = POINTER(FILENAME)
-FILENAME_ptr_ptr = POINTER(FILENAME_ptr)
-FILENAME_ptr_x3 = FILENAME_ptr * 3
-FILENAME_ptr_x3_ptr = POINTER(FILENAME_ptr_x3)
-
-# Module-wide state shared by every libjt_* function below.
-libjt = None # DLL handle; set by libjt_initialize()
-njd = NJD()
-jpcommon = JPCommon()
-engine = HTS_Engine()
-use_lpf = 0 # overwritten by libjt_initialize() from its 'use_lpf' argument
-
-def libjt_version():
-    """Return what the DLL's jt_version() reports, or a placeholder string while no DLL is loaded."""
-    if libjt is not None:
-        return libjt.jt_version()
-    return "libjt version none"
-
-def libjt_initialize(JT_DLL, **args):
-    """Load the libjt DLL (first call only), initialise the shared NJD, JPCommon
-    and HTS_Engine objects, and declare the ctypes prototypes used by this module.
-
-    JT_DLL -- path of the DLL; encoded with 'mbcs' before loading.
-    args   -- keyword settings; 'use_lpf', 'samp_rate', 'fperiod' and 'alpha'
-              are read unconditionally, so a missing key raises KeyError.
-    """
-    global libjt, njd, jpcommon, engine, use_lpf
-    use_lpf = args['use_lpf']
-
-    if libjt is None: libjt = cdll.LoadLibrary(JT_DLL.encode('mbcs'))
-    libjt.jt_version.restype = c_char_p
-
-    libjt.NJD_initialize.argtypes = [NJD_ptr]
-    libjt.NJD_initialize(njd)
-
-    libjt.JPCommon_initialize.argtypes = [JPCommon_ptr]
-    libjt.JPCommon_initialize(jpcommon)
-
-    # second argument is 3 when the lpf stream is used, otherwise 2
-    libjt.HTS_Engine_initialize.argtypes = [HTS_Engine_ptr, c_int]
-    if use_lpf:
-        libjt.HTS_Engine_initialize(engine, 3)
-    else:
-        libjt.HTS_Engine_initialize(engine, 2)
-
-    libjt.HTS_Engine_set_sampling_rate.argtypes = [HTS_Engine_ptr, c_int]
-    libjt.HTS_Engine_set_sampling_rate(engine, args['samp_rate']) # 16000
-
-    libjt.HTS_Engine_set_fperiod.argtypes = [HTS_Engine_ptr, c_int]
-    libjt.HTS_Engine_set_fperiod(engine, args['fperiod']) # if sampling rate is 16000: 80(point=5ms) frame period
-
-    libjt.HTS_Engine_set_alpha.argtypes = [HTS_Engine_ptr, c_double]
-    libjt.HTS_Engine_set_alpha(engine, args['alpha']) # 0.42
-
-    libjt.HTS_Engine_set_gamma.argtypes = [HTS_Engine_ptr, c_int]
-    libjt.HTS_Engine_set_gamma(engine, 0)
-
-    libjt.HTS_Engine_set_log_gain.argtypes = [HTS_Engine_ptr, c_int]
-    libjt.HTS_Engine_set_log_gain(engine, 0)
-
-    libjt.HTS_Engine_set_beta.argtypes = [HTS_Engine_ptr, c_double]
-    libjt.HTS_Engine_set_beta(engine, 0.0)
-
-    libjt.HTS_Engine_set_audio_buff_size.argtypes = [HTS_Engine_ptr, c_int]
-    libjt.HTS_Engine_set_audio_buff_size(engine, 1600)
-
-    libjt.HTS_Engine_set_msd_threshold.argtypes = [HTS_Engine_ptr, c_int, c_double]
-    libjt.HTS_Engine_set_msd_threshold(engine, 1, 0.5)
-
-    libjt.HTS_Engine_set_gv_weight.argtypes = [HTS_Engine_ptr, c_int, c_double]
-    libjt.HTS_Engine_set_gv_weight(engine, 0, 1.0)
-    libjt.HTS_Engine_set_gv_weight(engine, 1, 0.7)
-    if use_lpf:
-        libjt.HTS_Engine_set_gv_weight(engine, 2, 1.0)
-
-    # for libjt_synthesis()
-    libjt.mecab2njd.argtypes = [NJD_ptr, FEATURE_ptr_array_ptr, c_int]
-    libjt.njd_set_pronunciation.argtypes = [NJD_ptr]
-    libjt.njd_set_digit.argtypes = [NJD_ptr]
-    libjt.njd_set_accent_phrase.argtypes = [NJD_ptr]
-    libjt.njd_set_accent_type.argtypes = [NJD_ptr]
-    libjt.njd_set_unvoiced_vowel.argtypes = [NJD_ptr]
-    libjt.njd_set_long_vowel.argtypes = [NJD_ptr]
-    libjt.njd2jpcommon.argtypes = [JPCommon_ptr, NJD_ptr]
-    libjt.JPCommon_make_label.argtypes = [JPCommon_ptr]
-    # (this prototype used to be assigned three times; once is enough)
-    libjt.JPCommon_get_label_size.argtypes = [JPCommon_ptr]
-    libjt.JPCommon_get_label_feature.argtypes = [JPCommon_ptr]
-    libjt.JPCommon_get_label_feature.restype = c_char_p_p
-    libjt.HTS_Engine_load_label_from_string_list.argtypes = [
-        HTS_Engine_ptr, c_char_p_p, c_int]
-
-    libjt.HTS_Engine_create_sstream.argtypes = [HTS_Engine_ptr]
-    libjt.HTS_Engine_create_pstream.argtypes = [HTS_Engine_ptr]
-    libjt.HTS_Engine_create_gstream.argtypes = [HTS_Engine_ptr]
-    libjt.HTS_Engine_refresh.argtypes = [HTS_Engine_ptr]
-    libjt.JPCommon_refresh.argtypes = [JPCommon_ptr]
-    libjt.NJD_refresh.argtypes = [NJD_ptr]
-    libjt.HTS_GStreamSet_get_total_nsample.argtypes = [HTS_GStreamSet_ptr]
-    libjt.HTS_GStreamSet_get_speech.argtypes = [HTS_GStreamSet_ptr, c_int]
-    libjt.NJD_print.argtypes = [NJD_ptr]
-    libjt.JPCommon_print.argtypes = [JPCommon_ptr]
-    libjt.JPCommonLabel_print.argtypes = [JPCommonLabel_ptr]
-
-    libjt.jt_total_nsample.argtypes = [HTS_Engine_ptr]
-    libjt.jt_speech_ptr.argtypes = [HTS_Engine_ptr]
-    libjt.jt_speech_ptr.restype = c_short_p
-    libjt.jt_save_logs.argtypes = [c_char_p, HTS_Engine_ptr, NJD_ptr]
-    libjt.jt_save_riff.argtypes = [c_char_p, HTS_Engine_ptr]
-    libjt.jt_speech_normalize.argtypes = [HTS_Engine_ptr, c_short, c_int]
-    libjt.jt_trim_silence.argtypes = [HTS_Engine_ptr, c_short, c_short]
-    libjt.jt_trim_silence.restype = c_int
-
-    libjt.NJD_clear.argtypes = [NJD_ptr]
-    libjt.JPCommon_clear.argtypes = [JPCommon_ptr]
-    libjt.HTS_Engine_clear.argtypes = [HTS_Engine_ptr]
-
-    libjt.HTS_Engine_set_lf0_offset_amp.argtypes = [HTS_Engine_ptr, c_double, c_double]
-
-    # for libjt_jpcommon_make_label()
-    libjt.JPCommonLabel_clear.argtypes = [JPCommonLabel_ptr]
-    libjt.JPCommonLabel_initialize.argtypes = [JPCommonLabel_ptr]
-    libjt.JPCommonNode_get_pron.restype = c_char_p
-    libjt.JPCommonNode_get_pos.restype = c_char_p
-    libjt.JPCommonNode_get_ctype.restype = c_char_p
-    libjt.JPCommonNode_get_cform.restype = c_char_p
-    libjt.JPCommonNode_get_acc.restype = c_int
-    libjt.JPCommonNode_get_chain_flag.restype = c_int
-    # the attribute is "argtypes"; the former spelling "argtype" was silently
-    # stored as an unrelated attribute and declared no prototype at all
-    libjt.JPCommonLabel_push_word.argtypes = [JPCommonLabel_ptr, c_char_p, c_char_p, c_char_p, c_char_p, c_int, c_int]
-
-def libjt_load(VOICE):
-    """Load the voice files found in directory VOICE into the shared engine.
-
-    VOICE is encoded with 'mbcs'. Duration, mgc and lf0 models are always
-    loaded, the lpf stream only when use_lpf is set, followed by the GV models
-    for mgc and lf0 and the GV switch file.
-    """
-    global libjt, engine, use_lpf
-    VOICE = VOICE.encode('mbcs')
-    # Every intermediate ctypes object stays referenced until the function
-    # returns, exactly as the individual local variables used to do.
-    keepalive = []
-
-    def file_ptr(basename):
-        # FILENAME_ptr onto a NUL-terminated buffer holding VOICE/basename
-        buf = create_string_buffer(os.path.join(VOICE, basename))
-        ptr = cast(byref(buf), FILENAME_ptr)
-        keepalive.extend((buf, ptr))
-        return ptr
-
-    def file_ptr_ptr(basename):
-        # pointer to a FILENAME_ptr for VOICE/basename
-        ptr = file_ptr(basename)
-        ptr_ptr = cast(byref(ptr), FILENAME_ptr_ptr)
-        keepalive.append(ptr_ptr)
-        return ptr_ptr
-
-    def window_ptrs(first, second, third):
-        # pointer to a 3-slot array built from three FILENAME_ptr values
-        triple = FILENAME_ptr_x3(first, second, third)
-        keepalive.append(triple)
-        return cast(byref(triple), FILENAME_ptr_x3_ptr)
-
-    libjt.HTS_Engine_load_duration_from_fn.argtypes = [
-        HTS_Engine_ptr, FILENAME_ptr_ptr, FILENAME_ptr_ptr, c_int]
-    libjt.HTS_Engine_load_duration_from_fn(
-        engine, file_ptr_ptr("dur.pdf"), file_ptr_ptr("tree-dur.inf"), 1)
-
-    libjt.HTS_Engine_load_parameter_from_fn.argtypes = [
-        HTS_Engine_ptr, FILENAME_ptr_ptr, FILENAME_ptr_ptr,
-        FILENAME_ptr_x3_ptr, c_int, c_int, c_int, c_int]
-
-    # mgc is loaded with (0, 0, 3, 1), lf0 with (1, 1, 3, 1)
-    for kind, index in (("mgc", 0), ("lf0", 1)):
-        models = file_ptr_ptr(kind + ".pdf")
-        trees = file_ptr_ptr("tree-" + kind + ".inf")
-        windows = window_ptrs(
-            file_ptr(kind + ".win1"),
-            file_ptr(kind + ".win2"),
-            file_ptr(kind + ".win3"))
-        libjt.HTS_Engine_load_parameter_from_fn(
-            engine, models, trees, windows,
-            index, index, 3, 1)
-
-    if use_lpf:
-        # a single window file; the two remaining slots are NULL pointers
-        lpf_models = file_ptr_ptr("lpf.pdf")
-        lpf_trees = file_ptr_ptr("tree-lpf.inf")
-        lpf_windows = window_ptrs(
-            file_ptr("lpf.win1"),
-            cast(0, FILENAME_ptr),
-            cast(0, FILENAME_ptr))
-        libjt.HTS_Engine_load_parameter_from_fn(engine, lpf_models, lpf_trees, lpf_windows, 2, 0, 1, 1)
-
-    libjt.HTS_Engine_load_gv_from_fn.argtypes = [
-        HTS_Engine_ptr, FILENAME_ptr_ptr, FILENAME_ptr_ptr,
-        c_int, c_int]
-
-    for kind, index in (("mgc", 0), ("lf0", 1)):
-        gv_models = file_ptr_ptr("gv-" + kind + ".pdf")
-        gv_trees = file_ptr_ptr("tree-gv-" + kind + ".inf")
-        libjt.HTS_Engine_load_gv_from_fn(
-            engine, gv_models, gv_trees, index, 1)
-
-    libjt.HTS_Engine_load_gv_switch_from_fn.argtypes = [
-        HTS_Engine_ptr, FILENAME_ptr]
-    libjt.HTS_Engine_load_gv_switch_from_fn(
-        engine, file_ptr("gv-switch.inf"))
-
-# Call the DLL's *_refresh routine on the shared engine, jpcommon and njd
-# objects, in that order. libjt_synthesis() uses it when it aborts early.
-def libjt_refresh():
- libjt.HTS_Engine_refresh(engine)
- libjt.JPCommon_refresh(jpcommon)
- libjt.NJD_refresh(njd)
-
-# Call the DLL's *_clear routine on the shared njd, jpcommon and engine
-# objects, in that order (the reverse of the order used by libjt_initialize()).
-def libjt_clear():
- libjt.NJD_clear(njd)
- libjt.JPCommon_clear(jpcommon)
- libjt.HTS_Engine_clear(engine)
-
-#def libjt_jpcommon_make_label(jpcommon, logwrite_=None):
-# if jpcommon.label:
-# libjt.JPCommonLabel_clear(jpcommon.label)
-# else:
-# jpcommon.label = cast(mc_calloc(1, sizeof(JPCommonLabel)), JPCommonLabel_ptr)
-# libjt.JPCommonLabel_initialize(jpcommon.label)
-# node = jpcommon.head
-# while node:
-# label = jpcommon.label
-# pron = libjt.JPCommonNode_get_pron(node)
-# pos = libjt.JPCommonNode_get_pos(node)
-# ctype = libjt.JPCommonNode_get_ctype(node)
-# cform = libjt.JPCommonNode_get_cform(node)
-# acc = libjt.JPCommonNode_get_acc(node)
-# flag = libjt.JPCommonNode_get_chain_flag(node)
-# if logwrite_ : logwrite_('%s,%s,%d,%d' % (pron, pos, acc, flag))
-# libjt.JPCommonLabel_push_word(label, pron, pos, ctype, cform, acc, flag)
-# node = cast(node[0].next, JPCommonNode_ptr)
-# libjt.JPCommonLabel_make(jpcommon.label)
-
-def libjt_synthesis(feature, size, fperiod_=80, feed_func_=None, is_speaking_func_=None, thres_=32, thres2_=32, level_=32767, logwrite_=None, lf0_offset_=0.0, lf0_amp_=1.0):
-    """Synthesise speech for MeCab features and return the samples as a byte string.
-
-    feature, size     -- feature array and entry count handed to mecab2njd();
-                         None for either one returns None immediately.
-    fperiod_          -- frame period passed to HTS_Engine_set_fperiod().
-    feed_func_        -- optional callable that receives the sample bytes.
-    is_speaking_func_ -- optional callable; when it returns a false value the
-                         work is abandoned, libjt_refresh() is called and None
-                         is returned.
-    thres_, thres2_   -- thresholds passed to jt_trim_silence().
-    level_            -- level passed to jt_speech_normalize().
-    logwrite_         -- optional callable taking one log string.
-    lf0_offset_, lf0_amp_ -- passed to HTS_Engine_set_lf0_offset_amp().
-
-    Returns the bytes read from jt_speech_ptr(), or None when nothing was
-    produced. WindowsError raised by the DLL calls is logged and swallowed.
-    """
-    if feature is None or size is None: return None
-    if logwrite_ : logwrite_('libjt_synthesis start.')
-    try:
-        libjt.HTS_Engine_set_lf0_offset_amp(engine, lf0_offset_, lf0_amp_)
-        libjt.HTS_Engine_set_fperiod(engine, fperiod_) # 80(point=5ms) frame period
-        libjt.mecab2njd(njd, feature, size)
-        libjt.njd_set_pronunciation(njd)
-        libjt.njd_set_digit(njd)
-        libjt.njd_set_accent_phrase(njd)
-    # was "except WindowsError(e):", which evaluates the undefined name e and
-    # raises NameError as soon as any exception reaches this clause
-    except WindowsError as e:
-        if logwrite_ : logwrite_('libjt_synthesis error #1 ' + str(e))
-    # exception: access violation reading 0x00000000
-    # https://github.com/nishimotz/libopenjtalk/commit/10d3abda6835e0547846fb5e12a36c1425561aaa#diff-66
-    try:
-        libjt.njd_set_accent_type(njd)
-    except WindowsError:
-        if logwrite_ : logwrite_('libjt_synthesis njd_set_accent_type() error ')
-    try:
-        libjt.njd_set_unvoiced_vowel(njd)
-        libjt.njd_set_long_vowel(njd)
-        libjt.njd2jpcommon(jpcommon, njd)
-        libjt.JPCommon_make_label(jpcommon)
-    except WindowsError as e:
-        if logwrite_ : logwrite_('libjt_synthesis error #2 ' + str(e))
-    if is_speaking_func_ and not is_speaking_func_() :
-        libjt_refresh()
-        return None
-    # default keeps "s" bound (and below the threshold) if the size query fails
-    s = 0
-    try:
-        s = libjt.JPCommon_get_label_size(jpcommon)
-    except WindowsError:
-        if logwrite_ : logwrite_('libjt_synthesis JPCommon_get_label_size() error ')
-    buf = None
-    if s > 2:
-        try:
-            f = libjt.JPCommon_get_label_feature(jpcommon)
-            libjt.HTS_Engine_load_label_from_string_list(engine, f, s)
-            libjt.HTS_Engine_create_sstream(engine)
-            libjt.HTS_Engine_create_pstream(engine)
-            libjt.HTS_Engine_create_gstream(engine)
-        except WindowsError:
-            if logwrite_ : logwrite_('libjt_synthesis error #3 ')
-        if is_speaking_func_ and not is_speaking_func_() :
-            libjt_refresh()
-            return None
-        try:
-            total_nsample = libjt.jt_trim_silence(engine, thres_, thres2_)
-            libjt.jt_speech_normalize(engine, level_, total_nsample)
-            speech_ptr = libjt.jt_speech_ptr(engine)
-            # samples are c_short, so the byte length is nsample * sizeof(c_short)
-            byte_count = total_nsample * sizeof(c_short)
-            buf = string_at(speech_ptr, byte_count)
-            if feed_func_: feed_func_(buf)
-            #libjt.jt_save_logs("_logfile", engine, njd)
-        except WindowsError:
-            if logwrite_ : logwrite_('libjt_synthesis error #5 ')
-    if logwrite_ : logwrite_('libjt_synthesis done.')
-    return buf
+++ /dev/null
-# _nvdajp_predic.py
-# -*- coding: utf-8 -*-
-# A part of speech engine nvdajp_jtalk
-# Copyright (C) 2010-2011 Takuya Nishimoto (nishimotz.com)
-
-import re
-
-predic = None
-camel_word_separator = re.compile(u'([a-z])([A-Z])')
-
-def setup():
-    """Fill the module-level rule table from load() the first time it is needed."""
-    global predic
-    if predic is not None:
-        return
-    predic = load()
-
-def convert(msg):
-    """Apply every rule of the table to msg, split camelCase words and lower-case the result.
-
-    msg -- text to normalise before it is handed to the speech engine.
-    Returns the rewritten text. A rule that raises is skipped (best effort).
-    """
-    # build the table on demand instead of failing on "for ... in None"
-    if predic is None:
-        setup()
-    for pattern, replacement in predic:
-        try:
-            msg = re.sub(pattern, replacement, msg)
-        except Exception:
-            # deliberately best effort, but no longer a bare "except:" that
-            # would also swallow KeyboardInterrupt and SystemExit
-            pass
-    msg = re.sub(camel_word_separator, u'\\1 \\2', msg) # HelloWorld -> Hello World
-    msg = msg.lower()
-    return msg
-
-def load():
-    """Build the ordered rewrite table consumed by convert().
-
-    Returns a list of [compiled_pattern, replacement] pairs. The rules are
-    applied in list order, so later rules see the output of earlier ones.
-    """
-    # Fullwidth (zenkaku) letters and digits sit exactly 0xFEE0 above their
-    # ASCII counterparts.
-    # NOTE(review): in the text as checked in, these rules and the zenkaku
-    # space / period / comma rules had the same character on both sides (and
-    # the period pattern was a bare '.', which matches every character).
-    # They are written through code points here on the assumption that the
-    # fullwidth characters were lost to a normalising conversion -- confirm.
-    zenkaku_alnum = [
-        [re.compile(u'%c' % (ord(ch) + 0xFEE0)), ch]
-        for ch in u'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
-    ]
-
-    head = [
-        ### Unicode REPLACEMENT CHARACTER
-        [re.compile(u'\ufffd'), u' '],
-        ### zenkaku space normalize
-        [re.compile(u'\u3000'), u' '],
-
-        ## iteration mark: 人々 昔々 家々 山々
-        [re.compile(u'(.)々'), u'\\1\\1'],
-        ## Welcome to NVDA
-        [re.compile('Welcome to'), u'ウェルカムトゥー'],
-
-        ###
-        ## nvdajp interfaces and documents
-        [re.compile(u'上矢印'), u'ウエヤジルシ'],
-        [re.compile(u'下矢印'), u'シタヤジルシ'],
-        [re.compile(u'同梱'), u'ドーコン'],
-        [re.compile(u'最上行'), u'サイジョーギョー'],
-        [re.compile(u'単一文字'), u'タンイツモジ'],
-        [re.compile(u'現在行'), u'ゲンザイギョー'],
-        [re.compile(u'正しく'), u'ただしく'],
-        [re.compile(u'初期値'), u'しょきち'],
-        [re.compile(u'既定値'), u'きていち'],
-        [re.compile(u'メモ帳'), u'めもちょー'],
-        [re.compile(u'仮名文字'), u'かなもじ'],
-        ## 行をブックマーク 行を隠す
-        [re.compile(u'行をブックマーク'), u'ギョーをブックマーク'],
-        [re.compile(u'行を隠す'), u'ギョーを隠す'],
-        ## 被災された方へ 圏内の方へ 支援をお考えの方へ 少しでも
-        [re.compile(u'された方'), u'されたかた'],
-        [re.compile(u'圏内の方'), u'圏内のかた'],
-        [re.compile(u'お考えの方'), u'お考えのかた'],
-        [re.compile(u'少しでも'), u'すこしでも'],
-        ##
-        [re.compile(u'大見出し'), u'オーミダシ'],
-        [re.compile(u'拡張子'), u'カクチョーシ'],
-        [re.compile(u'前景色'), u'ゼンケーショク'],
-        [re.compile(u'小文字'), u'コモジ'],
-        [re.compile(u'親オブジェクト'), u'オヤオブジェクト'],
-        [re.compile(u'表計算'), u'ヒョーケーサン'],
-        [re.compile(u'八ッ場'), u'ヤンバ'],
-        [re.compile(u'初音ミク'), u'ハツネミク'],
-        [re.compile(u'金正日'), u'キムジョンイル'],
-        [re.compile(u'正恩'), u'ジョンウン'],
-        [re.compile(u'急きょ'), u'キューキョ'],
-
-        ### trim space
-        [re.compile(u'マイ '), u'マイ'],
-        [re.compile(u'コントロール パネル'), u'コントロールパネル'],
-        [re.compile(u'タスク バー'), u'タスクバー'],
-        [re.compile(u'の '), u'の'], # remove space "1の 7" -> "1の7"
-        [re.compile(u' 側'), u' ガワ'],
-
-        ## isolated hiragana HA (mecab replaces to WA)
-        ## は
-        [re.compile(u'^は$'), u'ハ'],
-        [re.compile(u'\\sは$'), u'ハ'],
-    ]
-
-    tail = [
-        # Tween
-        [re.compile(u'[Tt]ween'), u'トゥイーン'],
-        # msspeech
-        [re.compile(u'[Mm]sspeech'), u'エムエススピーチ'],
-        #
-        #[re.compile(u'[Yy]ou[Tt]ube'), u'ユーチューブ'],
-        #[re.compile(u'[Mm]ixi'), u'ミクシー'],
-
-        # ぼらんてぃあ
-        [re.compile(u'ぼらんてぃあ'), u'ボランティア'],
-        ## 59 名
-        [re.compile(u'(\\d) 名'), u'\\1名'],
-        ## 4行 ヨンコー -> ヨンギョー
-        [re.compile(u'(\\d)行'), u'\\1ギョー'],
-        ## 1都5県
-        ## the quantifier is inside the group: "(\d)+" kept only the last
-        ## digit, so multi-digit counts lost their leading digits
-        [re.compile(u'1都(\\d+)県'), u'イット\\1ケン'],
-        ## 2 分前更新
-        [re.compile(u'(\\d+) 分前更新'), u'\\1分マエコーシン'],
-
-        ## 1MB 10MB 1.2MB 0.5MB 321.0MB 123.45MB 2.7GB
-        [re.compile(u'(\\d+)MB'), u'\\1メガバイト'],
-        [re.compile(u'(\\d+)GB'), u'\\1ギガバイト'],
-        [re.compile(u'(\\d+)MHz'), u'\\1メガヘルツ'],
-        [re.compile(u'(\\d+)GHz'), u'\\1ギガヘルツ'],
-
-        ### zenkaku symbols convert
-        ## 2011.03.11
-        ## 1,234円
-        [re.compile(u'\uff0e'), u'.'],
-        [re.compile(u'\uff0c'), u','],
-
-        ## 1,234
-        ## 1,234,567
-        ## 1,234,567,890
-        ## 1,23 = ichi comma niju san
-        ## 1,0 = ichi comma zero
-        [re.compile(u'(\\d)\\,(\\d{3})'), u'\\1\\2'],
-        [re.compile(u'(\\d{2})\\,(\\d{3})'), u'\\1\\2'],
-        [re.compile(u'(\\d{3})\\,(\\d{3})'), u'\\1\\2'],
-        [re.compile(u'(\\d)\\,(\\d{1,2})'), u'\\1カンマ\\2'],
-
-        ## digit strings starting with 0 are spelled out digit by digit:
-        ## each digit becomes a " 0N" marker that the table below reads out.
-        ## NOTE(review): as written, the shorter rules re-match the " 0N"
-        ## markers produced by the longer ones (e.g. "\b0(\d)" matches the
-        ## "00" of " 00"); the marker digits may originally have been
-        ## fullwidth -- confirm before relying on these rules.
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 0\\6 0\\7 0\\8 0\\9 '],
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 0\\6 0\\7 0\\8 '],
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 0\\6 0\\7 '],
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 0\\6 '],
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 '],
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 '],
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 '],
-        [re.compile(u'\\b0(\\d)(\\d)'), u' 00 0\\1 0\\2 '],
-        [re.compile(u'\\b0(\\d)'), u' 00 0\\1 '],
-
-        [re.compile(u' 00'), u'ゼロ'],
-        [re.compile(u' 01'), u'イチ'],
-        [re.compile(u' 02'), u'ニー'],
-        [re.compile(u' 03'), u'サン'],
-        [re.compile(u' 04'), u'ヨン'],
-        [re.compile(u' 05'), u'ゴー'],
-        [re.compile(u' 06'), u'ロク'],
-        [re.compile(u' 07'), u'ナナ'],
-        [re.compile(u' 08'), u'ハチ'],
-        [re.compile(u' 09'), u'キュー'],
-    ]
-
-    return head + zenkaku_alnum + tail
-
-
-copy c:\work\nvda\jpmain\source\locale\ja\characters.dic characters-ja.dic\r
rem build\r
python make_timestamp.py\r
cd htsengineapi\r
call all-build.cmd\r
call all-install.cmd\r
-python mecabRunner.py|lv -Iu\r
+cd ..\..\jptools\r
+call test-mecab.cmd\r
+cd ..\include\jtalk\r
+\r
+\r
+\r
+\r
\r
--- /dev/null
+copy c:\work\nvda\nvdajp\source\locale\ja\characters.dic characters-ja.dic\r
+++ /dev/null
-# coding: UTF-8\r
-#nvdajptext/mecab.py \r
-#A part of NonVisual Desktop Access (NVDA)\r
-#This file is covered by the GNU General Public License.\r
-#See the file COPYING for more details.\r
-#Copyright (C) 2010-2012 Masataka.Shinke, Takuya Nishimoto\r
-\r
-CODE = 'utf-8'\r
-\r
-from ctypes import *\r
-import codecs\r
-import re\r
-import string\r
-import os\r
-import struct\r
-import unicodedata\r
-import threading\r
-import sys\r
-\r
-DEFAULT_JTALK_DIR = unicode(os.path.dirname(__file__), 'mbcs')\r
-if hasattr(sys,'frozen'):\r
- d = os.path.join(os.getcwdu(), 'synthDrivers', 'jtalk')\r
- if os.path.isdir(d):\r
- DEFAULT_JTALK_DIR = d\r
-\r
-c_double_p = POINTER(c_double)\r
-c_double_p_p = POINTER(c_double_p) \r
-c_short_p = POINTER(c_short)\r
-c_char_p_p = POINTER(c_char_p) \r
-\r
-##############################################\r
-\r
-# http://mecab.sourceforge.net/libmecab.html\r
-# c:/mecab/sdk/mecab.h\r
-MECAB_NOR_NODE = 0\r
-MECAB_UNK_NODE = 1\r
-MECAB_BOS_NODE = 2\r
-MECAB_EOS_NODE = 3\r
-# mecab_token_t and mecab_path_t are declared without fields: only pointer\r
-# types to them are needed, for the rpath/lpath members of mecab_node_t.\r
-class mecab_token_t(Structure):\r
- pass\r
-mecab_token_t_ptr = POINTER(mecab_token_t)\r
-\r
-class mecab_path_t(Structure):\r
- pass\r
-mecab_path_t_ptr = POINTER(mecab_path_t)\r
-\r
-# ctypes mirror of one MeCab lattice node. Mecab_analysis() reads surface,\r
-# length, feature, stat and next from it.\r
-# NOTE(review): several members are commented out below; presumably they do\r
-# not exist in the mecab.h this was written against -- the field list must\r
-# match the libmecab.dll that is shipped, confirm before changing either.\r
-class mecab_node_t(Structure):\r
- pass\r
-mecab_node_t_ptr = POINTER(mecab_node_t)\r
-mecab_node_t_ptr_ptr = POINTER(mecab_node_t_ptr)\r
-mecab_node_t._fields_ = [\r
- ("prev", mecab_node_t_ptr),\r
- ("next", mecab_node_t_ptr),\r
- ("enext", mecab_node_t_ptr),\r
- ("bnext", mecab_node_t_ptr),\r
- ("rpath", mecab_path_t_ptr),\r
- ("lpath", mecab_path_t_ptr),\r
- # ("begin_node_list", mecab_node_t_ptr_ptr),\r
- # ("end_node_list", mecab_node_t_ptr_ptr),\r
- ("surface", c_char_p),\r
- ("feature", c_char_p),\r
- ("id", c_uint),\r
- ("length", c_ushort),\r
- ("rlength", c_ushort),\r
- ("rcAttr", c_ushort),\r
- ("lcAttr", c_ushort),\r
- ("posid", c_ushort),\r
- ("char_type", c_ubyte),\r
- ("stat", c_ubyte),\r
- ("isbest", c_ubyte),\r
- # ("sentence_length", c_uint),\r
- ("alpha", c_float),\r
- ("beta", c_float),\r
- ("prob", c_float),\r
- ("wcost", c_short),\r
- ("cost", c_long),\r
- # ("token", mecab_token_t_ptr),\r
- ]\r
-\r
-############################################\r
-\r
-# typedef struct _Mecab{\r
-# char **feature;\r
-# int size;\r
-# mecab_t *mecab;\r
-# } Mecab;\r
-\r
-# Geometry of the feature pool: FECOUNT slots of FELEN bytes each.\r
-FELEN = 1000 # string len\r
-FECOUNT = 1000\r
-FEATURE = c_char * FELEN\r
-FEATURE_ptr = POINTER(FEATURE)\r
-FEATURE_ptr_array = FEATURE_ptr * FECOUNT\r
-FEATURE_ptr_array_ptr = POINTER(FEATURE_ptr_array)\r
-\r
-# Shared MeCab instance, DLL handle, and the lock that MecabFeatures holds\r
-# for its whole lifetime.\r
-mecab = None\r
-libmc = None\r
-lock = threading.Lock()\r
-\r
-# C runtime allocator from msvcrt, used for the feature pool buffers.\r
-mc_malloc = cdll.msvcrt.malloc\r
-mc_malloc.restype = POINTER(c_ubyte)\r
-mc_calloc = cdll.msvcrt.calloc\r
-mc_calloc.restype = POINTER(c_ubyte)\r
-mc_free = cdll.msvcrt.free\r
-\r
-class NonblockingMecabFeatures(object):\r
-    """Pool of FECOUNT C buffers (FELEN bytes each) that receives MeCab feature strings.\r
-\r
-    size    -- number of slots currently filled (set by Mecab_analysis).\r
-    feature -- FEATURE_ptr_array whose entries point at the malloc'ed buffers.\r
-    """\r
-    def __init__(self):\r
-        self.size = 0\r
-        self.feature = FEATURE_ptr_array()\r
-        for i in xrange(0, FECOUNT):\r
-            buf = mc_malloc(FELEN)\r
-            # a NULL result used to be stored silently and written to later\r
-            if not buf:\r
-                raise MemoryError('malloc failed for MeCab feature buffer')\r
-            self.feature[i] = cast(buf, FEATURE_ptr)\r
-\r
-    def __del__(self):\r
-        # __del__ also runs when __init__ did not finish\r
-        feature = getattr(self, 'feature', None)\r
-        if feature is None:\r
-            return\r
-        for i in xrange(0, FECOUNT):\r
-            try:\r
-                mc_free(feature[i])\r
-            except Exception:\r
-                # best effort during teardown; no longer a bare "except:"\r
-                pass\r
-\r
-# Feature pool that also holds the module-level lock: the lock is acquired in\r
-# __init__ and released in __del__, so a second instance blocks in its\r
-# constructor until the first one has been finalised.\r
-# NOTE(review): the release depends on __del__ running; a lingering reference\r
-# to an instance would keep the lock held -- verify against the callers.\r
-class MecabFeatures(NonblockingMecabFeatures):\r
- def __init__(self):\r
- global lock\r
- lock.acquire()\r
- super(MecabFeatures, self).__init__()\r
-\r
- def __del__(self):\r
- global lock\r
- super(MecabFeatures, self).__del__()\r
- lock.release()\r
-\r
-predic = None\r
-\r
-def text2mecab_setup():\r
-    """Build, on first use, the table that rewrites each printable ASCII character.\r
-\r
-    Fills the module-level predic with [compiled_pattern, replacement] pairs,\r
-    one per character from 0x20 to 0x7E in ascending order.\r
-    """\r
-    global predic\r
-    if predic is not None:\r
-        return\r
-    # NOTE(review): in the text as checked in, all rules except the six\r
-    # punctuation entries below mapped a character to itself. They are\r
-    # generated here as ASCII -> fullwidth (code point + 0xFEE0, space ->\r
-    # U+3000) on the assumption that the fullwidth replacements were lost to\r
-    # a normalising conversion -- confirm against the dictionary's encoding.\r
-    special = {\r
-        u" ": u"\u3000",\r
-        u"\"": u"”",\r
-        u"'": u"’",\r
-        u"-": u"−",\r
-        u"\\": u"¥",\r
-        u"`": u"‘",\r
-        u"~": u"〜",\r
-    }\r
-    table = []\r
-    for code in range(0x20, 0x7F):\r
-        ch = u'%c' % code\r
-        wide = special.get(ch)\r
-        if wide is None:\r
-            wide = u'%c' % (code + 0xFEE0)\r
-        # re.escape makes every character match literally\r
-        table.append([re.compile(re.escape(ch)), wide])\r
-    predic = table\r
-\r
-def text2mecab_convert(s):\r
-    """Apply every rule of the module-level table to s and return the result.\r
-\r
-    A rule that raises is skipped (best effort).\r
-    """\r
-    for pattern, replacement in predic:\r
-        try:\r
-            s = re.sub(pattern, replacement, s)\r
-        except Exception:\r
-            # still best effort, but no longer a bare "except:" that would\r
-            # also swallow KeyboardInterrupt and SystemExit\r
-            pass\r
-    return s\r
-\r
-def Mecab_text2mecab(txt, CODE_=CODE):\r
-    """Normalise txt (NFKC), run the character table over it and encode the result.\r
-\r
-    txt   -- unicode text.\r
-    CODE_ -- target encoding; characters it cannot represent are dropped.\r
-    Returns the encoded byte string.\r
-    """\r
-    text2mecab_setup()\r
-    normalized = unicodedata.normalize('NFKC', txt)\r
-    converted = text2mecab_convert(normalized)\r
-    return converted.encode(CODE_, 'ignore')\r
-\r
-# Load libmecab.dll from jtalk_dir (only while libmc is None), declare the\r
-# ctypes prototypes used by this module, and create the shared MeCab instance\r
-# (only while mecab is None).\r
-#   logwrite_ -- optional callable taking one string; receives diagnostics.\r
-#   jtalk_dir -- directory holding libmecab.dll, the 'dic' directory and 'mecabrc'.\r
-def Mecab_initialize(logwrite_ = None, jtalk_dir = DEFAULT_JTALK_DIR):\r
- #if logwrite_: logwrite_('mecab init begin')\r
- mecab_dll = os.path.join(jtalk_dir, 'libmecab.dll')\r
- global libmc\r
- if libmc is None:\r
- libmc = cdll.LoadLibrary(mecab_dll.encode('mbcs'))\r
- libmc.mecab_version.restype = c_char_p\r
- libmc.mecab_strerror.restype = c_char_p\r
- libmc.mecab_sparse_tonode.restype = mecab_node_t_ptr\r
- libmc.mecab_new.argtypes = [c_int, c_char_p_p]\r
- global mecab\r
- if mecab is None:\r
- dic = os.path.join(jtalk_dir, 'dic')\r
- mecabrc = os.path.join(jtalk_dir, 'mecabrc')\r
- # argv-style argument vector: mecab -d <dic> -r <mecabrc>\r
- args = (c_char_p * 5)('mecab', '-d', dic.encode('utf-8'), '-r', mecabrc.encode('utf-8'))\r
- mecab = libmc.mecab_new(5, args)\r
- if logwrite_:\r
- if not mecab: logwrite_('mecab_new failed.')\r
- logwrite_(libmc.mecab_strerror(mecab))\r
- # NOTE(review): the file is closed explicitly; an exception in read()\r
- # would skip the close -- a with-block would be safer.\r
- f = open(os.path.join(dic, "DIC_VERSION"))\r
- s = f.read().strip()\r
- f.close()\r
- logwrite_('mecab:' + libmc.mecab_version() + ' ' + s)\r
- #if logwrite_: logwrite_('mecab init end')\r
-\r
-def Mecab_analysis(src, features):\r
- if not src: features.size = 0; return\r
- head = libmc.mecab_sparse_tonode(mecab, src)\r
- if head is None: features.size = 0; return\r
- features.size = 0\r
-\r
- # make array of features\r
- node = head\r
- i = 0\r
- while node:\r
- s = node[0].stat\r
- if s != MECAB_BOS_NODE and s != MECAB_EOS_NODE:\r
- c = node[0].length\r
- s = string_at(node[0].surface, c) + "," + string_at(node[0].feature)\r
- #print s.decode(CODE) # for debug\r
- buf = create_string_buffer(s)\r
- dst_ptr = features.feature[i]\r
- src_ptr = byref(buf)\r
- memmove(dst_ptr, src_ptr, len(s)+1)\r
- i += 1\r
- node = node[0].next\r
- features.size = i\r
- if i > FECOUNT: return\r
- return\r
-\r
-# for debug\r
-def Mecab_print(mf, logwrite_ = None, CODE_ = CODE, output_header = True):\r
- if logwrite_ is None: return\r
- feature = mf.feature\r
- size = mf.size\r
- if feature is None or size is None: \r
- if output_header:\r
- logwrite_( "Mecab_print size: 0" )\r
- return\r
- s2 = ''\r
- if output_header:\r
- s2 += "Mecab_print size: %d\n" % size\r
- for i in xrange(0, size):\r
- s = string_at(feature[i])\r
- if s:\r
- if CODE_ is None:\r
- s2 += "%d %s\n" % (i, s)\r
- else:\r
- s2 += "%d %s\n" % (i, s.decode(CODE_, 'ignore'))\r
- else:\r
- s2 += "[None]\n"\r
- logwrite_(s2)\r
-\r
-def Mecab_getFeature(mf, pos, CODE_ = CODE):\r
- s = string_at(mf.feature[pos])\r
- return s.decode(CODE_, 'ignore')\r
-\r
-def Mecab_setFeature(mf, pos, s, CODE_ = CODE):\r
- s = s.encode(CODE_, 'ignore')\r
- buf = create_string_buffer(s)\r
- dst_ptr = mf.feature[pos]\r
- src_ptr = byref(buf)\r
- memmove(dst_ptr, src_ptr, len(s)+1)\r
-\r
-def getMoraCount(s):\r
- # 1/3 => 3\r
- # */* => 0\r
- m = s.split('/')\r
- if len(m) == 2:\r
- m2 = m[1]\r
- if m2 != '*':\r
- return int(m2)\r
- return 0\r
-\r
-# PATTERN 1\r
-# before:\r
-# 1 五絡脈病証,名詞,数,*,*,*,*,*\r
-#\r
-# after:\r
-# 1 五絡脈病証,名詞,普通名詞,*,*,*,*,五絡脈病証,ゴミャクラクビョウショウ,\r
-# ゴミャクラクビョーショー,1/9,C0\r
-# \r
-# PATTERN 2\r
-# before:\r
-# 0 ∫⣿♪ ,名詞,サ変接続,*,*,*,*,*\r
-#\r
-# after:\r
-# 0 ∫⣿♪ ,名詞,サ変接続,*,*,*,*,∫♪ ,セキブンキゴーイチニーサンヨンゴーロクナナ\r
-# ハチノテンオンプ,セキブンキゴーイチニーサンヨンゴーロクナナハチノテンオンプ,1/29,C0\r
-# \r
-# PATTERN 3\r
-# before:\r
-# 0 ま,接頭詞,名詞接続,*,*,*,*,ま,マ,マ,1/1,P2\r
-# 1 ー,名詞,一般,*,*,*,*,*\r
-#\r
-# after:\r
-# 0 ま,接頭詞,名詞接続,*,*,*,*,まー,マー,マー,1/2,P2\r
-# 1 ー,名詞,一般,*,*,*,*,*\r
-def Mecab_correctFeatures(mf, CODE_ = CODE):\r
- for pos in xrange(0, mf.size):\r
- ar = Mecab_getFeature(mf, pos, CODE_=CODE_).split(',')\r
- need_fix = False\r
- if ar[2] == u'数' and ar[7] == u'*': \r
- need_fix = True\r
- if ar[1] == u'名詞' and ar[2] == u'サ変接続' and ar[7] == u'*': \r
- need_fix = True\r
- if need_fix:\r
- hyoki = ar[0]\r
- yomi = ''\r
- pron = ''\r
- mora = 0\r
- nbmf = NonblockingMecabFeatures()\r
- for c in hyoki:\r
- Mecab_analysis(Mecab_text2mecab(c, CODE_=CODE_), nbmf)\r
- for pos2 in xrange(0, nbmf.size):\r
- ar2 = Mecab_getFeature(nbmf, pos2, CODE_=CODE_).split(',')\r
- if len(ar2) > 10:\r
- yomi += ar2[8]\r
- pron += ar2[9]\r
- mora += getMoraCount(ar2[10])\r
- nbmf = None\r
- feature = u'{h},名詞,普通名詞,*,*,*,*,{h},{y},{p},1/{m},C0'.format(h=hyoki, y=yomi, p=pron, m=mora)\r
- Mecab_setFeature(mf, pos, feature, CODE_=CODE_)\r
- elif pos > 0 and ar[0] == u'ー' and ar[1] == u'名詞' and ar[2] == u'一般':\r
- ar2 = Mecab_getFeature(mf, pos-1, CODE_=CODE_).split(',')\r
- if len(ar2) > 10:\r
- hyoki = ar2[0] + u'ー'\r
- hin1 = ar2[1]\r
- hin2 = ar2[2]\r
- yomi = ar2[8] + u'ー'\r
- pron = ar2[9] + u'ー'\r
- mora = getMoraCount(ar2[10]) + 1\r
- feature = u'{h},{h1},{h2},*,*,*,*,{h},{y},{p},1/{m},C0'.format(h=hyoki, h1=hin1, h2=hin2, y=yomi, p=pron, m=mora)\r
- Mecab_setFeature(mf, pos-1, feature, CODE_=CODE_)\r
- elif pos >= 2:\r
- ar3 = Mecab_getFeature(mf, pos-2, CODE_=CODE_).split(',')\r
- if len(ar3) > 10 and ar3[1] != u'記号':\r
- hyoki = ar3[0] + ar2[0] + u'ー'\r
- hin1 = ar3[1]\r
- hin2 = ar3[2]\r
- yomi = ar3[8] + ar2[0] + u'ー'\r
- pron = ar3[9] + ar2[0] + u'ー'\r
- mora = getMoraCount(ar3[10]) + len(ar2[0]) + 1\r
- feature = u'{h},{h1},{h2},*,*,*,*,{h},{y},{p},1/{m},C0'.format(h=hyoki, h1=hin1, h2=hin2, y=yomi, p=pron, m=mora)\r
- Mecab_setFeature(mf, pos-2, feature, CODE_=CODE_)\r
-\r
-def Mecab_utf8_to_cp932(mf):\r
- for pos in xrange(0, mf.size):\r
- s = Mecab_getFeature(mf, pos, CODE_ = 'utf-8')\r
- Mecab_setFeature(mf, pos, s, CODE_ = 'cp932')\r
from __future__ import unicode_literals\r
import os\r
import sys\r
+sys.path.append(r'..\source\synthDrivers\jtalk')\r
from mecab import *\r
\r
def __print(s):\r
]\r
\r
if __name__ == '__main__':\r
- JT_DIR = os.path.normpath(os.path.join(os.getcwdu(), '..', '..', 'source', 'synthDrivers', 'jtalk'))\r
+ JT_DIR = os.path.normpath(os.path.join(os.getcwdu(), '..', 'source', 'synthDrivers', 'jtalk'))\r
print JT_DIR\r
Mecab_initialize(__print, JT_DIR)\r
for i in tasks:\r
libjt.njd_set_pronunciation(njd)
libjt.njd_set_digit(njd)
libjt.njd_set_accent_phrase(njd)
- except WindowsError:
- if logwrite_ : logwrite_('libjt_synthesis error #1 ')
+ except WindowsError as e:
+ if logwrite_ : logwrite_('libjt_synthesis error #1 ' + str(e))
# exception: access violation reading 0x00000000
# https://github.com/nishimotz/libopenjtalk/commit/10d3abda6835e0547846fb5e12a36c1425561aaa#diff-66
try:
libjt.njd_set_long_vowel(njd)
libjt.njd2jpcommon(jpcommon, njd)
libjt.JPCommon_make_label(jpcommon)
- except WindowsError:
- if logwrite_ : logwrite_('libjt_synthesis error #2 ')
+ except WindowsError as e:
+ if logwrite_ : logwrite_('libjt_synthesis error #2 ' + str(e))
if is_speaking_func_ and not is_speaking_func_() :
libjt_refresh()
return None