+++ /dev/null
-# _jtalk_core.py
-# -*- coding: utf-8 -*-
-#A part of NonVisual Desktop Access (NVDA)
-#Copyright (C) 2010-2012 Takuya Nishimoto (NVDA Japanese Team)
-#This file is covered by the GNU General Public License.
-#See the file COPYING for more details.
-
-# Japanese speech engine wrapper for Open JTalk
-# http://ja.nishimotz.com/project:libopenjtalk
-
-import codecs
-import re
-import string
-import os
-import struct
-import sys
-from mecab import *
-
-############################################
-
-# htsengineapi/include/HTS_engine.h
-
-# size of structure:
-# HTS_Global 56
-# HTS_ModelSet 76
-# HTS_Label 24
-# HTS_SStreamSet 24
-# HTS_PStreamSet 12
-# HTS_GStreamSet 20
-
-class HTS_ModelSet(Structure):
-    """Opaque placeholder for the C HTS_ModelSet struct; only its size matters here."""
-    # 76 bytes, as listed in the structure-size table above.  The previous
-    # value (56) is the size of HTS_Global and shifted every member that
-    # follows "ms" inside HTS_Engine.
-    _fields_ = [
-        ("_dummy", c_byte * 76),
-    ]
-
-class HTS_Label(Structure):
-    """Opaque placeholder for the C HTS_Label struct; only its size matters here."""
-    # 24 bytes, as listed in the structure-size table above (was 76, the
-    # size of HTS_ModelSet).
-    _fields_ = [
-        ("_dummy", c_byte * 24),
-    ]
-HTS_Label_ptr = POINTER(HTS_Label)
-
-class HTS_SStreamSet(Structure):
-    """Opaque 24-byte placeholder for the C HTS_SStreamSet struct."""
-    _fields_ = [("_dummy", c_byte * 24)]
-
-class HTS_PStreamSet(Structure):
-    """Opaque 12-byte placeholder for the C HTS_PStreamSet struct."""
-    _fields_ = [("_dummy", c_byte * 12)]
-
-class HTS_GStream(Structure):
-    """ctypes mirror of the C HTS_GStream struct (one generated parameter stream)."""
-    _fields_ = [
-        # int static_length;  /* static features length */
-        ("static_length", c_int),
-        # double **par;       /* generated parameter */
-        ("par", c_double_p_p),
-    ]
-
-HTS_GStream_ptr = POINTER(HTS_GStream)
-
-# FIXME: engine.gss.total_nsample is always 0
-# NOTE(review): possibly because the placeholder sizes of the members that
-# precede "gss" in HTS_Engine do not add up to the C layout -- confirm.
-class HTS_GStreamSet(Structure):
-    """ctypes mirror of the C HTS_GStreamSet struct (generated streams and speech)."""
-    _fields_ = [
-        # int total_nsample;     /* total sample */
-        ("total_nsample", c_int),
-        # int total_frame;       /* total frame */
-        ("total_frame", c_int),
-        # int nstream;           /* # of streams */
-        ("nstream", c_int),
-        # HTS_GStream *gstream;  /* generated parameter streams */
-        ("gstream", HTS_GStream_ptr),
-        # short *gspeech;        /* generated speech */
-        ("gspeech", c_short_p),
-    ]
-HTS_GStreamSet_ptr = POINTER(HTS_GStreamSet)
-
-class HTS_Global(Structure):
-    """ctypes mirror of the C HTS_Global struct (global synthesis settings)."""
-    _fields_ = [
-        # /* Gamma=-1/stage : if stage=0 then Gamma=0 */
-        ("state", c_int),
-        # HTS_Boolean (TRUE=1)  /* log gain flag (for LSP) */
-        ("use_log_gain", c_int),
-        # /* sampling rate */
-        ("sampling_rate", c_int),
-        # /* frame period */
-        ("fperiod", c_int),
-        # /* all-pass constant */
-        ("alpha", c_double),
-        # /* postfiltering coefficient */
-        ("beta", c_double),
-        # /* audio buffer size (for audio device) */
-        ("audio_buff_size", c_int),
-        # /* MSD thresholds */
-        ("msd_threshold", c_double_p),
-        # /* weights for duration interpolation */
-        ("duration_iw", c_double_p),
-        # /* weights for parameter interpolation */
-        ("parameter_iw", c_double_p_p),
-        # /* weights for GV interpolation */
-        ("gv_iw", c_double_p_p),
-        # /* GV weights */
-        ("gv_weight", c_double_p),
-    ]
-HTS_Global_ptr = POINTER(HTS_Global)
-
-class HTS_Engine(Structure):
-    """ctypes mirror of the C HTS_Engine struct that is passed to every HTS_Engine_* call.
-
-    Member order and sizes must match the DLL; most members are opaque
-    placeholders defined above.
-    """
-    _fields_ = [
-        ("global", HTS_Global),
-        # HTS_Audio (requires nvdajp miscdep 86 or later)
-        ("audio", c_void_p),
-        ("ms", HTS_ModelSet),
-        ("label", HTS_Label),
-        ("sss", HTS_SStreamSet),
-        ("pss", HTS_PStreamSet),
-        ("gss", HTS_GStreamSet),
-        ("lf0_offset", c_double),
-        ("lf0_amp", c_double),
-    ]
-HTS_Engine_ptr = POINTER(HTS_Engine)
-
-############################################
-
-class NJD(Structure):
-    """Opaque 8-byte placeholder for the C NJD struct."""
-    _fields_ = [("_dummy", c_byte * 8)]
-NJD_ptr = POINTER(NJD)
-
-class JPCommonNode(Structure):
-    """ctypes mirror of one node of the JPCommon doubly linked word list."""
-JPCommonNode_ptr = POINTER(JPCommonNode)
-# _fields_ is assigned after the class statement because prev/next refer
-# back to JPCommonNode through JPCommonNode_ptr.
-JPCommonNode._fields_ = [
-    ('pron', c_char_p),
-    ('pos', c_char_p),
-    ('ctype', c_char_p),
-    ('cform', c_char_p),
-    ('acc', c_int),
-    ('chain_flag', c_int),
-    ('prev', JPCommonNode_ptr),
-    ('next', JPCommonNode_ptr),
-]
-
-# The five structures below are only ever handled through pointers, so they
-# are declared without fields.
-class JPCommonLabelBreathGroup(Structure):
-    """Opaque C JPCommonLabelBreathGroup."""
-JPCommonLabelBreathGroup_ptr = POINTER(JPCommonLabelBreathGroup)
-
-class JPCommonLabelAccentPhrase(Structure):
-    """Opaque C JPCommonLabelAccentPhrase."""
-JPCommonLabelAccentPhrase_ptr = POINTER(JPCommonLabelAccentPhrase)
-
-class JPCommonLabelWord(Structure):
-    """Opaque C JPCommonLabelWord."""
-JPCommonLabelWord_ptr = POINTER(JPCommonLabelWord)
-
-class JPCommonLabelMora(Structure):
-    """Opaque C JPCommonLabelMora."""
-JPCommonLabelMora_ptr = POINTER(JPCommonLabelMora)
-
-class JPCommonLabelPhoneme(Structure):
-    """Opaque C JPCommonLabelPhoneme."""
-JPCommonLabelPhoneme_ptr = POINTER(JPCommonLabelPhoneme)
-
-# jpcommon/jpcommon.h
-class JPCommonLabel(Structure):
-    """ctypes mirror of the C JPCommonLabel struct.
-
-    Holds the label strings ("feature", "size" entries) plus head/tail
-    pointers for each linguistic level.
-    """
-    _fields_ = [
-        ('size', c_int),
-        ('feature', c_char_p_p),
-        # head/tail pairs, one per level
-        ('breath_head', JPCommonLabelBreathGroup_ptr),
-        ('breath_tail', JPCommonLabelBreathGroup_ptr),
-        ('accent_head', JPCommonLabelAccentPhrase_ptr),
-        ('accent_tail', JPCommonLabelAccentPhrase_ptr),
-        ('word_head', JPCommonLabelWord_ptr),
-        ('word_tail', JPCommonLabelWord_ptr),
-        ('mora_head', JPCommonLabelMora_ptr),
-        ('mora_tail', JPCommonLabelMora_ptr),
-        ('phoneme_head', JPCommonLabelPhoneme_ptr),
-        ('phoneme_tail', JPCommonLabelPhoneme_ptr),
-        ('short_pause_flag', c_int),
-    ]
-JPCommonLabel_ptr = POINTER(JPCommonLabel)
-
-class JPCommon(Structure):
-    """ctypes mirror of the C JPCommon struct: node list head/tail and the label object."""
-    _fields_ = [
-        ("head", JPCommonNode_ptr),
-        ("tail", JPCommonNode_ptr),
-        ("label", JPCommonLabel_ptr),
-    ]
-JPCommon_ptr = POINTER(JPCommon)
-
-# for debug
-def JPC_label_print(feature, size, logwrite_):
-    """Write the first `size` label strings of `feature` to `logwrite_` as one message.
-
-    Does nothing when logwrite_ is None.  When feature or size is None a
-    fixed "size: 0" message is logged instead.
-    """
-    if logwrite_ is None:
-        return
-    if feature is None or size is None:
-        logwrite_( "JPC_label_print size: 0" )
-        return
-    parts = ["JPC_label_print size: %d\n" % size]
-    for idx in xrange(0, size):
-        text = string_at(feature[idx])
-        # empty entries are shown as "[None]" (without a trailing newline)
-        parts.append(("%s\n" % text) if text else "[None]")
-    logwrite_("".join(parts))
-
-#############################################
-
-# ctypes types used to hand file names (and lists of file names) to the DLL
-FNLEN = 1000
-FILENAME = c_char * FNLEN
-FILENAME_ptr = POINTER(FILENAME)
-FILENAME_ptr_ptr = POINTER(FILENAME_ptr)
-FILENAME_ptr_x3 = FILENAME_ptr * 3
-FILENAME_ptr_x3_ptr = POINTER(FILENAME_ptr_x3)
-
-# module state shared by the libjt_* functions
-libjt = None            # DLL handle, set by libjt_initialize()
-use_lpf = 0             # copied from the 'use_lpf' argument of libjt_initialize()
-njd = NJD()
-jpcommon = JPCommon()
-engine = HTS_Engine()
-
-def libjt_version():
-    """Return the DLL's jt_version() result, or a fixed message when no DLL is loaded."""
-    if libjt is not None:
-        return libjt.jt_version()
-    return "libjt version none"
-
-def libjt_initialize(JT_DLL, **args):
-    """Load the Open JTalk DLL, initialize njd/jpcommon/engine and declare prototypes.
-
-    JT_DLL is the DLL path; it is encoded as 'mbcs' before loading.  The DLL
-    is loaded only once (while the module-level libjt is None).
-    Keyword arguments read: 'use_lpf', 'samp_rate', 'fperiod', 'alpha';
-    a missing key raises KeyError.
-    """
-    global libjt, njd, jpcommon, engine, use_lpf
-    use_lpf = args['use_lpf']
-
-    if libjt is None:
-        libjt = cdll.LoadLibrary(JT_DLL.encode('mbcs'))
-    libjt.jt_version.restype = c_char_p
-
-    libjt.NJD_initialize.argtypes = [NJD_ptr]
-    libjt.NJD_initialize(njd)
-
-    libjt.JPCommon_initialize.argtypes = [JPCommon_ptr]
-    libjt.JPCommon_initialize(jpcommon)
-
-    # second argument is 3 when the lpf stream is used, otherwise 2
-    libjt.HTS_Engine_initialize.argtypes = [HTS_Engine_ptr, c_int]
-    libjt.HTS_Engine_initialize(engine, 3 if use_lpf else 2)
-
-    libjt.HTS_Engine_set_sampling_rate.argtypes = [HTS_Engine_ptr, c_int]
-    libjt.HTS_Engine_set_sampling_rate(engine, args['samp_rate']) # 16000
-
-    libjt.HTS_Engine_set_fperiod.argtypes = [HTS_Engine_ptr, c_int]
-    libjt.HTS_Engine_set_fperiod(engine, args['fperiod']) # if sampling rate is 16000: 80(point=5ms) frame period
-
-    libjt.HTS_Engine_set_alpha.argtypes = [HTS_Engine_ptr, c_double]
-    libjt.HTS_Engine_set_alpha(engine, args['alpha']) # 0.42
-
-    libjt.HTS_Engine_set_gamma.argtypes = [HTS_Engine_ptr, c_int]
-    libjt.HTS_Engine_set_gamma(engine, 0)
-
-    libjt.HTS_Engine_set_log_gain.argtypes = [HTS_Engine_ptr, c_int]
-    libjt.HTS_Engine_set_log_gain(engine, 0)
-
-    libjt.HTS_Engine_set_beta.argtypes = [HTS_Engine_ptr, c_double]
-    libjt.HTS_Engine_set_beta(engine, 0.0)
-
-    libjt.HTS_Engine_set_audio_buff_size.argtypes = [HTS_Engine_ptr, c_int]
-    libjt.HTS_Engine_set_audio_buff_size(engine, 1600)
-
-    libjt.HTS_Engine_set_msd_threshold.argtypes = [HTS_Engine_ptr, c_int, c_double]
-    libjt.HTS_Engine_set_msd_threshold(engine, 1, 0.5)
-
-    libjt.HTS_Engine_set_gv_weight.argtypes = [HTS_Engine_ptr, c_int, c_double]
-    libjt.HTS_Engine_set_gv_weight(engine, 0, 1.0)
-    libjt.HTS_Engine_set_gv_weight(engine, 1, 0.7)
-    if use_lpf:
-        libjt.HTS_Engine_set_gv_weight(engine, 2, 1.0)
-
-    # Prototypes of the functions called later (libjt_synthesis(),
-    # libjt_refresh(), libjt_clear() and the label helpers).  Each name is
-    # declared exactly once.
-    prototypes = (
-        # for libjt_synthesis()
-        ('mecab2njd', [NJD_ptr, FEATURE_ptr_array_ptr, c_int]),
-        ('njd_set_pronunciation', [NJD_ptr]),
-        ('njd_set_digit', [NJD_ptr]),
-        ('njd_set_accent_phrase', [NJD_ptr]),
-        ('njd_set_accent_type', [NJD_ptr]),
-        ('njd_set_unvoiced_vowel', [NJD_ptr]),
-        ('njd_set_long_vowel', [NJD_ptr]),
-        ('njd2jpcommon', [JPCommon_ptr, NJD_ptr]),
-        ('JPCommon_make_label', [JPCommon_ptr]),
-        ('JPCommon_get_label_size', [JPCommon_ptr]),
-        ('JPCommon_get_label_feature', [JPCommon_ptr]),
-        ('HTS_Engine_load_label_from_string_list', [HTS_Engine_ptr, c_char_p_p, c_int]),
-        ('HTS_Engine_create_sstream', [HTS_Engine_ptr]),
-        ('HTS_Engine_create_pstream', [HTS_Engine_ptr]),
-        ('HTS_Engine_create_gstream', [HTS_Engine_ptr]),
-        ('HTS_Engine_refresh', [HTS_Engine_ptr]),
-        ('JPCommon_refresh', [JPCommon_ptr]),
-        ('NJD_refresh', [NJD_ptr]),
-        ('HTS_GStreamSet_get_total_nsample', [HTS_GStreamSet_ptr]),
-        ('HTS_GStreamSet_get_speech', [HTS_GStreamSet_ptr, c_int]),
-        ('NJD_print', [NJD_ptr]),
-        ('JPCommon_print', [JPCommon_ptr]),
-        ('JPCommonLabel_print', [JPCommonLabel_ptr]),
-        ('jt_total_nsample', [HTS_Engine_ptr]),
-        ('jt_speech_ptr', [HTS_Engine_ptr]),
-        ('jt_save_logs', [c_char_p, HTS_Engine_ptr, NJD_ptr]),
-        ('jt_save_riff', [c_char_p, HTS_Engine_ptr]),
-        ('jt_speech_normalize', [HTS_Engine_ptr, c_short, c_int]),
-        ('jt_trim_silence', [HTS_Engine_ptr, c_short, c_short]),
-        ('NJD_clear', [NJD_ptr]),
-        ('JPCommon_clear', [JPCommon_ptr]),
-        ('HTS_Engine_clear', [HTS_Engine_ptr]),
-        ('HTS_Engine_set_lf0_offset_amp', [HTS_Engine_ptr, c_double, c_double]),
-        # for libjt_jpcommon_make_label()
-        ('JPCommonLabel_clear', [JPCommonLabel_ptr]),
-        ('JPCommonLabel_initialize', [JPCommonLabel_ptr]),
-        # this one was previously assigned to a misspelled "argtype"
-        # attribute, which ctypes ignores
-        ('JPCommonLabel_push_word',
-            [JPCommonLabel_ptr, c_char_p, c_char_p, c_char_p, c_char_p, c_int, c_int]),
-    )
-    for func_name, arg_types in prototypes:
-        getattr(libjt, func_name).argtypes = arg_types
-
-    # return types that differ from (or are spelled out instead of) the
-    # ctypes default
-    result_types = (
-        ('JPCommon_get_label_feature', c_char_p_p),
-        ('jt_speech_ptr', c_short_p),
-        ('jt_trim_silence', c_int),
-        ('JPCommonNode_get_pron', c_char_p),
-        ('JPCommonNode_get_pos', c_char_p),
-        ('JPCommonNode_get_ctype', c_char_p),
-        ('JPCommonNode_get_cform', c_char_p),
-        ('JPCommonNode_get_acc', c_int),
-        ('JPCommonNode_get_chain_flag', c_int),
-    )
-    for func_name, result_type in result_types:
-        getattr(libjt, func_name).restype = result_type
-
-def _voice_fn(voice_dir, name, keep):
-    """Return a FILENAME_ptr to a new string buffer holding voice_dir/name."""
-    buf = create_string_buffer(os.path.join(voice_dir, name))
-    ptr = cast(byref(buf), FILENAME_ptr)
-    # the caller's list keeps the buffer and pointer referenced during the DLL call
-    keep.extend((buf, ptr))
-    return ptr
-
-def _voice_fn_list(voice_dir, name, keep):
-    """Return a FILENAME_ptr_ptr that addresses the single file name voice_dir/name."""
-    ptr = _voice_fn(voice_dir, name, keep)
-    ptr_ptr = cast(byref(ptr), FILENAME_ptr_ptr)
-    keep.append(ptr_ptr)
-    return ptr_ptr
-
-def _voice_windows(voice_dir, names, keep):
-    """Return a FILENAME_ptr_x3_ptr for up to three window files; unused slots are NULL."""
-    slots = [_voice_fn(voice_dir, name, keep) for name in names]
-    while len(slots) < 3:
-        slots.append(cast(0, FILENAME_ptr))
-    table = FILENAME_ptr_x3(*slots)
-    table_ptr = cast(byref(table), FILENAME_ptr_x3_ptr)
-    keep.extend((table, table_ptr))
-    return table_ptr
-
-def libjt_load(VOICE):
-    """Load the voice files found in directory VOICE into the engine.
-
-    VOICE is encoded as 'mbcs'.  Files are loaded in this order: duration,
-    mgc (stream 0), lf0 (stream 1), lpf (stream 2, only when use_lpf is
-    set), GV for mgc and lf0, and finally the GV switch file.
-    """
-    global libjt, engine, use_lpf
-    VOICE = VOICE.encode('mbcs')
-    keep = []  # every ctypes object created below stays referenced until return
-
-    libjt.HTS_Engine_load_duration_from_fn.argtypes = [
-        HTS_Engine_ptr, FILENAME_ptr_ptr, FILENAME_ptr_ptr, c_int]
-    libjt.HTS_Engine_load_duration_from_fn(
-        engine,
-        _voice_fn_list(VOICE, "dur.pdf", keep),
-        _voice_fn_list(VOICE, "tree-dur.inf", keep),
-        1)
-
-    libjt.HTS_Engine_load_parameter_from_fn.argtypes = [
-        HTS_Engine_ptr, FILENAME_ptr_ptr, FILENAME_ptr_ptr,
-        FILENAME_ptr_x3_ptr, c_int, c_int, c_int, c_int]
-
-    libjt.HTS_Engine_load_parameter_from_fn(
-        engine,
-        _voice_fn_list(VOICE, "mgc.pdf", keep),
-        _voice_fn_list(VOICE, "tree-mgc.inf", keep),
-        _voice_windows(VOICE, ("mgc.win1", "mgc.win2", "mgc.win3"), keep),
-        0, 0, 3, 1)
-
-    libjt.HTS_Engine_load_parameter_from_fn(
-        engine,
-        _voice_fn_list(VOICE, "lf0.pdf", keep),
-        _voice_fn_list(VOICE, "tree-lf0.inf", keep),
-        _voice_windows(VOICE, ("lf0.win1", "lf0.win2", "lf0.win3"), keep),
-        1, 1, 3, 1)
-
-    if use_lpf:
-        # the lpf stream has a single window file; the other two slots are NULL
-        libjt.HTS_Engine_load_parameter_from_fn(
-            engine,
-            _voice_fn_list(VOICE, "lpf.pdf", keep),
-            _voice_fn_list(VOICE, "tree-lpf.inf", keep),
-            _voice_windows(VOICE, ("lpf.win1",), keep),
-            2, 0, 1, 1)
-
-    libjt.HTS_Engine_load_gv_from_fn.argtypes = [
-        HTS_Engine_ptr, FILENAME_ptr_ptr, FILENAME_ptr_ptr,
-        c_int, c_int]
-
-    libjt.HTS_Engine_load_gv_from_fn(
-        engine,
-        _voice_fn_list(VOICE, "gv-mgc.pdf", keep),
-        _voice_fn_list(VOICE, "tree-gv-mgc.inf", keep),
-        0, 1)
-
-    libjt.HTS_Engine_load_gv_from_fn(
-        engine,
-        _voice_fn_list(VOICE, "gv-lf0.pdf", keep),
-        _voice_fn_list(VOICE, "tree-gv-lf0.inf", keep),
-        1, 1)
-
-    libjt.HTS_Engine_load_gv_switch_from_fn.argtypes = [
-        HTS_Engine_ptr, FILENAME_ptr]
-    libjt.HTS_Engine_load_gv_switch_from_fn(
-        engine, _voice_fn(VOICE, "gv-switch.inf", keep))
-
-def libjt_refresh():
-    """Call the DLL's *_refresh function on engine, jpcommon and njd, in that order."""
-    for func_name, target in (
-            ('HTS_Engine_refresh', engine),
-            ('JPCommon_refresh', jpcommon),
-            ('NJD_refresh', njd)):
-        getattr(libjt, func_name)(target)
-
-def libjt_clear():
-    """Call the DLL's *_clear function on njd, jpcommon and engine, in that order."""
-    for func_name, target in (
-            ('NJD_clear', njd),
-            ('JPCommon_clear', jpcommon),
-            ('HTS_Engine_clear', engine)):
-        getattr(libjt, func_name)(target)
-
-#def libjt_jpcommon_make_label(jpcommon, logwrite_=None):
-# if jpcommon.label:
-# libjt.JPCommonLabel_clear(jpcommon.label)
-# else:
-# jpcommon.label = cast(mc_calloc(1, sizeof(JPCommonLabel)), JPCommonLabel_ptr)
-# libjt.JPCommonLabel_initialize(jpcommon.label)
-# node = jpcommon.head
-# while node:
-# label = jpcommon.label
-# pron = libjt.JPCommonNode_get_pron(node)
-# pos = libjt.JPCommonNode_get_pos(node)
-# ctype = libjt.JPCommonNode_get_ctype(node)
-# cform = libjt.JPCommonNode_get_cform(node)
-# acc = libjt.JPCommonNode_get_acc(node)
-# flag = libjt.JPCommonNode_get_chain_flag(node)
-# if logwrite_ : logwrite_('%s,%s,%d,%d' % (pron, pos, acc, flag))
-# libjt.JPCommonLabel_push_word(label, pron, pos, ctype, cform, acc, flag)
-# node = cast(node[0].next, JPCommonNode_ptr)
-# libjt.JPCommonLabel_make(jpcommon.label)
-
-def libjt_synthesis(feature, size, fperiod_=80, feed_func_=None, is_speaking_func_=None, thres_=32, thres2_=32, level_=32767, logwrite_=None, lf0_offset_=0.0, lf0_amp_=1.0):
-    """Synthesize speech for one array of MeCab features.
-
-    feature / size   -- feature array and its length, passed to mecab2njd();
-                        None for either makes the function return None.
-    fperiod_         -- frame period passed to HTS_Engine_set_fperiod().
-    feed_func_       -- optional callable that receives the generated samples.
-    is_speaking_func_-- optional callable; when it returns a false value the
-                        engine is refreshed and None is returned.
-    thres_, thres2_  -- passed to jt_trim_silence().
-    level_           -- passed to jt_speech_normalize().
-    logwrite_        -- optional logging callable.
-    lf0_offset_, lf0_amp_ -- passed to HTS_Engine_set_lf0_offset_amp().
-
-    Returns the generated samples as a byte string, or None when nothing was
-    generated.  WindowsError raised by the DLL calls is logged and swallowed.
-    The caller is expected to call libjt_refresh() afterwards.
-    """
-    if feature is None or size is None:
-        return None
-    if logwrite_:
-        logwrite_('libjt_synthesis start.')
-    try:
-        libjt.HTS_Engine_set_lf0_offset_amp(engine, lf0_offset_, lf0_amp_)
-        libjt.HTS_Engine_set_fperiod(engine, fperiod_) # 80(point=5ms) frame period
-        libjt.mecab2njd(njd, feature, size)
-        libjt.njd_set_pronunciation(njd)
-        libjt.njd_set_digit(njd)
-        libjt.njd_set_accent_phrase(njd)
-    except WindowsError as e:
-        if logwrite_:
-            logwrite_('libjt_synthesis error #1 ' + str(e))
-    # exception: access violation reading 0x00000000
-    # https://github.com/nishimotz/libopenjtalk/commit/10d3abda6835e0547846fb5e12a36c1425561aaa#diff-66
-    try:
-        libjt.njd_set_accent_type(njd)
-    except WindowsError as e:
-        if logwrite_:
-            logwrite_('libjt_synthesis njd_set_accent_type() error ' + str(e))
-    try:
-        libjt.njd_set_unvoiced_vowel(njd)
-        libjt.njd_set_long_vowel(njd)
-        libjt.njd2jpcommon(jpcommon, njd)
-        libjt.JPCommon_make_label(jpcommon)
-    except WindowsError as e:
-        if logwrite_:
-            logwrite_('libjt_synthesis error #2 ' + str(e))
-    if is_speaking_func_ and not is_speaking_func_():
-        libjt_refresh()
-        return None
-    # Default to "no labels" so that a failing JPCommon_get_label_size() skips
-    # generation instead of raising NameError on the comparison below.
-    s = 0
-    try:
-        s = libjt.JPCommon_get_label_size(jpcommon)
-    except WindowsError as e:
-        if logwrite_:
-            logwrite_('libjt_synthesis JPCommon_get_label_size() error ' + str(e))
-    buf = None
-    if s > 2:
-        try:
-            f = libjt.JPCommon_get_label_feature(jpcommon)
-            libjt.HTS_Engine_load_label_from_string_list(engine, f, s)
-            libjt.HTS_Engine_create_sstream(engine)
-            libjt.HTS_Engine_create_pstream(engine)
-            libjt.HTS_Engine_create_gstream(engine)
-        except WindowsError as e:
-            if logwrite_:
-                logwrite_('libjt_synthesis error #3 ' + str(e))
-        if is_speaking_func_ and not is_speaking_func_():
-            libjt_refresh()
-            return None
-        try:
-            total_nsample = libjt.jt_trim_silence(engine, thres_, thres2_)
-            libjt.jt_speech_normalize(engine, level_, total_nsample)
-            speech_ptr = libjt.jt_speech_ptr(engine)
-            byte_count = total_nsample * sizeof(c_short)
-            buf = string_at(speech_ptr, byte_count)
-            if feed_func_:
-                feed_func_(buf)
-            #libjt.jt_save_logs("_logfile", engine, njd)
-        except WindowsError as e:
-            if logwrite_:
-                logwrite_('libjt_synthesis error #5 ' + str(e))
-    if logwrite_:
-        logwrite_('libjt_synthesis done.')
-    return buf
+++ /dev/null
-# _nvdajp_jtalk.py
-# -*- coding: utf-8 -*-
-#A part of NonVisual Desktop Access (NVDA)
-# speech engine nvdajp_jtalk
-# Copyright (C) 2010-2012 Takuya Nishimoto (nishimotz.com)
-# based on Open JTalk (bin/open_jtalk.c) http://github.com/nishimotz/libopenjtalk/
-
-from logHandler import log
-import time
-import Queue
-import os
-import codecs
-import re
-import string
-import ctypes
-import baseObject
-import copy
-import nvwave
-from .. import _espeak
-import _jtalk_core
-import _nvdajp_predic
-from _nvdajp_unicode import unicode_normalize
-import _bgthread
-import sys
-import time
-import watchdog
-import config
-
-# directory that contains this module (decoded from 'mbcs')
-jtalk_dir = unicode(os.path.dirname(__file__), 'mbcs')
-if hasattr(sys, 'frozen'):
-    # frozen build: prefer synthDrivers/jtalk below the current working directory
-    d = os.path.join(os.getcwdu(), 'synthDrivers', 'jtalk')
-    if os.path.isdir(d):
-        jtalk_dir = d
-
-# enables the verbose logwrite() calls in this module
-DEBUG = False
-
-# factor applied to fperiod by set_rate() when rateBoost is off
-RATE_BOOST_MULTIPLIER = 1.5
-
-# math.log(150) = 5.0, math.log(350) = 5.86
-_jtalk_voices = [
-    dict(
-        id="V1", name="m001", lang="ja",
-        samp_rate=48000, fperiod=240, alpha=0.55,
-        lf0_base=5.0, pitch_bias=0,
-        use_lpf=1, speaker_attenuation=1.0,
-        dir="m001", espeak_variant="max"),
-    dict(
-        id="V2", name="mei", lang="ja",
-        samp_rate=48000, fperiod=240, alpha=0.55,
-        lf0_base=5.86, pitch_bias=-10,
-        use_lpf=1, speaker_attenuation=0.5,
-        dir="mei_normal", espeak_variant="f1"),
-    dict(
-        id="V3", name="lite", lang="ja",
-        samp_rate=16000, fperiod=80, alpha=0.42,
-        lf0_base=5.0, pitch_bias=0,
-        use_lpf=0, speaker_attenuation=1.0,
-        dir="voice", espeak_variant="max"),
-]
-default_jtalk_voice = _jtalk_voices[1] # V2
-# the voice dict in use; set by initialize()
-voice_args = None
-
-class VoiceProperty(baseObject.AutoPropertyObject):
-    """Plain AutoPropertyObject subclass used to carry per-utterance voice settings."""
-
-    def __init__(self):
-        super(VoiceProperty, self).__init__()
-
-# if samp_rate==16000: normal speed = 80samples period
-fperiod = 240
-
-# gain control
-max_level = 32000
-thres_level = 128
-thres2_level = 128
-speaker_attenuation = 1.0
-
-logwrite = log.debug
-
-# speech bookkeeping
-currIndex = None
-lastIndex = None
-player = None
-currentEngine = 0 # 1:espeak 2:jtalk
-
-def isSpeaking():
-    """Return the isSpeaking flag stored on the _bgthread module."""
-    return _bgthread.isSpeaking
-
-def setSpeaking(b):
-    """Store b as the isSpeaking flag on the _bgthread module."""
-    _bgthread.isSpeaking = b
-
-# Speak msg with Open JTalk: normalize and pre-convert the text, split it on
-# whitespace, run MeCab on each piece and feed every synthesized chunk to
-# the wave player.  Does nothing when prop is None.
-# index is stored in currIndex while speaking and copied to lastIndex when
-# the whole message has been processed.
-# NOTE(review): the indentation of this block was lost; the comments below
-# do not assume a particular nesting of the inner statements.
-def _jtalk_speak(msg, index=None, prop=None):
- # NOTE(review): "buff" is declared global but not used in this function.
- global currIndex, buff
- global currentEngine
- global lastIndex
- if prop is None: return
- currIndex = index
- # character mode uses the voice's own frame period instead of the
- # rate-dependent module-level fperiod
- if prop.characterMode:
- fperiod_current = voice_args['fperiod']
- else:
- fperiod_current = fperiod
- msg = unicode_normalize(msg)
- msg = _nvdajp_predic.convert(msg)
- # lw is the logger handed to libjt_synthesis(); only set when DEBUG is on
- lw = None
- if DEBUG: lw = logwrite
- setSpeaking(True)
- currentEngine = 2
- if DEBUG: logwrite("p:%d i:%d msg:%s" % (prop.pitch, prop.inflection, msg))
- level = int(max_level * speaker_attenuation)
- # la/lo are passed to libjt_synthesis() as lf0_amp_/lf0_offset_
- la = 0.020 * prop.inflection # 50 = original range
- ls = 0.015 * (prop.pitch - 50.0 + voice_args['pitch_bias']) # 50 = no shift
- lo = ls + voice_args['lf0_base'] * (1 - la)
- if DEBUG: logwrite("lo:%f la:%f" % (lo, la))
- for t in string.split(msg):
- if DEBUG: logwrite("unicode (%s)" % t)
- s = _jtalk_core.Mecab_text2mecab(t)
- if DEBUG: logwrite("utf-8 (%s)" % s.decode('utf-8', 'ignore'))
- # cancelled: this early return leaves currIndex and currentEngine as set above
- if not isSpeaking(): _jtalk_core.libjt_refresh(); return
- mf = _jtalk_core.MecabFeatures()
- _jtalk_core.Mecab_analysis(s, mf, logwrite_=logwrite)
- if DEBUG: _jtalk_core.Mecab_print(mf, logwrite)
- _jtalk_core.Mecab_correctFeatures(mf)
- if DEBUG: _jtalk_core.Mecab_print(mf, logwrite)
- ar = _jtalk_core.Mecab_splitFeatures(mf, CODE_='utf-8')
- for m in ar:
- if isSpeaking():
- _jtalk_core.Mecab_utf8_to_cp932(m)
- if DEBUG: _jtalk_core.Mecab_print(m, logwrite, CODE_='cp932')
- if DEBUG: logwrite("Mecab_analysis done")
- _jtalk_core.libjt_synthesis(
- m.feature,
- m.size,
- fperiod_ = fperiod_current,
- feed_func_ = player.feed, # player.feed() is called inside
- is_speaking_func_ = isSpeaking,
- thres_ = thres_level,
- thres2_ = thres2_level,
- level_ = level,
- logwrite_ = lw,
- lf0_offset_ = lo,
- lf0_amp_ = la)
- _jtalk_core.libjt_refresh()
- if DEBUG: logwrite("libjt_synthesis done")
- # NOTE(review): if "del m" runs after the inner loop rather than inside it,
- # an empty "ar" on the first piece would raise NameError -- confirm nesting.
- del m
- del mf
- player.sync()
- lastIndex = currIndex
- currIndex = None
- setSpeaking(False)
- currentEngine = 0
-
-# mark number appended to each eSpeak utterance; incremented after every call
-espeakMark = 10000
-
-def _espeak_speak(msg, lang, index=None, prop=None):
-    """Speak msg with eSpeak in language lang and wait until it has finished.
-
-    The text is wrapped in a <voice> element followed by a <mark>; the
-    function polls until _espeak.lastIndex equals that mark or until
-    currentEngine is no longer 1.  index is then stored in lastIndex.
-    prop is accepted for signature compatibility and is not used.
-    """
-    global currentEngine, lastIndex, espeakMark
-    currentEngine = 1
-    msg = unicode(msg)
-    # translate() returns a new string, so the result has to be kept.
-    # U+0001 is removed and the markup delimiters are escaped as entities so
-    # that the text cannot break the markup built below.
-    msg = msg.translate({ord(u'\01'): None, ord(u'<'): u'&lt;', ord(u'>'): u'&gt;'})
-    msg = u"<voice xml:lang=\"%s\">%s</voice>" % (lang, msg)
-    msg += u"<mark name=\"%d\" />" % espeakMark
-    _espeak.speak(msg)
-    while currentEngine == 1 and _espeak.lastIndex != espeakMark:
-        time.sleep(0.1)
-        watchdog.alive()
-    time.sleep(0.4)
-    watchdog.alive()
-    lastIndex = index
-    currentEngine = 0
-    espeakMark += 1
-
-# call from BgThread
-def _speak(arg):
-    """Unpack [msg, lang, index, prop] and speak it with the engine for lang.
-
-    'ja' goes to Open JTalk, every other language to eSpeak.
-    """
-    msg, lang, index, prop = arg
-    if DEBUG:
-        logwrite('[' + lang + ']' + msg)
-        logwrite("_speak(%s)" % msg)
-    if lang != 'ja':
-        _espeak_speak(msg, lang, index, prop)
-    else:
-        _jtalk_speak(msg, index, prop)
-
-def speak(msg, lang, index=None, voiceProperty_=None):
-    """Queue msg for speaking on the background thread.
-
-    Nothing is queued when msg is empty after stripping or when
-    voiceProperty_ is None.  A deep copy of voiceProperty_ is queued.
-    """
-    text = msg.strip()
-    if not text or voiceProperty_ is None:
-        return
-    queued = [text, lang, index, copy.deepcopy(voiceProperty_)]
-    _bgthread.execWhenDone(_speak, queued, mustBeAsync=True)
-
-def stop():
-    """Stop the current utterance and drop all queued speech.
-
-    When eSpeak is the active engine only eSpeak is stopped.  Otherwise every
-    queued _speak task is removed from the background queue (other queued
-    items are put back), the speaking flag is cleared, the wave player is
-    stopped and lastIndex is reset to None.
-    """
-    # lastIndex must be declared global: without it the assignment at the end
-    # only created a local variable and the module value was never reset.
-    global currentEngine, lastIndex
-    if currentEngine == 1:
-        _espeak.stop()
-        currentEngine = 0
-        return
-    # Kill all speech from now.
-    # We still want parameter changes to occur, so requeue them.
-    params = []
-    stop_task_count = 0 # for log.info()
-    try:
-        while True:
-            item = _bgthread.bgQueue.get_nowait() # [func, args, kwargs]
-            if item[0] != _speak:
-                params.append(item)
-            else:
-                stop_task_count = stop_task_count + 1
-            _bgthread.bgQueue.task_done()
-    except Queue.Empty:
-        # Let the exception break us out of this loop, as queue.empty() is not reliable anyway.
-        pass
-    for item in params:
-        _bgthread.bgQueue.put(item)
-    setSpeaking(False)
-    if DEBUG: logwrite("stop: %d task(s) stopping" % stop_task_count)
-    # player is None until initialize() has run (and again after terminate())
-    if player is not None:
-        player.stop()
-    lastIndex = None
-
-def pause(switch):
-    """Forward pause(switch) to whichever engine is active (1: eSpeak, 2: wave player)."""
-    if currentEngine == 2:
-        player.pause(switch)
-    elif currentEngine == 1:
-        _espeak.pause(switch)
-
-# Prepare everything needed to speak with the given voice (one of the dicts
-# in _jtalk_voices): eSpeak, the wave player, the background thread, MeCab,
-# the pre-dictionary, the Open JTalk DLL and the voice files.
-# NOTE(review): the indentation of this block was lost, so it is not visible
-# here which statements belong to the "if not ..." guards.
-def initialize(voice = default_jtalk_voice):
- global player, voice_args
- global speaker_attenuation
- voice_args = voice
- speaker_attenuation = voice_args['speaker_attenuation']
- # eSpeak is used for text that is not Japanese (see _speak)
- if not _espeak.espeakDLL:
- _espeak.initialize()
- log.debug("jtalk using eSpeak version %s" % _espeak.info())
- _espeak.setVoiceByLanguage("en")
- _espeak.setVoiceAndVariant(variant=voice["espeak_variant"])
- # an existing player is reused as is, including its sample rate
- if not player:
- player = nvwave.WavePlayer(channels=1, samplesPerSec=voice_args['samp_rate'], bitsPerSample=16, outputDevice=config.conf["speech"]["outputDevice"])
- if not _bgthread.bgThread:
- _bgthread.initialize()
- if not _jtalk_core.mecab:
- _jtalk_core.Mecab_initialize(log.info)
- _nvdajp_predic.setup()
-
- # load the DLL that sits next to this module and pass the voice settings on
- jt_dll = os.path.join(jtalk_dir, 'libopenjtalk.dll')
- log.debug('jt_dll %s' % jt_dll)
- _jtalk_core.libjt_initialize(jt_dll, **voice_args)
- log.debug(_jtalk_core.libjt_version())
-
- # a missing voice directory is only logged as an error
- voice_dir = os.path.join(jtalk_dir, voice_args['dir'])
- if os.path.isdir(voice_dir):
- _jtalk_core.libjt_load(voice_dir)
- log.info("loaded " + voice_args['dir'])
- else:
- log.error('%s is not voice directory.' % voice_dir)
-
-def terminate():
-    """Stop speech, then shut down the background thread, the wave player and eSpeak."""
-    global player
-    stop()
-    _bgthread.terminate()
-    # close the player and forget it
-    player.close()
-    player = None
-    _espeak.terminate()
-
-# last value passed to set_rate()
-rate_percent = 50
-
-def get_rate(rateBoost):
-    """Return the stored rate; rateBoost is accepted but not used."""
-    return rate_percent
-
-def set_rate(rate, rateBoost):
-    """Store rate and derive the module-level frame period (fperiod) from it.
-
-    The mapping depends on the current voice's sample rate; when rateBoost is
-    false the result is multiplied by RATE_BOOST_MULTIPLIER.
-    """
-    global fperiod, rate_percent
-    rate_percent = rate
-    samp_rate = voice_args['samp_rate']
-    if samp_rate == 16000:
-        fperiod = int(80 - int(rate) / 2) # 80..30
-    elif samp_rate == 48000:
-        fperiod = int(240 - 1.5 * int(rate)) # 240..90
-    if not rateBoost:
-        fperiod = int(fperiod * RATE_BOOST_MULTIPLIER)
-
-def set_volume(vol):
-    """Derive max_level from vol and reset both silence thresholds to 128."""
-    global max_level, thres_level, thres2_level
-    max_level = int(326.67 * int(vol) + 100) # 100..32767
-    thres_level = thres2_level = 128
-
+++ /dev/null
-# _nvdajp_predic.py
-# -*- coding: utf-8 -*-
-# A part of speech engine nvdajp_jtalk
-# Copyright (C) 2010-2011 Takuya Nishimoto (nishimotz.com)
-
-import re
-
-# list of [compiled pattern, replacement] rules; filled in by setup()
-predic = None
-
-def setup():
-    """Load the rule list once; later calls do nothing."""
-    global predic
-    if predic is not None:
-        return
-    predic = load()
-
-def convert(msg):
-    """Apply every rule in predic to msg in order and return the lower-cased result.
-
-    A rule that raises is skipped (best effort) so that one bad rule cannot
-    prevent the text from being spoken.  setup() must have been called first.
-    """
-    for p in predic:
-        try:
-            msg = re.sub(p[0], p[1], msg)
-        except Exception:
-            # Only ordinary errors are swallowed; KeyboardInterrupt and
-            # SystemExit are no longer caught as they were by a bare except.
-            continue
-    return msg.lower()
-
-def load():
-    """Return the ordered list of [compiled pattern, replacement] rewrite rules.
-
-    The rules are applied in list order by convert(); later rules rely on the
-    output of earlier ones (for example the fullwidth-symbol rules run before
-    the digit-grouping rules).
-    """
-    return [
-        [re.compile(u'^ー$'), u'チョーオン'],
-        [re.compile(u'^ン$'), u'ウン'],
-        [re.compile(u'\\sー$'), u' チョーオン'],
-        [re.compile(u'\\sン$'), u' ウン'],
-
-        ## iteration mark: repeat the preceding character
-        ## 人々 昔々 家々 山々
-        [re.compile(u'(.)々'), u'\\1\\1'],
-
-        ## isolated hiragana HA (mecab replaces to WA)
-        ## は
-        [re.compile(u'^は$'), u'ハ'],
-        [re.compile(u'\\sは$'), u' ハ'],
-
-        ## 59 名
-        [re.compile(u'(\\d) 名'), u'\\1名'],
-        ## 4行 ヨンコー -> ヨンギョー
-        [re.compile(u'(\\d)行'), u'\\1ギョー'],
-        ## 2 分前更新
-        ## the whole number is captured; "(\\d)+" kept only its last digit
-        [re.compile(u'(\\d+) 分前更新'), u'\\1分マエコーシン'],
-
-        ## 1MB 10MB 1.2MB 0.5MB 321.0MB 123.45MB 2.7GB
-        ## 1 MB 10 MB 1.2 MB 0.5 MB 321.0 MB 123.45 MB 2.7 GB
-        [re.compile(u'(\\d+)\\s*KB'), u'\\1キロバイト'],
-        [re.compile(u'(\\d+)\\s*MB'), u'\\1メガバイト'],
-        [re.compile(u'(\\d+)\\s*GB'), u'\\1ギガバイト'],
-        [re.compile(u'(\\d+)\\s*MHz'), u'\\1メガヘルツ'],
-        [re.compile(u'(\\d+)\\s*GHz'), u'\\1ギガヘルツ'],
-
-        ## 2013 年 1 月 2 日
-        [re.compile(u'(\\d+)\\s+年\\s+(\\d+)\\s+月\\s+(\\d+)\\s+日'), u'\\1年\\2月\\3日'],
-
-        ### zenkaku symbols convert
-        ## fullwidth full stop (U+FF0E) and fullwidth comma (U+FF0C) become
-        ## their ASCII forms.  The patterns are written as escapes: a literal
-        ## ASCII '.' here would match, and replace, every character.
-        [re.compile(u'\uff0e'), u'.'],
-        [re.compile(u'\uff0c'), u','],
-
-        ## 1,234
-        ## 1,234,567
-        ## 1,234,567,890
-        ## 1,23 = ichi comma niju san
-        ## 1,0 = ichi comma zero
-        [re.compile(u'(\\d)\\,(\\d{3})'), u'\\1\\2'],
-        [re.compile(u'(\\d{2})\\,(\\d{3})'), u'\\1\\2'],
-        [re.compile(u'(\\d{3})\\,(\\d{3})'), u'\\1\\2'],
-        [re.compile(u'(\\d)\\,(\\d{1,2})'), u'\\1カンマ\\2'],
-
-        [re.compile(u'(\\d{1,4})\\.(\\d{1,4})\\.(\\d{1,4})\\.(\\d{1,4})'), u'\\1テン\\2テン\\3テン\\4'],
-        [re.compile(u'(\\d{1,4})\\.(\\d{1,4})\\.(\\d{1,4})'), u'\\1テン\\2テン\\3'],
-
-        # do not replace '0' after '.' to phonetic symbols (prepare)
-        [re.compile(u'\\.0'), u'.00'],
-
-        # numbers with a leading zero are split into ' 0d' tokens, one per digit
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 0\\6 0\\7 0\\8 0\\9 '],
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 0\\6 0\\7 0\\8 '],
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 0\\6 0\\7 '],
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 0\\6 '],
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 '],
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 '],
-        [re.compile(u'\\b0(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 '],
-        [re.compile(u'\\b0(\\d)(\\d)'), u' 00 0\\1 0\\2 '],
-        [re.compile(u'\\b0(\\d)'), u' 00 0\\1 '],
-
-        # each ' 0d' token becomes the reading of digit d
-        [re.compile(u' 00'), u'ゼロ'],
-        [re.compile(u' 01'), u'イチ'],
-        [re.compile(u' 02'), u'ニー'],
-        [re.compile(u' 03'), u'サン'],
-        [re.compile(u' 04'), u'ヨン'],
-        [re.compile(u' 05'), u'ゴー'],
-        [re.compile(u' 06'), u'ロク'],
-        [re.compile(u' 07'), u'ナナ'],
-        [re.compile(u' 08'), u'ハチ'],
-        [re.compile(u' 09'), u'キュー'],
-
-        # do not replace '0' after '.' to phonetic symbols (finalize)
-        [re.compile(u'\\.00'), u'.0'],
-    ]
-