3 /***************************************************************************
4 * Copyright (C) 2005 to 2012 by Jonathan Duddington *
5 * email: jonsd@users.sourceforge.net *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 3 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, see: *
19 * <http://www.gnu.org/licenses/>. *
20 ***************************************************************************/
23 /*************************************************************/
24 /* This is the header file for the library version of espeak */
26 /*************************************************************/
32 #define ESPEAK_API __declspec(dllexport)
37 #define ESPEAK_API_REVISION 7
40 Added parameter "options" to espeak_Initialize()
43 Added espeakWORDGAP to espeak_PARAMETER
46 Added flags parameter to espeak_CompileDictionary()
49 Added espeakCHARS_16BIT
52 Added macros: espeakRATE_MINIMUM, espeakRATE_MAXIMUM, espeakRATE_NORMAL
54 Revision 7 24.Dec.2011
55 Changed espeak_EVENT structure to add id.string[] for phoneme mnemonics.
56 Added espeakINITIALIZE_PHONEME_IPA option for espeak_Initialize() to report phonemes as IPA names.
59 /********************/
61 /********************/
63 // values for 'value' in espeak_SetParameter(espeakRATE, value, 0), nominally in words-per-minute
64 #define espeakRATE_MINIMUM 80 // lower end of the documented espeakRATE range (80 to 450)
65 #define espeakRATE_MAXIMUM 450 // upper end of the documented espeakRATE range (80 to 450)
66 #define espeakRATE_NORMAL 175 // presumably the default speaking rate -- TODO confirm against the implementation
70 espeakEVENT_LIST_TERMINATED = 0, // Retrieval mode: terminates the event list.
71 espeakEVENT_WORD = 1, // Start of word
72 espeakEVENT_SENTENCE = 2, // Start of sentence
73 espeakEVENT_MARK = 3, // Mark
74 espeakEVENT_PLAY = 4, // Audio element
75 espeakEVENT_END = 5, // End of sentence or clause
76 espeakEVENT_MSG_TERMINATED = 6, // End of message
77 espeakEVENT_PHONEME = 7, // Phoneme, if enabled in espeak_Initialize()
78 espeakEVENT_SAMPLERATE = 8 // internal use, set sample rate
84 espeak_EVENT_TYPE type;
85 unsigned int unique_identifier; // message identifier (or 0 for key or character)
86 int text_position; // the number of characters from the start of the text
87 int length; // word length, in characters (for espeakEVENT_WORD)
88 int audio_position; // the time in mS within the generated speech output data
89 int sample; // sample id (internal use)
90 void* user_data; // pointer supplied by the calling program
92 int number; // used for WORD and SENTENCE events.
93 const char *name; // used for MARK and PLAY events. UTF8 string
94 char string[8]; // used for phoneme names (UTF8). Terminated by a zero byte unless the name needs the full 8 bytes.
98 When a message is supplied to espeak_Synth, the request is buffered and espeak_Synth returns. When the message is really processed, the callback function will be repeatedly called.
101 In RETRIEVAL mode, the callback function supplies to the calling program the audio data and an event list terminated by 0 (LIST_TERMINATED).
103 In PLAYBACK mode, the callback function is called as soon as an event happens.
105 For example suppose that the following message is supplied to espeak_Synth:
109 * Once processed in RETRIEVAL mode, it could lead to 3 calls of the callback function :
113 List of events: SENTENCE + WORD + LIST_TERMINATED
117 List of events: WORD + END + LIST_TERMINATED
121 List of events: MSG_TERMINATED + LIST_TERMINATED
124 * Once processed in PLAYBACK mode, it could lead to 5 calls of the callback function:
127 ** WORD (call when the sounds are actually played)
129 ** END (call when the end of sentence is actually played.)
133 The MSG_TERMINATED event is the last event. It can inform the calling program to clear the user data related to the message.
134 So if the synthesis must be stopped, the callback function is called for each pending message with the MSG_TERMINATED event.
136 A MARK event indicates a <mark> element in the text.
137 A PLAY event indicates an <audio> element in the text, for which the calling program should play the named sound file.
146 } espeak_POSITION_TYPE;
150 /* PLAYBACK mode: plays the audio data, supplies events to the calling program*/
151 AUDIO_OUTPUT_PLAYBACK,
153 /* RETRIEVAL mode: supplies audio data and events to the calling program */
154 AUDIO_OUTPUT_RETRIEVAL,
156 /* SYNCHRONOUS mode: as RETRIEVAL but doesn't return until synthesis is completed */
157 AUDIO_OUTPUT_SYNCHRONOUS,
159 /* Synchronous playback */
160 AUDIO_OUTPUT_SYNCH_PLAYBACK
162 } espeak_AUDIO_OUTPUT;
167 EE_INTERNAL_ERROR=-1,
// Bit flags for the 'options' argument of espeak_Initialize(); they may be combined.
172 #define espeakINITIALIZE_PHONEME_EVENTS 0x0001 // bit 0: allow espeakEVENT_PHONEME events
173 #define espeakINITIALIZE_PHONEME_IPA 0x0002 // bit 1: espeakEVENT_PHONEME events give IPA phoneme names, not eSpeak phoneme names
174 #define espeakINITIALIZE_DONT_EXIT 0x8000 // bit 15: don't exit if espeak_data is not found (used for --help)
179 ESPEAK_API int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const char *path, int options);
180 /* Must be called before any synthesis functions are called.
181 output: the audio data can either be played by eSpeak or passed back by the SynthCallback function.
183 buflength: The length in mS of sound buffers passed to the SynthCallback function.
185 path: The directory which contains the espeak-data directory, or NULL for the default location.
187 options: bit 0: 1=allow espeakEVENT_PHONEME events.
188 bit 1: 1= espeakEVENT_PHONEME events give IPA phoneme names, not eSpeak phoneme names
189 bit 15: 1=don't exit if espeak_data is not found (used for --help)
191 Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR).
// Function type of the synthesis callback registered with espeak_SetSynthCallback().
// The callback has the form:
//   int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);
// and returns 0 to continue synthesis or 1 to abort synthesis.
194 typedef int (t_espeak_callback)(short*, int, espeak_EVENT*);
199 ESPEAK_API void espeak_SetSynthCallback(t_espeak_callback* SynthCallback);
200 /* Must be called before any synthesis functions are called.
201 This specifies a function in the calling program which is called when a buffer of
202 speech sound data has been produced.
205 The callback function is of the form:
207 int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);
209 wav: is the speech sound data which has been produced.
210 NULL indicates that the synthesis has been completed.
212 numsamples: is the number of entries in wav. This number may vary, may be less than
213 the value implied by the buflength parameter given in espeak_Initialize, and may
214 sometimes be zero (which does NOT indicate end of synthesis).
216 events: an array of espeak_EVENT items which indicate word and sentence events, and
217 also the occurrence of <mark> and <audio> elements within the text. The list of
218 events is terminated by an event of type = 0.
221 Callback returns: 0=continue synthesis, 1=abort synthesis.
227 ESPEAK_API void espeak_SetUriCallback(int (*UriCallback)(int, const char*, const char*));
228 /* This function may be called before synthesis functions are used, in order to deal with
229 <audio> tags. It specifies a callback function which is called when an <audio> element is
230 encountered and allows the calling program to indicate whether the sound file which
231 is specified in the <audio> element is available and is to be played.
233 The callback function is of the form:
235 int UriCallback(int type, const char *uri, const char *base);
237 type: type of callback event. Currently only 1= <audio> element
239 uri: the "src" attribute from the <audio> element
241 base: the "xml:base" attribute (if any) from the <speak> element
243 Return: 1=don't play the sound, but speak the text alternative.
244 0=place a PLAY event in the event list at the point where the <audio> element
245 occurs. The calling program can then play the sound at that point.
249 /********************/
251 /********************/
// Values for the "flags" argument of espeak_Synth().
// Type of character codes in the text (one of):
254 #define espeakCHARS_AUTO 0 // 8 bit or UTF8 (this is the default)
255 #define espeakCHARS_UTF8 1 // UTF8 encoding
256 #define espeakCHARS_8BIT 2 // the 8 bit ISO-8859 character set for the particular language
257 #define espeakCHARS_WCHAR 3 // wide characters (wchar_t)
258 #define espeakCHARS_16BIT 4 // presumably 16-bit character codes; not described in the espeak_Synth() notes -- TODO confirm
// Option bits which may be OR'd together with the character code type:
260 #define espeakSSML 0x10 // elements within < > are treated as SSML elements, or if not recognised are ignored
261 #define espeakPHONEMES 0x100 // text within [[ ]] is treated as phoneme codes
262 #define espeakENDPAUSE 0x1000 // if set, a sentence pause is added at the end of the text
263 #define espeakKEEP_NAMEDATA 0x2000 // NOTE(review): purpose is not documented in this header -- confirm against the implementation
268 ESPEAK_API espeak_ERROR espeak_Synth(const void *text,
270 unsigned int position,
271 espeak_POSITION_TYPE position_type,
272 unsigned int end_position,
274 unsigned int* unique_identifier,
276 /* Synthesize speech for the specified text. The speech sound data is passed to the calling
277 program in buffers by means of the callback function specified by espeak_SetSynthCallback(). The command is asynchronous: it is internally buffered and returns as soon as possible. If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound data are played by eSpeak.
279 text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters,
280 wide characters (wchar_t), or UTF8 encoding. Which of these is determined by the "flags"
283 size: Equal to (or greater than) the size of the text data, in bytes. This is used in order
284 to allocate internal storage space for the text. This value is not used for
285 AUDIO_OUTPUT_SYNCHRONOUS mode.
287 position: The position in the text where speaking starts. Zero indicates speak from the
290 position_type: Determines whether "position" is a number of characters, words, or sentences.
293 end_position: If set, this gives a character position at which speaking will stop. A value
294 of zero indicates no end position.
296 flags: These may be OR'd together:
297 Type of character codes, one of:
298 espeakCHARS_UTF8 UTF8 encoding
299 espeakCHARS_8BIT The 8 bit ISO-8859 character set for the particular language.
300 espeakCHARS_AUTO 8 bit or UTF8 (this is the default)
301 espeakCHARS_WCHAR Wide characters (wchar_t)
303 espeakSSML Elements within < > are treated as SSML elements, or if not recognised are ignored.
305 espeakPHONEMES Text within [[ ]] is treated as phoneme codes (in espeak's Kirshenbaum encoding).
307 espeakENDPAUSE If set then a sentence pause is added at the end of the text. If not set then
308 this pause is suppressed.
310 unique_identifier: message identifier; helpful for identifying later
311 data supplied to the callback.
313 user_data: pointer which will be passed to the callback function.
315 Return: EE_OK: operation achieved
316 EE_BUFFER_FULL: the command can not be buffered;
317 you may try after a while to call the function again.
324 ESPEAK_API espeak_ERROR espeak_Synth_Mark(const void *text,
326 const char *index_mark,
327 unsigned int end_position,
329 unsigned int* unique_identifier,
331 /* Synthesize speech for the specified text. Similar to espeak_Synth() but the start position is
332 specified by the name of a <mark> element in the text.
334 index_mark: The "name" attribute of a <mark> element within the text which specified the
335 point at which synthesis starts. UTF8 string.
337 For the other parameters, see espeak_Synth()
339 Return: EE_OK: operation achieved
340 EE_BUFFER_FULL: the command can not be buffered;
341 you may try after a while to call the function again.
348 ESPEAK_API espeak_ERROR espeak_Key(const char *key_name);
349 /* Speak the name of a keyboard key.
350 If key_name is a single character, it speaks the name of the character.
351 Otherwise, it speaks key_name as a text string.
353 Return: EE_OK: operation achieved
354 EE_BUFFER_FULL: the command can not be buffered;
355 you may try after a while to call the function again.
362 ESPEAK_API espeak_ERROR espeak_Char(wchar_t character);
363 /* Speak the name of the given character
365 Return: EE_OK: operation achieved
366 EE_BUFFER_FULL: the command can not be buffered;
367 you may try after a while to call the function again.
374 /***********************/
375 /* Speech Parameters */
376 /***********************/
379 espeakSILENCE=0, /* internal use */
387 espeakOPTIONS=8, // reserved for misc. options. not yet used
392 espeakEMPHASIS, /* internal use */
393 espeakLINELENGTH, /* internal use */
394 espeakVOICETYPE, // internal, 1=mbrola
395 N_SPEECH_PARAM /* last enum */
407 ESPEAK_API espeak_ERROR espeak_SetParameter(espeak_PARAMETER parameter, int value, int relative);
408 /* Sets the value of the specified parameter.
409 relative=0 Sets the absolute value of the parameter.
410 relative=1 Sets a relative value of the parameter.
413 espeakRATE: speaking speed in words per minute. Values 80 to 450.
415 espeakVOLUME: volume in range 0-200 or more.
416 0=silence, 100=normal full volume, greater values may produce amplitude compression or distortion
418 espeakPITCH: base pitch, range 0-100. 50=normal
420 espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
422 espeakPUNCTUATION: which punctuation characters to announce:
423 value in espeak_PUNCT_TYPE (none, all, some),
424 see espeak_SetPunctuationList() to specify which characters are announced.
426 espeakCAPITALS: announce capital letters by:
430 3 or higher, by raising pitch. This value gives the amount in Hz by which the pitch
431 of a word is raised to indicate it has a capital letter.
433 espeakWORDGAP: pause between words, units of 10mS (at the default speed)
435 Return: EE_OK: operation achieved
436 EE_BUFFER_FULL: the command can not be buffered;
437 you may try after a while to call the function again.
444 ESPEAK_API int espeak_GetParameter(espeak_PARAMETER parameter, int current);
445 /* current=0 Returns the default value of the specified parameter.
446 current=1 Returns the current value of the specified parameter, as set by SetParameter()
452 ESPEAK_API espeak_ERROR espeak_SetPunctuationList(const wchar_t *punctlist);
453 /* Specifies a list of punctuation characters whose names are to be spoken when the
454 value of the Punctuation parameter is set to "some".
456 punctlist: A list of character codes, terminated by a zero character.
458 Return: EE_OK: operation achieved
459 EE_BUFFER_FULL: the command can not be buffered;
460 you may try after a while to call the function again.
467 ESPEAK_API void espeak_SetPhonemeTrace(int value, FILE *stream);
468 /* Controls the output of phoneme symbols for the text
469 value=0 No phoneme output (default)
470 value=1 Output the translated phoneme symbols for the text
471 value=2 as (1), but also output a trace of how the translation was done (matching rules and list entries)
472 value=3 as (1), but produces IPA rather than ascii phoneme names
474 stream output stream for the phoneme symbols (and trace). If stream=NULL then it uses stdout.
480 ESPEAK_API void espeak_CompileDictionary(const char *path, FILE *log, int flags);
481 /* Compile pronunciation dictionary for a language which corresponds to the currently
482 selected voice. The required voice should be selected before calling this function.
484 path: The directory which contains the language's '_rules' and '_list' files.
485 'path' should end with a path separator character ('/').
486 log: Stream for error reports and statistics information. If log=NULL then stderr will be used.
488 flags: Bit 0: include source line information for debug purposes (This is displayed with the
489 -X command line option).
491 /***********************/
492 /* Voice Selection */
493 /***********************/
498 const char *name; // a given name for this voice. UTF8 string.
499 const char *languages; // list of pairs of (byte) priority + (string) language (and dialect qualifier)
500 const char *identifier; // the filename for this voice within espeak-data/voices
501 unsigned char gender; // 0=none 1=male, 2=female,
502 unsigned char age; // 0=not specified, or age in years
503 unsigned char variant; // only used when passed as a parameter to espeak_SetVoiceByProperties
504 unsigned char xx1; // for internal use
505 int score; // for internal use
506 void *spare; // for internal use
509 /* Note: The espeak_VOICE structure is used for two purposes:
510 1. To return the details of the available voices.
511 2. As a parameter to espeak_SetVoiceByProperties() in order to specify selection criteria.
513 In (1), the "languages" field consists of a list of (UTF8) language names for which this voice
514 may be used, each language name in the list is terminated by a zero byte and is also preceded by
515 a single byte which gives a "priority" number. The list of languages is terminated by an
516 additional zero byte.
518 A language name consists of a language code, optionally followed by one or more qualifier (dialect)
519 names separated by hyphens (eg. "en-uk"). A voice might, for example, have languages "en-uk" and
520 "en". Even without "en" listed, voice would still be selected for the "en" language (because
521 "en-uk" is related) but at a lower priority.
523 The priority byte indicates how the voice is preferred for the language. A low number indicates a
524 more preferred voice, a higher number indicates a less preferred voice.
526 In (2), the "languages" field consists simply of a single (UTF8) language name, with no preceding
533 ESPEAK_API const espeak_VOICE **espeak_ListVoices(espeak_VOICE *voice_spec);
534 /* Reads the voice files from espeak-data/voices and creates an array of espeak_VOICE pointers.
535 The list is terminated by a NULL pointer
537 If voice_spec is NULL then all voices are listed.
538 If voice_spec is given, then only the voices which are compatible with the voice_spec
539 are listed, and they are listed in preference order.
545 ESPEAK_API espeak_ERROR espeak_SetVoiceByName(const char *name);
546 /* Searches for a voice with a matching "name" field. Language is not considered.
547 "name" is a UTF8 string.
549 Return: EE_OK: operation achieved
550 EE_BUFFER_FULL: the command can not be buffered;
551 you may try after a while to call the function again.
558 ESPEAK_API espeak_ERROR espeak_SetVoiceByProperties(espeak_VOICE *voice_spec);
559 /* An espeak_VOICE structure is used to pass criteria to select a voice. Any of the following
562 name NULL, or a voice name
564 languages NULL, or a single language string (with optional dialect), eg. "en-uk", or "en"
566 gender 0=not specified, 1=male, 2=female
568 age 0=not specified, or an age in years
570 variant After a list of candidates is produced, scored and sorted, "variant" is used to index
571 that list and choose a voice.
572 variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc
578 ESPEAK_API espeak_VOICE *espeak_GetCurrentVoice(void);
579 /* Returns the espeak_VOICE data for the currently selected voice.
580 This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s>
586 ESPEAK_API espeak_ERROR espeak_Cancel(void);
587 /* Immediately stop synthesis and audio output of the current text. When this
588 function returns, the audio output is fully stopped and the synthesizer is ready to
589 synthesize a new message.
591 Return: EE_OK: operation achieved
599 ESPEAK_API int espeak_IsPlaying(void);
600 /* Returns 1 if audio is played, 0 otherwise.
606 ESPEAK_API espeak_ERROR espeak_Synchronize(void);
607 /* This function returns when all data have been spoken.
608 Return: EE_OK: operation achieved
615 ESPEAK_API espeak_ERROR espeak_Terminate(void);
616 /* last function to be called.
617 Return: EE_OK: operation achieved
625 ESPEAK_API const char *espeak_Info(const char **path_data);
626 /* Returns the version number string.
627 path_data returns the path to espeak_data