1 /***************************************************************************
2 * Copyright (C) 2005 to 2013 by Jonathan Duddington *
3 * email: jonsd@users.sourceforge.net *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 3 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, see: *
17 * <http://www.gnu.org/licenses/>. *
18 ***************************************************************************/
// Combine two characters into one integer, used for translator names:
// c1 is shifted up by 8 bits and c2 is added to it.
// Each argument and the whole expansion are parenthesized so that the macro
// gives the same value when an argument is a compound expression, or when the
// macro is used next to a higher-precedence operator (e.g. L(a,b) * 2).
#define L(c1,c2) (((c1)<<8)+(c2))
23 #define CTRL_EMBEDDED 0x01 // control character at the start of an embedded command
24 #define REPLACED_E 'E' // 'e' replaced by silent e
26 #define N_WORD_PHONEMES 200 // max phonemes in a word
27 #define N_WORD_BYTES 160 // max bytes for the UTF8 characters in a word
28 #define N_CLAUSE_WORDS 300 // max words in a clause
29 #define N_RULE_GROUP2 120 // max num of two-letter rule chains
30 #define N_HASH_DICT 1024
32 #define N_LETTER_GROUPS 95 // maximum is 127-32
35 /* dictionary flags, word 1 */
36 // bits 0-3 stressed syllable, bit 6=unstressed
37 #define FLAG_SKIPWORDS 0x80
38 #define FLAG_PREPAUSE 0x100
40 #define FLAG_STRESS_END 0x200 // full stress if at end of clause
41 #define FLAG_STRESS_END2 0x400 // full stress if at end of clause, or only followed by unstressed
42 #define FLAG_UNSTRESS_END 0x800 // reduce stress at end of clause
43 #define FLAG_SPELLWORD 0x1000 // re-translate the word as individual letters, separated by spaces
44 #define FLAG_ABBREV 0x2000 // spell as letters, even with a vowel, OR use specified pronunciation rather than split into letters
45 #define FLAG_DOUBLING 0x4000 // doubles the following consonant
47 #define BITNUM_FLAG_ALT 14 // bit number of FLAG_ALT_TRANS - 1, so FLAG_ALT_TRANS is bit 15 and FLAG_ALTn_TRANS is bit (BITNUM_FLAG_ALT + n)
48 #define FLAG_ALT_TRANS 0x8000 // language specific
49 #define FLAG_ALT2_TRANS 0x10000 // language specific
50 #define FLAG_ALT3_TRANS 0x20000 // language specific
51 #define FLAG_ALT4_TRANS 0x40000 // language specific
52 #define FLAG_ALT5_TRANS 0x80000 // language specific
53 #define FLAG_ALT6_TRANS 0x100000 // language specific
55 #define FLAG_COMBINE 0x800000 // combine with the next word
56 #define FLAG_ALLOW_DOT 0x01000000 // ignore '.' after word (abbreviation)
57 #define FLAG_NEEDS_DOT 0x02000000 // only if the word is followed by a dot
58 #define FLAG_WAS_UNPRONOUNCABLE 0x04000000 // the unpronounceable routine was used
59 #define FLAG_MAX3 0x08000000 // limit to 3 repeats
60 #define FLAG_PAUSE1 0x10000000 // shorter prepause
61 #define FLAG_TEXTMODE 0x20000000 // word translates to replacement text, not phonemes
62 #define BITNUM_FLAG_TEXTMODE 29 // bit number of FLAG_TEXTMODE (0x20000000 == 1 << 29)
64 #define FLAG_FOUND_ATTRIBUTES 0x40000000 // word was found in the dictionary list (has attributes)
65 #define FLAG_FOUND 0x80000000 // pronunciation was found in the dictionary list (bit 31, the top bit of a 32-bit word)
67 // dictionary flags, word 2
68 #define FLAG_VERBF 0x1 /* verb follows */
69 #define FLAG_VERBSF 0x2 /* verb follows, may have -s suffix */
70 #define FLAG_NOUNF 0x4 /* noun follows */
71 #define FLAG_PASTF 0x8 /* past tense follows */
72 #define FLAG_VERB 0x10 /* pronunciation for verb */
73 #define FLAG_NOUN 0x20 /* pronunciation for noun */
74 #define FLAG_PAST 0x40 /* pronunciation for past tense */
75 #define FLAG_VERB_EXT 0x100 /* extend the 'verb follows' */
76 #define FLAG_CAPITAL 0x200 /* pronunciation if initial letter is upper case */
77 #define FLAG_ALLCAPS 0x400 // only if the word is all capitals
78 #define FLAG_ACCENT 0x800 // character name is base-character name + accent name
79 #define FLAG_HYPHENATED 0x1000 // multiple-words, but needs hyphen between parts 1 and 2
80 #define FLAG_SENTENCE 0x2000 // only if the clause is a sentence
81 #define FLAG_ONLY 0x4000
82 #define FLAG_ONLY_S 0x8000
83 #define FLAG_STEM 0x10000 // must have a suffix
84 #define FLAG_ATEND 0x20000 // use this pronunciation if at end of clause
85 #define FLAG_ATSTART 0x40000 // use this pronunciation if at start of clause
86 #define FLAG_NATIVE 0x80000 // not if we've switched translators
87 #define FLAG_LOOKUP_SYMBOL 0x40000000 // to indicate called from Lookup()
// Bit numbers for some of the word 2 flags above.
// Each value is 32 plus the flag's bit position within word 2.
89 #define BITNUM_FLAG_ALLCAPS 0x2a // 42 = 32 + 10, FLAG_ALLCAPS (0x400) is bit 10
90 #define BITNUM_FLAG_HYPHENATED 0x2c // 44 = 32 + 12, FLAG_HYPHENATED (0x1000) is bit 12
91 #define BITNUM_FLAG_ONLY 0x2e // 46 = 32 + 14, FLAG_ONLY (0x4000) is bit 14
92 #define BITNUM_FLAG_ONLY_S 0x2f // 47 = 32 + 15, FLAG_ONLY_S (0x8000) is bit 15
95 // wordflags, flags in source word
96 #define FLAG_ALL_UPPER 0x1 /* no lower case letters in the word */
97 #define FLAG_FIRST_UPPER 0x2 /* first letter is upper case */
98 #define FLAG_UPPERS 0x3 // FLAG_ALL_UPPER | FLAG_FIRST_UPPER
99 #define FLAG_HAS_PLURAL 0x4 /* upper-case word with s or 's lower-case ending */
100 #define FLAG_PHONEMES 0x8 /* word is phonemes */
101 #define FLAG_LAST_WORD 0x10 /* last word in clause */
102 #define FLAG_EMBEDDED 0x40 /* word is preceded by embedded commands */
103 #define FLAG_HYPHEN 0x80
104 #define FLAG_NOSPACE 0x100 // word is not separated from previous word by a space
105 #define FLAG_FIRST_WORD 0x200 // first word in clause
106 #define FLAG_FOCUS 0x400 // the focus word of a clause
107 #define FLAG_EMPHASIZED 0x800
108 #define FLAG_EMPHASIZED2 0xc00 // FLAG_FOCUS | FLAG_EMPHASIZED
109 #define FLAG_DONT_SWITCH_TRANSLATOR 0x1000
110 #define FLAG_SUFFIX_REMOVED 0x2000
111 #define FLAG_HYPHEN_AFTER 0x4000
112 #define FLAG_ORDINAL 0x8000 // passed to TranslateNumber() to indicate an ordinal number
113 #define FLAG_HAS_DOT 0x10000 // dot after this word
114 #define FLAG_COMMA_AFTER 0x20000 // comma after this word
115 #define FLAG_MULTIPLE_SPACES 0x40000 // word is preceded by multiple spaces, newline, or tab
116 #define FLAG_INDIVIDUAL_DIGITS 0x80000 // speak number as individual digits
117 #define FLAG_DELETE_WORD 0x100000 // don't speak this word, it has been spoken as part of the previous word
118 #define FLAG_CHAR_REPLACED 0x200000 // characters have been replaced by .replace in the *_rules
119 #define FLAG_TRANSLATOR2 0x400000 // retranslating using a different language
121 #define FLAG_SUFFIX_VOWEL 0x08000000 // remember an initial vowel from the suffix
122 #define FLAG_NO_TRACE 0x10000000 // passed to TranslateRules() to suppress dictionary lookup printout
123 #define FLAG_NO_PREFIX 0x20000000
124 #define FLAG_UNPRON_TEST 0x80000000 // do unpronounceability test on the beginning of the word
127 // prefix/suffix flags (bits 8 to 14, bits 16 to 22) don't use 0x8000, 0x800000
128 #define SUFX_E 0x0100 // e may have been added
129 #define SUFX_I 0x0200 // y may have been changed to i
130 #define SUFX_P 0x0400 // prefix
131 #define SUFX_V 0x0800 // suffix means use the verb form pronunciation
132 #define SUFX_D 0x1000 // previous letter may have been doubled
133 #define SUFX_F 0x2000 // verb follows
134 #define SUFX_Q 0x4000 // don't retranslate
135 #define SUFX_T 0x10000 // don't affect the stress position in the stem
136 #define SUFX_B 0x20000 // break, this character breaks the word into stem and suffix (used with SUFX_P)
137 #define SUFX_A 0x40000 // remember that the suffix starts with a vowel
138 #define SUFX_M 0x80000 // bit 19, allow multiple suffixes
// NOTE(review): SUFX_UNPRON has the value 0x8000, which the comment at the top
// of this list says the prefix/suffix flags must not use - presumably that bit
// is kept free for this return flag; confirm against the code that reads it.
140 #define SUFX_UNPRON 0x8000 // used to return $unpron flag from *_rules
143 #define FLAG_ALLOW_TEXTMODE 0x02 // allow dictionary to translate to text rather than phonemes
144 #define FLAG_SUFX 0x04
145 #define FLAG_SUFX_S 0x08
146 #define FLAG_SUFX_E_ADDED 0x10
149 // codes in dictionary rules
152 #define RULE_PHONEMES 3
153 #define RULE_PH_COMMON 4 // At start of rule. Its phoneme string is used by subsequent rules
154 #define RULE_CONDITION 5 // followed by condition number (byte)
155 #define RULE_GROUP_START 6
156 #define RULE_GROUP_END 7
157 #define RULE_PRE_ATSTART 8 // as RULE_PRE but also match with 'start of word'
158 #define RULE_LINENUM 9 // next 2 bytes give a line number, for debugging purposes
160 #define RULE_SPACE 32 // ascii space
161 #define RULE_SYLLABLE 21 // @
162 #define RULE_STRESSED 10 // &
163 #define RULE_DOUBLE 11 // %
164 #define RULE_INC_SCORE 12 // +
165 #define RULE_DEL_FWD 13 // #
166 #define RULE_ENDING 14 // S
167 #define RULE_DIGIT 15 // D digit
168 #define RULE_NONALPHA 16 // Z non-alpha
169 #define RULE_LETTERGP 17 // A B C H F G Y letter group number
170 #define RULE_LETTERGP2 18 // L + letter group number
171 #define RULE_CAPITAL 19 // ! word starts with a capital letter
172 #define RULE_REPLACEMENTS 20 // section for character replacements
173 #define RULE_SKIPCHARS 23 // J
174 #define RULE_NO_SUFFIX 24 // N
175 #define RULE_NOTVOWEL 25 // K
176 #define RULE_IFVERB 26 // V
177 #define RULE_DOLLAR 28 // $ commands
178 #define RULE_NOVOWELS 29 // X no vowels up to word boundary
179 #define RULE_SPELLING 31 // W while spelling letter-by-letter
180 #define RULE_LAST_RULE 31
189 #define LETTERGP_VOWEL2 7
192 // Punctuation types returned by ReadClause()
193 // bits 0-7 pause x 10mS
194 // bits 12-14 intonation type (mask CLAUSE_BITS_INTONATION)
195 // bit 15- don't need space after the punctuation
196 // bit 19=sentence, bit 18=clause, bit 17=voice change
197 // bit 16 used to distinguish otherwise identical types
198 // bit 20= punctuation character can be inside a word (Armenian)
199 // bit 21= speak the name of the punctuation character
200 // bit 22= dot after the last word
201 #define CLAUSE_BIT_SENTENCE 0x80000
202 #define CLAUSE_BIT_CLAUSE 0x40000
203 #define CLAUSE_BIT_VOICE 0x20000
204 #define CLAUSE_BITS_INTONATION 0x7000
205 #define PUNCT_IN_WORD 0x100000
206 #define PUNCT_SAY_NAME 0x200000
207 #define CLAUSE_DOT 0x400000
// Clause types: the small decimal term is the pause value (bits 0-7),
// the hex term carries the type and intonation bits described above.
209 #define CLAUSE_NONE ( 0 + 0x04000)
210 #define CLAUSE_PARAGRAPH (70 + 0x80000)
211 #define CLAUSE_EOF (40 + 0x90000)
212 #define CLAUSE_VOICE ( 0 + 0x24000)
213 #define CLAUSE_PERIOD (40 + 0x80000)
214 #define CLAUSE_COMMA (20 + 0x41000)
215 #define CLAUSE_SHORTCOMMA ( 4 + 0x41000)
216 #define CLAUSE_SHORTFALL ( 4 + 0x40000)
217 #define CLAUSE_QUESTION (40 + 0x82000)
218 #define CLAUSE_EXCLAMATION (45 + 0x83000)
219 #define CLAUSE_COLON (30 + 0x40000)
220 #define CLAUSE_SEMICOLON (30 + 0x41000)
222 #define SAYAS_CHARS 0x12
223 #define SAYAS_GLYPHS 0x13
224 #define SAYAS_SINGLE_CHARS 0x14
225 #define SAYAS_KEY 0x24
226 #define SAYAS_DIGITS 0x40 // + number of digits
227 #define SAYAS_DIGITS1 0xc1
229 #define CHAR_EMPHASIS 0x0530 // this is an unused character code
230 #define CHAR_COMMA_BREAK 0x0557 // unused character code
233 // [4] [match] [1 pre] [2 post] [3 phonemes] 0
234 // match 1 pre 2 post 0 - use common phoneme string
235 // match 1 pre 2 post 3 0 - empty phoneme string
237 typedef const char * constcharptr;
241 const char *phonemes;
247 // used to mark words with the source[] buffer
250 unsigned short start;
251 unsigned char pre_pause;
253 unsigned short sourceix;
254 unsigned char length;
260 int parameter[N_SPEECH_PARAM];
263 extern PARAM_STACK param_stack[];
264 extern const int param_defaults[N_SPEECH_PARAM];
270 unsigned short range_min, range_max;
275 extern ALPHABET alphabets[];
276 extern ALPHABET *current_alphabet;
278 #define AL_DONT_NAME 0x01 // don't speak the alphabet name
279 #define AL_NOT_LETTERS 0x02 // don't use the language for speaking letters
280 #define AL_WORDS 0x04 // use the language to speak words
281 #define AL_NOT_CODE 0x08 // don't speak the character code
282 #define AL_NO_SYMBOL 0x10 // don't repeat "symbol" or "character"
// LOPT_* option numbers. In this list a descriptive comment, where present,
// precedes the #define that it documents.
286 #define LOPT_DIERESES 1
287 // 1=remove [:] from unstressed syllables, 2= remove from unstressed or non-penultimate syllables
288 // bit 4=0, if stress < 4, bit 4=1, if not the highest stress in the word
289 #define LOPT_IT_LENGTHEN 2
292 #define LOPT_PREFIXES 3
294 // non-zero, change voiced/unvoiced to match last consonant in a cluster
295 // bit 0=use regressive voicing
296 // bit 1=LANG=cz,bg don't propagate over [v]
297 // bit 2=don't propagate across word boundaries
298 // bit 3=LANG=pl, propagate over liquids and nasals
299 // bit 4=LANG=cz,sk don't propagate to [v]
300 // bit 8=devoice word-final consonants
301 #define LOPT_REGRESSIVE_VOICING 4
303 // 0=default, 1=no check, other allow this character as an extra initial letter (default is 's')
304 #define LOPT_UNPRONOUNCABLE 5
306 // select length_mods tables, (length_mod_tab) + (length_mod_tab0 * 100)
307 #define LOPT_LENGTH_MODS 6
309 // increase this to prevent sonorants being shortened before shortened (eg. unstressed) vowels
310 #define LOPT_SONORANT_MIN 7
312 // bit 0: don't break vowels at word boundary
313 #define LOPT_WORD_MERGE 8
315 // max. amplitude for vowel at the end of a clause
316 #define LOPT_MAXAMP_EOC 9
318 // bit 0=reduce even if phonemes are specified in the **_list file
319 // bit 1=don't reduce the strongest vowel in a word which is marked 'unstressed'
320 #define LOPT_REDUCE 10
322 // LANG=cs,sk combine some prepositions with the following word, if the combination has N or fewer syllables
323 // bits 0-3 N syllables
324 // bit 4=only if the second word has $alt attribute
325 // bit 5=not if the second word is end-of-sentence
326 #define LOPT_COMBINE_WORDS 11
328 // change [t] when followed by unstressed vowel
329 #define LOPT_REDUCE_T 12
331 // 1 = allow capitals inside a word
332 // 2 = stressed syllable is indicated by capitals
333 #define LOPT_CAPS_IN_WORD 13
335 // bit 0=Italian "syntactic doubling" of consonants in the word after a word marked with $double attribute
336 // bit 1=also after a word which ends with a stressed vowel
337 #define LOPT_IT_DOUBLING 14
339 // Call ApplySpecialAttributes() if $alt or $alt2 is set for a word
340 // bit 1: stressed syllable: $alt change [e],[o] to [E],[O], $alt2 change [E],[O] to [e],[o]
343 // pause for bracket (default=4), pause when announcing bracket names (default=2)
344 #define LOPT_BRACKET_PAUSE 16
346 // bit 1, don't break clause before announcing . ? !
347 #define LOPT_ANNOUNCE_PUNCT 17
349 // recognize long vowels (0 = don't recognize)
350 #define LOPT_LONG_VOWEL_THRESHOLD 18
352 // bit 0: Don't allow suffices if there is no previous syllable
353 #define LOPT_SUFFIX 19
355 // bit 0 Apostrophe at start of word is part of the word
356 // bit 1 Apostrophe at end of word is part of the word
357 #define LOPT_APOSTROPHE 20
361 #define STRESSPOSN_1L 0 // 1st syllable
362 #define STRESSPOSN_2L 1 // 2nd syllable
363 #define STRESSPOSN_2R 2 // penultimate
364 #define STRESSPOSN_1R 3 // final syllable
365 #define STRESSPOSN_3R 4 // antipenultimate
369 // bits0-2 separate words with (1=pause_vshort, 2=pause_short, 3=pause, 4=pause_long 5=[?] phoneme)
370 // bit 3=don't use linking phoneme
371 // bit4=longer pause before STOP, VSTOP,FRIC
372 // bit5=length of a final vowel doesn't depend on the next phoneme
375 int stress_rule; // 1=first syllable, 2=penultimate, 3=last
377 #define S_NO_DIM 0x02
378 #define S_FINAL_DIM 0x04
379 #define S_FINAL_DIM_ONLY 0x06
380 // bit1=don't set diminished stress,
381 // bit2=mark unstressed final syllables as diminished
383 // bit3=set consecutive unstressed syllables in unstressed words to diminished, but not in stressed words
385 #define S_FINAL_NO_2 0x10
386 // bit4=don't allow secondary stress on last syllable
388 #define S_NO_AUTO_2 0x20
389 // bit5-don't use automatic secondary stress
391 #define S_2_TO_HEAVY 0x40
392 // bit6=light syllable followed by heavy, move secondary stress to the heavy syllable. LANG=Finnish
394 #define S_FIRST_PRIMARY 0x80
395 // bit7=if more than one primary stress, make the subsequent primaries to secondary stress
397 #define S_FINAL_STRESS_C 0x100
398 // bit8=stress last syllable if it doesn't end in a vowel
400 #define S_FINAL_SPANISH 0x200
401 // bit9=stress last syllable if it doesn't end in vowel or "s" or "n" LANG=Spanish
403 #define S_2_SYL_2 0x1000
404 // bit12= In a 2-syllable word, if one has primary stress then give the other secondary stress
406 #define S_INITIAL_2 0x2000
407 // bit13= If there is only one syllable before the primary stress, give it a secondary stress
409 #define S_MID_DIM 0x10000
410 // bit 16= Set (not first or last) syllables to diminished stress
412 #define S_PRIORITY_STRESS 0x20000
413 // bit17= "priority" stress reduces other primary stress to "unstressed" not "secondary"
415 #define S_EO_CLAUSE1 0x40000
416 // bit18= don't lengthen short vowels more than long vowels at end-of-clause
418 #define S_FINAL_LONG 0x80000
419 // bit19=stress on final syllable if it has a long vowel, but previous syllable has a short vowel
422 #define S_HYPEN_UNSTRESS 0x100000
423 // bit20= hyphenated words, 2nd part is unstressed
425 #define S_NO_EOC_LENGTHEN 0x200000
426 // bit21= don't lengthen vowels at end-of-clause
428 // bit15= Give stress to the first unstressed syllable
432 int unstressed_wd1; // stress for $u word of 1 syllable
433 int unstressed_wd2; // stress for $u word of >1 syllable
436 unsigned char *length_mods;
437 unsigned char *length_mods0;
439 #define NUM_THOUS_SPACE 0x4
440 #define NUM_DECIMAL_COMMA 0x8
441 #define NUM_SWAP_TENS 0x10
442 #define NUM_AND_UNITS 0x20
443 #define NUM_HUNDRED_AND 0x40
444 #define NUM_SINGLE_AND 0x80
445 #define NUM_SINGLE_STRESS 0x100
446 #define NUM_SINGLE_VOWEL 0x200
447 #define NUM_OMIT_1_HUNDRED 0x400
448 #define NUM_1900 0x800
449 #define NUM_ALLOW_SPACE 0x1000
450 #define NUM_DFRACTION_1 0x2000
451 #define NUM_DFRACTION_2 0x4000
452 #define NUM_DFRACTION_3 0x6000
453 #define NUM_DFRACTION_4 0x8000
454 #define NUM_DFRACTION_5 0xa000
455 #define NUM_DFRACTION_6 0xc000
456 #define NUM_DFRACTION_7 0xe000 // lang=si, alternative form of number for decimal fraction digits (except the last)
457 #define NUM_ORDINAL_DOT 0x10000
458 #define NUM_NOPAUSE 0x20000
459 #define NUM_AND_HUNDRED 0x40000
460 #define NUM_THOUSAND_AND 0x80000
461 #define NUM_VIGESIMAL 0x100000
462 #define NUM_OMIT_1_THOUSAND 0x200000
463 #define NUM_ZERO_HUNDRED 0x400000
464 #define NUM_HUNDRED_AND_DIGIT 0x800000
465 #define NUM_ROMAN 0x1000000
466 #define NUM_ROMAN_CAPITALS 0x2000000
467 #define NUM_ROMAN_AFTER 0x4000000
468 #define NUM_ROMAN_ORDINAL 0x8000000
469 #define NUM_SINGLE_STRESS_L 0x10000000
471 // bits0-1=which numbers routine to use.
472 // bit2= thousands separator must be space
473 // bit3= , decimal separator, not .
474 // bit4=use three-and-twenty rather than twenty-three
475 // bit5='and' between tens and units
476 // bit6=add "and" after hundred or thousand
477 // bit7=don't have "and" both after hundreds and also between tens and units
478 // bit8=only one primary stress in tens+units
479 // bit9=only one vowel between tens and units
480 // bit10=omit "one" before "hundred"
481 // bit11=say 19** as nineteen hundred
482 // bit12=allow space as thousands separator (in addition to langopts.thousands_sep)
483 // bits13-15 post-decimal-digits 0=single digits, 1=(LANG=it) 2=(LANG=pl) 3=(LANG=ro)
485 // bit16= dot after number indicates ordinal
486 // bit17= don't add pause after a number
487 // bit18= 'and' before hundreds
488 // bit19= 'and' after thousands if there are no hundreds
489 // bit20= vigesimal number, if tens are not found
490 // bit21= omit "one" before "thousand"
491 // bit22= say "zero" before hundred
492 // bit23= add "and" after hundreds and thousands, only if there are digits and no tens
494 // bit24= recognize roman numbers
495 // bit25= Roman numbers only if upper case
496 // bit26= say "roman" after the number, not before
497 // bit27= Roman numbers are ordinal numbers
498 // bit28= only one primary stress in tens+units (on the tens)
501 #define NUM2_THOUSANDS_VAR1 0x40
502 #define NUM2_THOUSANDS_VAR2 0x80
503 #define NUM2_THOUSANDS_VAR3 0xc0
504 #define NUM2_THOUSANDS_VAR4 0x100
505 #define NUM2_THOUSANDS_VAR5 0x140
507 #define NUM2_ORDINAL_NO_AND 0x800
508 #define NUM2_MULTIPLE_ORDINAL 0x1000
509 #define NUM2_NO_TEEN_ORDINALS 0x2000
510 #define NUM2_MYRIADS 0x4000
511 #define NUM2_ENGLISH_NUMERALS 0x8000
512 #define NUM2_PERCENT_BEFORE 0x10000
513 // bits 1-4 use variant form of numbers before thousands,millions,etc.
514 // bits 6-8 use different forms of thousand, million, etc (M MA MB)
515 // bit9=(LANG=rw) say "thousand" and "million" before its number, not after
516 // bit11=(LANG=es,an) don't say 'and' between tens and units for ordinal numbers
517 // bit12=(LANG=el,es) use ordinal form of hundreds and tens as well as units
518 // bit13=(LANG=pt) don't use 11-19 numbers to make ordinals
519 // bit14=(LANG=ko) use myriads (groups of 4 digits) not thousands (groups of 3)
520 // bit15=(LANG=ne) speak (non-replaced) English numerals in English
521 // bit16=(LANG=si) say "%" before the number
524 #define BREAK_THOUSANDS 0x49249248
525 int break_numbers; // which digits to break the number into thousands, millions, etc (Hindi has 100,000 not 1,000,000)
530 int max_digits; // max number of digits which can be spoken as an integer number (rather than individual digits)
531 const char *ordinal_indicator; // UTF-8 string
533 // bit 0, accent name before the letter name, bit 1 "capital" after letter name
536 int tone_language; // 1=tone language
537 int intonation_group;
538 unsigned char tunes[6];
539 int long_stop; // extra mS pause for a lengthened stop
540 int phoneme_change; // TEST, change phonemes, after translation
541 char max_initial_consonants;
542 char spelling_stress; // 0=default, 1=stress first letter
544 char ideographs; // treat as separate words
545 char textmode; // the meaning of FLAG_TEXTMODE is reversed (to save data when *_list file is compiled)
546 char dotless_i; // uses letter U+0131
547 int testing; // testing options: bit 1= specify stressed syllable in the form: "outdoor/2"
548 int listx; // compile *_listx after *list
549 const unsigned int *replace_chars; // characters to be substitutes
550 char ascii_language[8]; // switch to this language for Latin characters
551 int our_alphabet; // offset for main alphabet (if not set in letter_bits_offset)
552 int alt_alphabet; // offset for another language to recognize
553 int alt_alphabet_lang; // language for the alt_alphabet
555 int lengthen_tonic; // lengthen the tonic syllable
556 int suffix_add_e; // replace a suffix (which has the SUFX_E flag) with this character
560 // a parameter of ChangePhonemes()
563 unsigned char stress; // stress level of this vowel
564 unsigned char stress_highest; // the highest stress level of a vowel in this word
565 unsigned char n_vowels; // number of vowels in the word
566 unsigned char vowel_this; // syllable number of this vowel (counting from 1)
567 unsigned char vowel_stressed; // syllable number of the highest stressed vowel
575 LANGUAGE_OPTIONS langopts;
579 const char *transpose_map;
580 char dictionary_name[40];
583 char phonemes_repeat[20];
584 int phonemes_repeat_count;
587 unsigned char stress_amps[8];
588 unsigned char stress_amps_r[8];
589 short stress_lengths[8];
590 int dict_condition; // conditional apply some pronunciation rules and dict.lookups
592 const unsigned short *charset_a0; // unicodes for characters 0xa0 to 0xff
593 const wchar_t *char_plus_apostrophe; // single chars + apostrophe treated as words
594 const wchar_t *punct_within_word; // allow these punctuation characters within words
595 const unsigned short *chars_ignore;
597 // holds properties of characters: vowel, consonant, etc for pronunciation rules
598 unsigned char letter_bits[256];
599 int letter_bits_offset;
600 const wchar_t *letter_groups[8];
602 /* index1=option, index2 by 0=. 1=, 2=?, 3=! 4=none */
603 #define INTONATION_TYPES 8
604 #define PUNCT_INTONATIONS 6
605 unsigned char punct_to_tone[INTONATION_TYPES][PUNCT_INTONATIONS];
607 char *data_dictrules; // language_1 translation rules file
608 char *data_dictlist; // language_2 dictionary lookup file
609 char *dict_hashtab[N_HASH_DICT]; // hash table to index dictionary lookup file
610 char *letterGroups[N_LETTER_GROUPS];
612 // groups1 and groups2 are indexes into data_dictrules, set up by InitGroups()
613 // the two-letter rules for each letter must be consecutive in the language_rules source
615 char *groups1[256]; // translation rule lists, index by single letter
616 char *groups3[128]; // index by offset letter
617 char *groups2[N_RULE_GROUP2]; // translation rule lists, indexed by two-letter pairs
618 unsigned int groups2_name[N_RULE_GROUP2]; // the two letter pairs for groups2[]
619 int n_groups2; // number of groups2[] entries used
621 unsigned char groups2_count[256]; // number of 2 letter groups for this initial letter
622 unsigned char groups2_start[256]; // index into groups2
623 const short *frequent_pairs; // list of frequent pairs of letters, for use in compressed *_list
626 int expect_past; // expect past tense
629 int prev_last_stress;
632 int word_vowel_count; // number of vowels so far
633 int word_stressed_count; // number of vowels so far which could be stressed
635 int clause_upper_count; // number of upper case letters in the clause
636 int clause_lower_count; // number of lower case letters in the clause
638 int prepause_timeout;
639 int end_stressed_vowel; // word ends with stressed vowel
640 int prev_dict_flags[2]; // dictionary flags from previous word
641 int clause_terminator;
645 extern int option_tone2;
646 #define OPTION_EMPHASIZE_ALLCAPS 0x100
647 #define OPTION_EMPHASIZE_PENULTIMATE 0x200
648 extern int option_tone_flags;
649 extern int option_waveout;
650 extern int option_quiet;
651 extern int option_phonemes;
652 extern int option_mbrola_phonemes;
653 extern int option_phoneme_events;
654 extern int option_linelength; // treat lines shorter than this as end-of-clause
655 extern int option_multibyte;
656 extern int option_capitals;
657 extern int option_punctuation;
658 extern int option_endpause;
659 extern int option_ssml;
660 extern int option_phoneme_input; // allow [[phonemes]] in input text
661 extern int option_phoneme_variants;
662 extern int option_sayas;
663 extern int option_wordgap;
665 extern int count_characters;
666 extern int count_words;
667 extern int count_sentences;
668 extern int skip_characters;
669 extern int skip_words;
670 extern int skip_sentences;
671 extern int skipping_text;
672 extern int end_character_position;
673 extern int clause_start_char;
674 extern int clause_start_word;
675 extern char *namedata;
676 extern int pre_pause;
680 #define N_MARKER_LENGTH 50 // max.length of a mark name
681 extern char skip_marker[N_MARKER_LENGTH];
683 #define N_PUNCTLIST 60
684 extern wchar_t option_punctlist[N_PUNCTLIST]; // which punctuation characters to announce
685 extern unsigned char punctuation_to_tone[INTONATION_TYPES][PUNCT_INTONATIONS];
687 extern Translator *translator;
688 extern Translator *translator2;
689 extern const unsigned short *charsets[N_CHARSETS];
690 extern char dictionary_name[40];
691 extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands
692 extern unsigned char *p_textinput;
693 extern wchar_t *p_wchar_input;
694 extern int dictionary_skipwords;
696 extern int (* uri_callback)(int, const char *, const char *);
697 extern int (* phoneme_callback)(const char *);
698 extern void SetLengthMods(Translator *tr, int value);
700 void LoadConfig(void);
701 int TransposeAlphabet(Translator *tr, char *text);
702 int utf8_in(int *c, const char *buf);
703 int utf8_in2(int *c, const char *buf, int backwards);
704 int utf8_out(unsigned int c, char *buf);
705 int utf8_nbytes(const char *buf);
706 int lookupwchar(const unsigned short *list,int c);
707 int lookupwchar2(const unsigned short *list,int c);
709 char *strchr_w(const char *s, int c);
710 int IsBracket(int c);
711 void InitNamedata(void);
712 void InitText(int flags);
713 void InitText2(void);
714 int IsDigit(unsigned int c);
715 int IsDigit09(unsigned int c);
716 int IsAlpha(unsigned int c);
717 int IsVowel(Translator *tr, int c);
718 int iswalpha2(int c);
719 int isspace2(unsigned int c);
720 int iswlower2(int c);
721 int iswupper2(int c);
722 int towlower2(unsigned int c);
723 int towupper2(unsigned int c);
724 void GetTranslatedPhonemeString(char *phon_out, int n_phon_out, int phoneme_mode);
725 const char *WordToString2(unsigned int word);
726 ALPHABET *AlphabetFromChar(int c);
727 ALPHABET *AlphabetFromName(const char *name);
729 Translator *SelectTranslator(const char *name);
730 int SetTranslator2(const char *name);
731 void DeleteTranslator(Translator *tr);
732 int Lookup(Translator *tr, const char *word, char *ph_out);
733 int LookupFlags(Translator *tr, const char *word);
735 int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control);
736 int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab);
738 void ChangeWordStress(Translator *tr, char *word, int new_stress);
739 void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars);
740 int TranslateLetter(Translator *tr, char *letter, char *phonemes, int control);
741 void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf, int control);
742 void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf);
744 int LoadDictionary(Translator *tr, const char *name, int no_error);
745 int LookupDictList(Translator *tr, char **wordptr, char *ph_out, unsigned int *flags, int end_flags, WORD_TAB *wtab);
747 void MakePhonemeList(Translator *tr, int post_pause, int new_sentence);
748 int ChangePhonemes_ru(Translator *tr, PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch);
749 void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags);
750 void AppendPhonemes(Translator *tr, char *string, int size, const char *ph);
752 void CalcLengths(Translator *tr);
753 void CalcPitches(Translator *tr, int clause_tone);
755 int RemoveEnding(Translator *tr, char *word, int end_type, char *word_copy);
756 int Unpronouncable(Translator *tr, char *word, int posn);
757 void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int prev_stress);
758 int TranslateRules(Translator *tr, char *p, char *phonemes, int size, char *end_phonemes, int end_flags, unsigned int *dict_flags);
759 int TranslateWord(Translator *tr, char *word1, int next_pause, WORD_TAB *wtab, char *word_out);
760 void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *tone, char **voice_change);
761 int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix_top, int n_buf, int *tone_type, char *voice_change);
763 void SetVoiceStack(espeak_VOICE *v, const char *variant_name);
764 void InterpretPhoneme(Translator *tr, int control, PHONEME_LIST *plist, PHONEME_DATA *phdata, WORD_PH_DATA *worddata);
765 void InterpretPhoneme2(int phcode, PHONEME_DATA *phdata);
766 char *WritePhMnemonic(char *phon_out, PHONEME_TAB *ph, PHONEME_LIST *plist, int use_ipa, int *flags);
768 extern FILE *f_trans; // for logging
769 extern FILE *f_logespeak;
770 extern int logging_type; // from config file