1 /***************************************************************************
2 * Copyright (C) 2005 to 2013 by Jonathan Duddington *
3 * email: jonsd@users.sourceforge.net *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 3 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, see: *
17 * <http://www.gnu.org/licenses/>. *
18 ***************************************************************************/
30 #include "speak_lib.h"
33 #include "synthesize.h"
35 #include "translate.h"
// Accent/modifier codes. They are passed as the mod1/mod2 arguments of LETTER() and LIGATURE();
// LookupAccentedLetter() uses the decoded values as indices into accents_tab[].
49 #define M_CIRCUMFLEX 9
50 #define M_DIAERESIS 10
51 #define M_DOUBLE_ACUTE 11
52 #define M_DOT_ABOVE 12
61 #define M_RETROFLEX 20
// aliases: these two modifiers reuse the code of another modifier
65 #define M_MIDDLE_DOT M_DOT_ABOVE // duplicate of M_DOT_ABOVE
66 #define M_IMPLOSIVE M_HOOK
// State shared between the number-translation functions in this file.
68 static int n_digit_lookup; // LookupNum2() tests this for 2: the final two digits have already been looked up
69 static char *digit_lookup; // phonemes already found for the final digit(s); read by LookupNum2()
70 static int speak_missing_thousands; // set to 2 or 3 by LookupThousands() when a thousands name is not in the dictionary
71 static int number_control; // bit 0: look for the 'e' variant of number names
79 // these are tokens to look up in the *_list file.
// Each entry is {name, flags}, indexed by an accent/modifier code.
// LookupAccentedLetter() looks the name up in the dictionary; when flags bit 0 is set
// it places the modifier's phonemes before the letter's phonemes.
80 static ACCENTS accents_tab[] = {
82 {"_smc", 1}, // smallcap
83 {"_tur", 1}, // turned
84 {"_rev", 1}, // reversed
89 {"_hac", 0}, // caron/hacek
90 {"_ced", 0}, // cedilla
91 {"_cir", 0}, // circumflex
92 {"_dia", 0}, // diaeresis
93 {"_ac2", 0}, // double acute
96 {"_mcn", 0}, // macron
97 {"_ogo", 0}, // ogonek
99 {"_stk", 0}, // stroke
100 {"_tld", 0}, // tilde
103 {"_rfx", 0}, // retroflex
// Pack a base letter and up to two modifiers into a 16-bit table entry:
//   bits 0-5    ch - 59  (an ASCII letter, or one of the L_* codes for a non-ASCII base letter)
//   bits 6-10   mod1     (accent/modifier code, 0 = none)
//   bits 11-14  mod2     (second accent/modifier code, 0 = none)
// LookupAccentedLetter() unpacks these fields.
// Every argument and the whole expansion are parenthesized so that the macro gives the
// same value inside a larger expression or when an argument is itself an expression.
#define LETTER(ch,mod1,mod2) (((ch)-59)+((mod1) << 6)+((mod2) << 11))
// Pack a two-letter ligature with one modifier into a 16-bit table entry:
//   bits 0-5    ch1 - 59
//   bits 6-11   ch2 - 59
//   bits 12-14  mod1
//   bit 15      always set; LookupAccentedLetter() tests it to tell a ligature from a LETTER() entry
#define LIGATURE(ch1,ch2,mod1) (((ch1)-59)+(((ch2)-59) << 6)+((mod1) << 12)+0x8000)
// Codes for base letters that are not ASCII letters. LETTER() stores (code - 59);
// LookupAccentedLetter() maps a decoded base letter below 'a' back to Unicode
// through non_ascii_tab[code - 59].
113 #define L_ALPHA 60 // U+3B1
114 #define L_SCHWA 61 // U+259
115 #define L_OPEN_E 62 // U+25B
116 #define L_GAMMA 63 // U+3B3
117 #define L_IOTA 64 // U+3B9
118 #define L_OE 65 // U+153
119 #define L_OMEGA 66 // U+3C9
121 #define L_PHI 67 // U+3C6
122 #define L_ESH 68 // U+283
123 #define L_UPSILON 69 // U+3C5
124 #define L_EZH 70 // U+292
125 #define L_GLOTTAL 71 // U+294
126 #define L_RTAP 72 // U+27E
// Unicode code points of the L_* letters above, indexed by (code - 59); index 0 holds 0.
129 static const short non_ascii_tab[] = {
130 0, 0x3b1, 0x259, 0x25b, 0x3b3, 0x3b9, 0x153, 0x3c9,
131 0x3c6, 0x283, 0x3c5, 0x292, 0x294, 0x27e
135 // characters U+00e0 to U+017f
// Each entry is a LETTER() value naming the base letter and its accent.
// LookupAccentedLetter() indexes this table with (letter - 0xe0).
136 static const unsigned short letter_accents_0e0[] = {
137 LETTER('a',M_GRAVE,0), // U+00e0
138 LETTER('a',M_ACUTE,0),
139 LETTER('a',M_CIRCUMFLEX,0),
140 LETTER('a',M_TILDE,0),
141 LETTER('a',M_DIAERESIS,0),
142 LETTER('a',M_RING,0),
144 LETTER('c',M_CEDILLA,0),
145 LETTER('e',M_GRAVE,0),
146 LETTER('e',M_ACUTE,0),
147 LETTER('e',M_CIRCUMFLEX,0),
148 LETTER('e',M_DIAERESIS,0),
149 LETTER('i',M_GRAVE,0),
150 LETTER('i',M_ACUTE,0),
151 LETTER('i',M_CIRCUMFLEX,0),
152 LETTER('i',M_DIAERESIS,0),
153 LETTER('d',M_NAME,0), // eth // U+00f0
154 LETTER('n',M_TILDE,0),
155 LETTER('o',M_GRAVE,0),
156 LETTER('o',M_ACUTE,0),
157 LETTER('o',M_CIRCUMFLEX,0),
158 LETTER('o',M_TILDE,0),
159 LETTER('o',M_DIAERESIS,0),
161 LETTER('o',M_STROKE,0),
162 LETTER('u',M_GRAVE,0),
163 LETTER('u',M_ACUTE,0),
164 LETTER('u',M_CIRCUMFLEX,0),
165 LETTER('u',M_DIAERESIS,0),
166 LETTER('y',M_ACUTE,0),
167 LETTER('t',M_NAME,0), // thorn
168 LETTER('y',M_DIAERESIS,0),
170 LETTER('a',M_MACRON,0),
172 LETTER('a',M_BREVE,0),
174 LETTER('a',M_OGONEK,0),
176 LETTER('c',M_ACUTE,0),
178 LETTER('c',M_CIRCUMFLEX,0),
180 LETTER('c',M_DOT_ABOVE,0),
182 LETTER('c',M_CARON,0),
184 LETTER('d',M_CARON,0),
186 LETTER('d',M_STROKE,0),
188 LETTER('e',M_MACRON,0),
190 LETTER('e',M_BREVE,0),
192 LETTER('e',M_DOT_ABOVE,0),
194 LETTER('e',M_OGONEK,0),
196 LETTER('e',M_CARON,0),
198 LETTER('g',M_CIRCUMFLEX,0),
200 LETTER('g',M_BREVE,0),
202 LETTER('g',M_DOT_ABOVE,0),
204 LETTER('g',M_CEDILLA,0),
206 LETTER('h',M_CIRCUMFLEX,0),
208 LETTER('h',M_STROKE,0),
210 LETTER('i',M_TILDE,0),
212 LETTER('i',M_MACRON,0),
214 LETTER('i',M_BREVE,0),
216 LETTER('i',M_OGONEK,0),
218 LETTER('i',M_NAME,0), // dotless i
222 LETTER('j',M_CIRCUMFLEX,0),
224 LETTER('k',M_CEDILLA,0),
225 LETTER('k',M_NAME,0), // kra
227 LETTER('l',M_ACUTE,0),
229 LETTER('l',M_CEDILLA,0),
231 LETTER('l',M_CARON,0),
233 LETTER('l',M_MIDDLE_DOT,0), // U+0140
235 LETTER('l',M_STROKE,0),
237 LETTER('n',M_ACUTE,0),
239 LETTER('n',M_CEDILLA,0),
241 LETTER('n',M_CARON,0),
242 LETTER('n',M_NAME,0), // apostrophe n
244 LETTER('n',M_NAME,0), // eng
246 LETTER('o',M_MACRON,0),
248 LETTER('o',M_BREVE,0),
250 LETTER('o',M_DOUBLE_ACUTE,0),
254 LETTER('r',M_ACUTE,0),
256 LETTER('r',M_CEDILLA,0),
258 LETTER('r',M_CARON,0),
260 LETTER('s',M_ACUTE,0),
262 LETTER('s',M_CIRCUMFLEX,0),
264 LETTER('s',M_CEDILLA,0),
266 LETTER('s',M_CARON,0),
268 LETTER('t',M_CEDILLA,0),
270 LETTER('t',M_CARON,0),
272 LETTER('t',M_STROKE,0),
274 LETTER('u',M_TILDE,0),
276 LETTER('u',M_MACRON,0),
278 LETTER('u',M_BREVE,0),
280 LETTER('u',M_RING,0),
282 LETTER('u',M_DOUBLE_ACUTE,0),
284 LETTER('u',M_OGONEK,0),
286 LETTER('w',M_CIRCUMFLEX,0),
288 LETTER('y',M_CIRCUMFLEX,0),
289 CAPITAL, // Y-DIAERESIS
291 LETTER('z',M_ACUTE,0),
293 LETTER('z',M_DOT_ABOVE,0),
295 LETTER('z',M_CARON,0),
296 LETTER('s',M_NAME,0), // long-s // U+17f
300 // characters U+0250 to U+02A8
// Each entry is a LETTER() or LIGATURE() value; LookupAccentedLetter() indexes this table
// with (letter - 0x250) for letters up to and including U+02a8.
// A 0 entry has no description here; the commented-out LETTER() beside some of them uses a
// modifier or base letter that is not defined (presumably treated as "no entry" by the reader - confirm).
301 static const unsigned short letter_accents_250[] = {
302 LETTER('a',M_TURNED,0), // U+250
304 LETTER(L_ALPHA,M_TURNED,0),
305 LETTER('b',M_IMPLOSIVE,0),
307 LETTER('c',M_CURL,0),
308 LETTER('d',M_RETROFLEX,0),
309 LETTER('d',M_IMPLOSIVE,0),
310 LETTER('e',M_REVERSED,0), // U+258
312 LETTER(L_SCHWA,M_HOOK,0),
314 LETTER(L_OPEN_E,M_REVERSED,0),
315 LETTER(L_OPEN_E,M_HOOK,M_REVERSED),
316 0,//LETTER(L_OPEN_E,M_CLOSED,M_REVERSED),
318 LETTER('g',M_IMPLOSIVE,0), // U+260
320 LETTER('g',M_SMALLCAP,0),
323 LETTER('h',M_TURNED,0),
324 LETTER('h',M_HOOK,0),
325 0,//LETTER(L_HENG,M_HOOK,0),
326 LETTER('i',M_BAR,0), // U+268
328 LETTER('i',M_SMALLCAP,0),
329 LETTER('l',M_TILDE,0),
331 LETTER('l',M_RETROFLEX,0),
333 LETTER('m',M_TURNED,0),
334 0,//LETTER('m',M_TURNED,M_LEG), // U+270
335 LETTER('m',M_HOOK,0),
336 0,//LETTER('n',M_LEFTHOOK,0),
337 LETTER('n',M_RETROFLEX,0),
338 LETTER('n',M_SMALLCAP,0),
340 LIGATURE('o','e',M_SMALLCAP),
341 0,//LETTER(L_OMEGA,M_CLOSED,0),
342 LETTER(L_PHI,0,0), // U+278
343 LETTER('r',M_TURNED,0),
344 0,//LETTER('r',M_TURNED,M_LEG),
345 LETTER('r',M_RETROFLEX,M_TURNED),
346 0,//LETTER('r',M_LEG,0),
347 LETTER('r',M_RETROFLEX,0),
349 LETTER(L_RTAP,M_REVERSED,0),
350 LETTER('r',M_SMALLCAP,0), // U+280
351 LETTER('r',M_TURNED,M_SMALLCAP),
352 LETTER('s',M_RETROFLEX,0),
354 0,//LETTER('j',M_BAR,L_IMPLOSIVE),
355 LETTER(L_ESH,M_REVERSED,0),
356 LETTER(L_ESH,M_CURL,0),
357 LETTER('t',M_TURNED,0),
358 LETTER('t',M_RETROFLEX,0), // U+288
360 LETTER(L_UPSILON,0,0),
361 LETTER('v',M_HOOK,0),
362 LETTER('v',M_TURNED,0),
363 LETTER('w',M_TURNED,0),
364 LETTER('y',M_TURNED,0),
365 LETTER('y',M_SMALLCAP,0),
366 LETTER('z',M_RETROFLEX,0), // U+290
367 LETTER('z',M_CURL,0),
369 LETTER(L_EZH,M_CURL,0),
371 LETTER(L_GLOTTAL,M_REVERSED,0),
372 LETTER(L_GLOTTAL,M_TURNED,0),
373 0,//LETTER('c',M_LONG,0),
374 0, // bilabial click // U+298
375 LETTER('b',M_SMALLCAP,0),
376 0,//LETTER(L_OPEN_E,M_CLOSED,0),
377 LETTER('g',M_IMPLOSIVE,M_SMALLCAP),
378 LETTER('h',M_SMALLCAP,0),
379 LETTER('j',M_CURL,0),
380 LETTER('k',M_TURNED,0),
381 LETTER('l',M_SMALLCAP,0),
382 LETTER('q',M_HOOK,0), // U+2a0
383 LETTER(L_GLOTTAL,M_STROKE,0),
384 LETTER(L_GLOTTAL,M_STROKE,M_REVERSED),
387 LIGATURE('d','z',M_CURL),
390 LIGATURE('t','s',M_CURL),
// Get the phonemes for the name of a single letter into ph_buf.
// Tries the dictionary entry "_<letter>" first, then the entry "<letter>", and finally the translation rules.
// LookupAccentedLetter() compares the return value with 0.
393 static int LookupLetter2(Translator *tr, unsigned int letter, char *ph_buf)
394 { //========================================================================
396 char single_letter[10];
// build the lookup string: byte 0 is a zero, byte 1 is the '_' prefix, then the UTF-8 letter and a space
398 single_letter[0] = 0;
399 single_letter[1] = '_';
400 len = utf8_out(letter, &single_letter[2]);
401 single_letter[len+2] = ' ';
402 single_letter[len+3] = 0;
// first try "_<letter>"
404 if(Lookup(tr, &single_letter[1], ph_buf) == 0)
// not found, try "<letter>" without the '_' prefix
406 single_letter[1] = ' ';
407 if(Lookup(tr, &single_letter[2], ph_buf) == 0)
// still not found, use the translation rules.
// NOTE(review): 20 is passed where LookupLetter() passes sizeof(ph_buf3), presumably the size of the
// output buffer - confirm that every caller's ph_buf is at least that large
409 TranslateRules(tr, &single_letter[2], ph_buf, 20, NULL,0,NULL);
// Describe an accented letter as its base letter plus the name(s) of its accent(s),
// using the letter_accents_* tables, and write the phonemes to ph_buf.
//   letter: the unicode character
//   ph_buf: receives the phoneme string
416 void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf)
417 {//=========================================================================
418 // lookup the character in the accents table
// NOTE(review): the upper bound is exclusive, so U+017f itself is never looked up, although the
// table's last entry is commented as U+17f - confirm whether this is intended
431 if((letter >= 0xe0) && (letter < 0x17f))
433 accent_data = letter_accents_0e0[letter - 0xe0];
435 else if((letter >= 0x250) && (letter <= 0x2a8))
437 accent_data = letter_accents_250[letter - 0x250];
// bits 0-5 hold (base letter - 59); values below 'a' are L_* codes for non-ascii base letters
442 basic_letter = (accent_data & 0x3f) + 59;
443 if(basic_letter < 'a')
444 basic_letter = non_ascii_tab[basic_letter-59];
// bit 15 set: a LIGATURE() entry, with a second letter in bits 6-11 and one modifier in bits 12-14
446 if(accent_data & 0x8000)
448 letter2 = (accent_data >> 6) & 0x3f;
450 accent2 = (accent_data >> 12) & 0x7;
// a LETTER() entry: first modifier in bits 6-10, second modifier in bits 11-14
454 accent1 = (accent_data >> 6) & 0x1f;
455 accent2 = (accent_data >> 11) & 0xf;
// get the phonemes for the name of the first accent and for the base letter
459 if(Lookup(tr, accents_tab[accent1].name, ph_accent1) != 0)
462 if(LookupLetter2(tr, basic_letter, ph_letter1) != 0)
466 if(Lookup(tr, accents_tab[accent2].name, ph_accent2) == 0)
// flags bit 0: this modifier is spoken first, so put it at the start of the output
471 if(accents_tab[accent2].flags & 1)
473 strcpy(ph_buf,ph_accent2);
474 ph_buf += strlen(ph_buf);
// ligature: modifier, first letter, second letter
481 LookupLetter2(tr, letter2, ph_letter2);
482 sprintf(ph_buf,"%s%c%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
487 strcpy(ph_buf, ph_letter1);
// the accent name goes before the letter if the language option (accents bit 0) or the accent's own flag says so
488 else if((tr->langopts.accents & 1) || (accents_tab[accent1].flags & 1))
489 sprintf(ph_buf,"%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);
// otherwise the letter is followed by the accent name
491 sprintf(ph_buf,"%c%s%c%s%c", phonSTRESS_2, ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
496 } // end of LookupAccentedLetter
// Get the phonemes for a single letter into ph_buf1.
//   letter:    the unicode character
//   next_byte: stored after the letter in the lookup string; TranslateLetter() passes the byte that follows the letter in the word
//   ph_buf1:   receives the phoneme string. It can be left empty, or start with phonSWITCH
//              when the letter should be re-translated in another language
500 void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf1, int control)
501 {//==============================================================================================
502 // control, bit 0: not the first letter of a word
505 static char single_letter[10] = {0,0};
506 unsigned int dict_flags[2];
// the lookup string is built from byte 2 onwards; byte 1 is set to '_' or ' ' below to select the entry name
510 len = utf8_out(letter,&single_letter[2]);
511 single_letter[len+2] = ' ';
515 // speaking normal text, not individual characters
516 if(Lookup(tr, &single_letter[2], ph_buf1) != 0)
519 single_letter[1] = '_';
520 if(Lookup(tr, &single_letter[1], ph_buf3) != 0)
521 return; // the character is specified as _* so ignore it when speaking normal text
523 // check whether this character is specified for English
524 if(tr->translator_name == L('e','n'))
525 return; // we are already using English
527 SetTranslator2("en");
528 if(Lookup(translator2, &single_letter[2], ph_buf3) != 0)
530 // yes, switch to English and re-translate the word
531 sprintf(ph_buf1,"%c",phonSWITCH);
533 SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
537 if((letter <= 32) || iswspace(letter))
539 // lookup space as _#32 etc.
540 sprintf(&single_letter[1],"_#%d ",letter);
541 Lookup(tr, &single_letter[1], ph_buf1);
546 next_byte = RULE_SPELLING;
547 single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-31
549 single_letter[1] = '_';
551 // if the $accent flag is set for this letter, use the accents table (below)
// try "_<letter>", then "<letter>", then the translation rules
554 if(Lookup(tr, &single_letter[1], ph_buf3) == 0)
556 single_letter[1] = ' ';
557 if(Lookup(tr, &single_letter[2], ph_buf3) == 0)
559 TranslateRules(tr, &single_letter[2], ph_buf3, sizeof(ph_buf3), NULL,FLAG_NO_TRACE,NULL);
565 LookupAccentedLetter(tr, letter, ph_buf3);
568 strcpy(ph_buf1, ph_buf3);
// nothing found, or a language switch is requested
569 if((ph_buf1[0] == 0) || (ph_buf1[0] == phonSWITCH))
576 SetWordStress(tr, ph_buf1, dict_flags, -1, control & 1);
578 } // end of LookupLetter
581 // unicode ranges for non-ascii digits 0-9
// Each value is the code point of the digit zero of one script; the list ends with 0.
582 static const int number_ranges[] = {
583 0x660, 0x6f0, // arabic
584 0x966, 0x9e6, 0xa66, 0xae6, 0xb66, 0xbe6, 0xc66, 0xce6, 0xd66, // indic
585 0xe50, 0xed0, 0xf20, 0x1040, 0x1090,
586 0 }; // these must be in ascending order
589 int NonAsciiNumber(int letter)
590 {//============================
591 // Change non-ascii digit into ascii digit '0' to '9', (or -1 if not)
// scan number_ranges[] up to its 0 terminator
595 for(p=number_ranges; (base = *p) != 0; p++)
// the letter is within the ten digits which start at base
599 if(letter < (base+10))
600 return(letter-base+'0');
// Flags held in the top two bits of a derived_letters[] value.
605 #define L_SUB 0x4000 // subscript
606 #define L_SUP 0x8000 // superscript
// dictionary names for the flags above; TranslateLetter() indexes this with (value >> 14)
608 static const char *modifiers[] = {NULL, "_sub", "_sup", NULL};
610 // this list must be in ascending order
611 static unsigned short derived_letters[] = {
// Names of the hexadecimal digits 'a' to 'f', indexed by (digit - 'a').
// TranslateLetter() uses them when the language has no name of its own for the digit.
677 static const char *hex_letters[] = {"'e:j","b'i:","s'i:","d'i:","'i:","'ef"}; // names, using phonemes available to all languages
// Speak one character by name, appending its phonemes to 'phonemes'.
//   word:     UTF-8 text, the character to speak is its first character
//   phonemes: existing phoneme string for the word; the new phonemes are appended only if the
//             total stays below N_WORD_PHONEMES
679 int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
680 {//=========================================================================
681 // get pronunciation for an isolated letter
682 // return number of bytes used by the letter
683 // control bit 0: a non-initial letter in a word
684 // bit 1: say 'capital'
685 // bit 2: say character code for unknown letters
694 const char *modifier;
701 int speak_letter_number;
705 char ph_alphabet[80];
707 static char pause_string[] = {phonPAUSE, 0};
712 phontab_1 = translator->phoneme_tab_ix;
714 n_bytes = utf8_in(&letter,word);
// characters U+E000 to U+E0FF are reduced to their low byte
716 if((letter & 0xfff00) == 0x0e000)
718 letter &= 0xff; // uncode private usage area
723 // include CAPITAL information
724 if(iswupper2(letter))
726 Lookup(tr, "_cap", capital);
729 letter = towlower2(letter);
730 LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
734 // is this a subscript or superscript letter ?
// derived_letters[] holds pairs of values and ends with 0
735 for(ix=0; (c = derived_letters[ix]) != 0; ix+=2)
741 c = derived_letters[ix+1];
// the top two bits of the second value select "_sub" or "_sup"
743 if((modifier = modifiers[c >> 14]) != NULL)
745 Lookup(tr, modifier, capital);
// use the English name of the modifier, wrapped in phoneme table switches
748 capital[2] = SetTranslator2("en"); // overwrites previous contents of translator2
749 Lookup(translator2, modifier, &capital[3]);
752 capital[0] = phonPAUSE;
753 capital[1] = phonSWITCH;
754 len = strlen(&capital[3]);
755 capital[len+3] = phonSWITCH;
756 capital[len+4] = phontab_1;
763 LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
// LookupLetter() asks for a change of language
766 if(ph_buf[0] == phonSWITCH)
768 strcpy(phonemes,ph_buf);
773 if((ph_buf[0] == 0) && ((number = NonAsciiNumber(letter)) > 0))
775 // convert a non-ascii number to 0-9
776 LookupLetter(tr, number, 0, ph_buf, control & 1);
781 if((alphabet = AlphabetFromChar(letter)) != NULL)
783 al_offset = alphabet->offset;
784 al_flags = alphabet->flags;
787 if(alphabet != current_alphabet)
789 // speak the name of the alphabet
790 current_alphabet = alphabet;
791 if((alphabet != NULL) && !(al_flags & AL_DONT_NAME) && (al_offset != translator->letter_bits_offset))
793 if((al_flags & AL_DONT_NAME) || (al_offset == translator->langopts.alt_alphabet) || (al_offset == translator->langopts.our_alphabet))
795 // don't say the alphabet name
800 if(Lookup(translator, alphabet->name, ph_alphabet) == 0) // the original language for the current voice
802 // Can't find the local name for this alphabet, use the English name
803 ph_alphabet[2] = SetTranslator2("en"); // overwrites previous contents of translator2
804 Lookup(translator2, alphabet->name, ph_buf2);
806 else if(translator != tr)
808 phontab_1 = tr->phoneme_tab_ix;
809 strcpy(ph_buf2, ph_alphabet);
810 ph_alphabet[2] = translator->phoneme_tab_ix;
815 // we used a different language for the alphabet name (now in ph_buf2)
// layout: pause, switch, phoneme table number, the name, switch, original phoneme table number
816 ph_alphabet[0] = phonPAUSE;
817 ph_alphabet[1] = phonSWITCH;
818 strcpy(&ph_alphabet[3], ph_buf2);
819 len = strlen(ph_buf2) + 3;
820 ph_alphabet[len] = phonSWITCH;
821 ph_alphabet[len+1] = phontab_1;
822 ph_alphabet[len+2] = 0;
829 // caution: SetWordStress() etc don't expect phonSWITCH + phoneme table number
// choose the language in which to speak the letter name
833 if((al_offset != 0) && (al_offset == translator->langopts.alt_alphabet))
834 language = translator->langopts.alt_alphabet_lang;
836 if((alphabet != NULL) && (alphabet->language != 0) && !(al_flags & AL_NOT_LETTERS))
837 language = alphabet->language;
839 language = L('e','n');
841 if((language != tr->translator_name) || (language == L('k','o')))
847 // speak in the language for this alphabet (or English)
848 ph_buf[2] = SetTranslator2(WordToString2(language));
850 if(((code = letter - 0xac00) >= 0) && (letter <= 0xd7af))
852 // Special case for Korean letters.
853 // break a syllable hangul into 2 or 3 individual jamo
857 if((initial = (code/28)/21) != 11)
859 p3 += utf8_out(initial + 0x1100, p3);
861 utf8_out(((code/28) % 21) + 0x1161, p3); // medial
862 utf8_out((code % 28) + 0x11a7, &p3[3]); // final
866 TranslateRules(translator2, &hangul_buf[1], &ph_buf[3], sizeof(ph_buf)-3, NULL, 0, NULL);
867 SetWordStress(translator2, &ph_buf[3], NULL, -1, 0);
871 LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
874 if(ph_buf[3] == phonSWITCH)
876 // another level of language change
877 ph_buf[2] = SetTranslator2(&ph_buf[4]);
878 LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
881 SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
// wrap the letter name in phoneme table switches: pause, switch, table number, name, switch, tr's table number
885 ph_buf[0] = phonPAUSE;
886 ph_buf[1] = phonSWITCH;
887 len = strlen(&ph_buf[3]) + 3;
888 ph_buf[len] = phonSWITCH; // switch back
889 ph_buf[len+1] = tr->phoneme_tab_ix;
897 // character name not found
901 speak_letter_number = 1;
902 if(!(al_flags & AL_NO_SYMBOL))
// use the dictionary's generic name for an unknown letter ("_?A") or unknown character ("_??")
904 if(iswalpha2(letter))
905 Lookup(translator, "_?A", ph_buf);
907 if((ph_buf[0]==0) && !iswspace(letter))
908 Lookup(translator, "_??", ph_buf);
912 EncodePhonemes("l'et@", ph_buf, NULL);
916 if(!(control & 4) && (al_flags & AL_NOT_CODE))
918 // don't speak the character code number, unless we want full details of this character
919 speak_letter_number = 0;
922 // if((ph_alphabet[0] != 0) && speak_letter_number)
923 // ph_buf[0] = 0; // don't speak "letter" if we speak alphabet name
925 if(speak_letter_number)
927 if(al_offset == 0x2800)
929 // braille dots symbol, list the numbered dots
// bit ix of the character code is tested for each of the 8 dots
931 for(ix=0; ix<8; ix++)
933 if(letter & (1 << ix))
942 // speak the hexadecimal number of the character code
943 sprintf(hexbuf,"%x",letter);
// speak each hexadecimal digit in turn, separated by very short pauses
947 for(p2 = hexbuf; *p2 != 0; p2++)
949 pbuf += strlen(pbuf);
950 *pbuf++ = phonPAUSE_VSHORT;
951 LookupLetter(translator, *p2, 0, pbuf, 1);
952 if(((pbuf[0] == 0) || (pbuf[0]==phonSWITCH)) && (*p2 >= 'a'))
954 // This language has no translation for 'a' to 'f', speak English names using base phonemes
955 EncodePhonemes(hex_letters[*p2 - 'a'], pbuf, NULL);
958 strcat(pbuf, pause_string);
963 len = strlen(phonemes);
965 if(tr->langopts.accents & 2) // 'capital' before or after the word ?
966 sprintf(ph_buf2,"%c%s%s%s",0xff,ph_alphabet,ph_buf,capital);
968 sprintf(ph_buf2,"%c%s%s%s",0xff,ph_alphabet,capital,ph_buf); // the 0xff marker will be removed or replaced in SetSpellingStress()
// append only if the result fits; otherwise this letter's phonemes are dropped
969 if((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
971 strcpy(&phonemes[len],ph_buf2);
974 } // end of TranslateLetter
// Adjust the stress marks, and optionally insert pauses, in the phonemes of a spelled-out word.
// The phoneme string is rewritten in place.
//   control: below 2, no pauses are inserted between characters; above 2, phonPAUSE_NOLINK is used
//   n_chars: primary stress is only reconsidered when this is greater than 1
978 void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars)
979 {//=============================================================================
980 // Individual letter names, reduce the stress of some.
986 unsigned char buf[N_WORD_PHONEMES];
// first pass over the input; a phonSTRESS_P which follows phonSWITCH is a phoneme table number, not a stress mark
988 for(ix=0; (c = phonemes[ix]) != 0; ix++)
990 if((c == phonSTRESS_P) && (prev != phonSWITCH))
// second pass, over the copy in buf, writing the result back to phonemes
1000 for(ix=0; (c = buf[ix]) != 0; ix++)
1002 if((c == phonSTRESS_P) && (n_chars > 1) && (prev != phonSWITCH))
1006 if(tr->langopts.spelling_stress == 1)
1008 // stress on initial letter when spelling
1014 if(count != n_stress)
1016 if(((count % 3) != 0) || (count == n_stress-1))
1017 c = phonSTRESS_3; // reduce to secondary stress
1023 if((control < 2) || (ix==0))
1024 continue; // don't insert pauses
1027 c = phonPAUSE; // pause after each character
1028 if(((count % 3) == 0) || (control > 2))
1029 c = phonPAUSE_NOLINK; // pause following a primary stress
1031 c = phonPAUSE_VSHORT;
1033 *phonemes++ = prev = c;
1036 *phonemes++ = phonPAUSE_NOLINK;
1038 } // end of SetSpellingStress
// Phonemes for ordinal endings; LookupNum2() copies them into its ordinal suffix and appends ph_ordinal2 to the tens.
1044 static char ph_ordinal2[12];
1045 static char ph_ordinal2x[12]; // alternate pronunciation, used by LookupNum2() in place of ph_ordinal2 when not empty
// Decide whether a dot which follows a number marks it as an ordinal number.
// TranslateRoman() sets FLAG_ORDINAL on the word when the result is non-zero.
//   word_end: the text immediately after the number
//   wtab:     word table entries; wtab[0] is this word, wtab[1] the next word
//   roman:    non-zero when called for a roman number
1048 static int CheckDotOrdinal(Translator *tr, char *word, char *word_end, WORD_TAB *wtab, int roman)
1049 {//==============================================================================================
// only for languages with NUM_ORDINAL_DOT, when a dot follows the number and the next word is separated by a space
1055 if((tr->langopts.numbers & NUM_ORDINAL_DOT) && ((word_end[0] == '.') || (wtab[0].flags & FLAG_HAS_DOT)) && !(wtab[1].flags & FLAG_NOSPACE))
1057 if(roman || !(wtab[1].flags & FLAG_FIRST_UPPER))
// c2 = the first character of the text which follows (after the dot and a space, if the dot is still present)
1059 if(word_end[0] == '.')
1060 utf8_in(&c2, &word_end[2]);
1062 utf8_in(&c2, &word_end[0]);
1064 if((word_end[0] != 0) && (word_end[1] != 0) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || IsAlpha(c2)))
1066 // ordinal number is indicated by dot after the number
1067 // but not if the next word starts with an upper-case letter
1068 // (c2 == 0) is for cases such as, "2.,"
1070 if(word_end[0] == '.')
1073 if((roman==0) && (tr->translator_name == L('h','u')))
1075 // lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix.
1079 nextflags = TranslateWord(tr, &word_end[2], 0, NULL, NULL);
1082 if((tr->prev_dict_flags[0] & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2)))
1083 ordinal = 0; // TEST 09.02.10
1085 if(nextflags & FLAG_ALT_TRANS)
1088 if(nextflags & FLAG_ALT3_TRANS)
1091 ordinal = 0; // eg. december 2-5. között
1093 if(tr->prev_dict_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT3_TRANS))
1101 } // end of CheckDotOrdinal
// Test the suffix which follows a number, for Hungarian.
//   word:         the text which follows the number
//   thousandplex: compared with 1 for the "1000-el" exception
//   value:        the number; (value % 1000) == 0 also selects the "1000-el" exception
1104 static int hu_number_e(const char *word, int thousandplex, int value)
1105 {//==================================================================
1106 // lang-hu: variant form of numbers when followed by hyphen and a suffix starting with 'a' or 'e' (but not a, e, az, ez, azt, ezt, att, ett)
1108 if((word[0] == 'a') || (word[0] == 'e'))
// the excluded words: a/e alone (followed by a space), az/ez..., att/ett
1110 if((word[1] == ' ') || (word[1] == 'z') || ((word[1] == 't') && (word[2] == 't')))
1113 if(((thousandplex==1) || ((value % 1000) == 0)) && (word[1] == 'l'))
1114 return(0); // 1000-el
1119 } // end of hu_number_e
// Translate a word which is a roman number. The value is converted to decimal digits and
// spoken by TranslateNumber(), with the phonemes for "_roman" before or after it.
//   word:   the letters of the roman number (lower case, as listed in roman_numbers), ended by a space
//   ph_out: receives the phoneme string
//   wtab:   word table entry for this word; FLAG_ORDINAL may be set in wtab[0].flags
// Returns 0 where the word is rejected as a roman number.
1123 int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab)
1124 {//=========================================================================
1135 int num_control = 0;
1136 unsigned int flags[2];
1138 char number_chars[N_WORD_BYTES];
// roman_values[n] is the value of the letter roman_numbers[n]
1140 static const char *roman_numbers = "ixcmvld";
1141 static int roman_values[] = {1,10,100,1000,5,50,500};
1150 if(((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && !(wtab[0].flags & FLAG_ALL_UPPER)) || IsDigit09(word[-2]))
1151 return(0); // not '2xx'
// accumulate the value, one letter at a time, until the space which ends the word
1154 while((c = *word++) != ' ')
1156 if((p2 = strchr(roman_numbers,c)) == NULL)
1159 value = roman_values[p2 - roman_numbers];
1169 if((prev > 1) && (prev != 10) && (prev != 100))
// a smaller value before a larger one
1174 if((prev != 0) && (prev < value))
1176 if(((acc % 10) != 0) || ((prev*10) < value))
1181 else if(value >= subtract)
1189 if(IsDigit09(word[0]))
1190 return(0); // eg. 'xx2'
// the language sets the range of values that are accepted as roman numbers
1193 if(acc < tr->langopts.min_roman)
1196 if(acc > tr->langopts.max_roman)
1200 Lookup(tr, "_roman",ph_roman); // precede by "roman" if _roman is defined in *_list
1203 if((tr->langopts.numbers & NUM_ROMAN_AFTER) == 0)
1205 strcpy(ph_out,ph_roman);
1206 p = &ph_out[strlen(ph_roman)];
// the decimal number, with a space on each side, to be translated by TranslateNumber()
1209 sprintf(number_chars," %d ",acc);
1213 // dot has not been removed. This implies that there was no space after it
1217 if(CheckDotOrdinal(tr, word_start, word, wtab, 1))
1218 wtab[0].flags |= FLAG_ORDINAL;
1220 if(tr->langopts.numbers & NUM_ROMAN_ORDINAL)
1222 if(tr->translator_name == L('h','u'))
1224 if(!(wtab[0].flags & FLAG_ORDINAL))
1226 if((wtab[0].flags & FLAG_HYPHEN_AFTER) && hu_number_e(word, 0, acc))
1228 // should use the 'e' form of the number
1237 wtab[0].flags |= FLAG_ORDINAL;
1241 tr->prev_dict_flags[0] = 0;
1242 tr->prev_dict_flags[1] = 0;
1243 TranslateNumber(tr, &number_chars[2], p, flags, wtab, num_control);
1245 if(tr->langopts.numbers & NUM_ROMAN_AFTER)
1246 strcat(ph_out,ph_roman);
1249 } // end of TranslateRoman
// Select the name prefix for a thousands/millions lookup. LookupThousands() inserts the
// returned string into dictionary entry names such as "_%s%d".
// The rule is chosen by bits 6-8 of translator->langopts.numbers2.
1252 static const char *M_Variant(int value)
1253 {//====================================
1254 // returns M, or perhaps MA or MB for some cases
// the last two digits are 11 to 19
1258 if(((value % 100) > 10) && ((value % 100) < 20))
1261 switch((translator->langopts.numbers2 >> 6) & 0x7)
1263 case 1: // lang=ru use singular for xx1 except for x11
1264 if((teens == 0) && ((value % 10) == 1))
1268 case 2: // lang=cs,sk
1269 if((value >= 2) && (value <= 4))
1274 if((teens == 0) && (((value % 10) >= 2) && ((value % 10) <= 4)))
1279 if((teens == 1) || ((value % 10) == 0))
1281 if((value % 10) == 1)
1285 case 5: // lang=bs,hr,sr
1288 if((value % 10) == 1)
1290 if(((value % 10) >= 2) && ((value % 10) <= 4))
// Find the phonemes for the name of a power of a thousand ("thousand", "million", ...).
//   value:        the number of thousands (millions, ...)
//   thousandplex: which power; it is the number after 'M' in the dictionary entry names
//   ph_out:       receives the phonemes (the "_0of" phonemes, if used, followed by the name)
// Returns found_value: the result of the lookups of entries that include the exact value, such as "_%dM%d".
// Sets speak_missing_thousands when a substitute name has to be used.
1299 static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out)
1300 {//=======================================================================================================
1301 // thousands_exact: bit 0 no hundreds,tens,or units, bit 1 ordinal number
1306 char ph_thousands[40];
1311 // first look for a match with the exact value of thousands
1314 if(thousands_exact & 1)
1316 if(thousands_exact & 2)
// ordinal form
1319 sprintf(string,"_%dM%do",value,thousandplex);
1320 found_value = Lookup(tr, string, ph_thousands);
// NOTE(review): bitwise '&' here, where the matching test further down uses '&&' - confirm that found_value is the intended operand of '!'
1322 if(!found_value & (number_control & 1))
1324 // look for the 'e' variant
1325 sprintf(string,"_%dM%de",value,thousandplex);
1326 found_value = Lookup(tr, string, ph_thousands);
1330 // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta)
1331 sprintf(string,"_%dM%dx",value,thousandplex);
1332 found_value = Lookup(tr, string, ph_thousands);
1335 if(found_value == 0)
1337 sprintf(string,"_%dM%d",value,thousandplex);
1338 found_value = Lookup(tr, string, ph_thousands);
// no entry for this exact value, look for the general name using the M_Variant() prefix
1342 if(found_value == 0)
1344 if((value % 100) >= 20)
1346 Lookup(tr, "_0of", ph_of);
1350 if(thousands_exact & 1)
1352 if(thousands_exact & 2)
1355 sprintf(string,"_%s%do",M_Variant(value), thousandplex);
1356 found = Lookup(tr, string, ph_thousands);
1358 if(!found && (number_control & 1))
1360 // look for the 'e' variant
1361 sprintf(string,"_%s%de",M_Variant(value), thousandplex);
1362 found = Lookup(tr, string, ph_thousands);
1366 // is there a different pronunciation if there are no hundreds,tens,or units ?
1367 sprintf(string,"_%s%dx",M_Variant(value), thousandplex);
1368 found = Lookup(tr, string, ph_thousands);
1373 sprintf(string,"_%s%d",M_Variant(value), thousandplex);
1375 if(Lookup(tr, string, ph_thousands) == 0)
1377 if(thousandplex > 3)
1379 sprintf(string,"_0M%d", thousandplex-1);
1380 if(Lookup(tr, string, ph_buf) == 0)
1382 // say "millions" if this name is not available and neither is the next lower
1383 Lookup(tr, "_0M2", ph_thousands);
1384 speak_missing_thousands = 3;
1387 if(ph_thousands[0] == 0)
1389 // repeat "thousand" if higher order names are not available
1390 sprintf(string,"_%dM1",value);
1391 if((found_value = Lookup(tr, string, ph_thousands)) == 0)
1392 Lookup(tr, "_0M1", ph_thousands);
1393 speak_missing_thousands = 2;
1398 sprintf(ph_out,"%s%s",ph_of,ph_thousands);
1400 if((value == 1) && (thousandplex == 1) && (tr->langopts.numbers & NUM_OMIT_1_THOUSAND))
1403 return(found_value);
1404 } // end of LookupThousands
// Get the phonemes for a number below 100 into ph_out.
// The dictionary is searched for an entry for the whole value ("_<value>" plus an optional
// variant letter); if there is none, the number is built from a tens entry ("_<tens>X") and a units entry.
//   value:   the number
//   control: option bits, listed below
//   ph_out:  receives the phoneme string
1407 static int LookupNum2(Translator *tr, int value, const int control, char *ph_out)
1408 {//=============================================================================
1409 // Lookup a 2 digit number
1410 // control bit 0: ordinal number
1411 // control bit 1: final tens and units (not number of thousands) (use special form of '1', LANG=de "eins")
1412 // control bit 2: tens and units only, no higher digits
1413 // control bit 3: use feminine form of '2' (for thousands)
1414 // control bit 4: speak zero tens
1415 // control bit 5: variant of ordinal number (lang=hu)
1416 // bit 8 followed by decimal fraction
1424 int found_ordinal = 0;
1427 char string[12]; // for looking up entries in *_list
1428 char ph_ordinal[20];
1447 is_ordinal = control & 1;
1449 if((control & 2) && (n_digit_lookup == 2))
1451 // pronunciation of the final 2 digits has already been found
1452 strcpy(ph_out, digit_lookup);
1456 if(digit_lookup[0] == 0)
1458 // is there a special pronunciation for this 2-digit number
1461 // is there a feminine form?
1462 sprintf(string,"_%df",value);
1463 found = Lookup(tr, string, ph_digits);
1467 strcpy(ph_ordinal, ph_ordinal2);
1471 sprintf(string,"_%d%cx",value,ord_type); // LANG=hu, special word for 1. 2. when there are no higher digits
1472 if((found = Lookup(tr, string, ph_digits)) != 0)
1474 if(ph_ordinal2x[0] != 0)
1475 strcpy(ph_ordinal, ph_ordinal2x); // alternate pronunciation (lang=an)
1480 sprintf(string,"_%d%c",value,ord_type);
1481 found = Lookup(tr, string, ph_digits);
1483 found_ordinal = found;
1490 // the final tens and units of a number
1491 if(number_control & 1)
1493 // look for 'e' variant
1494 sprintf(string,"_%de",value);
1495 found = Lookup(tr, string, ph_digits);
1500 // followed by hundreds or thousands etc
1501 sprintf(string,"_%da",value);
1502 found = Lookup(tr, string, ph_digits);
1507 if((is_ordinal) && (tr->langopts.numbers2 & NUM2_NO_TEEN_ORDINALS))
1509 // don't use numbers 10-99 to make ordinals, always use _1Xo etc (lang=pt)
// the plain entry for the whole value
1513 sprintf(string,"_%d",value);
1514 found = Lookup(tr, string, ph_digits);
1520 // no, speak as tens+units
1522 if((control & 0x10) && (value < 10))
1524 // speak leading zero
1525 Lookup(tr, "_0", ph_tens);
// ordinal form of the tens
1538 sprintf(string,"_%dX%c", tens, ord_type);
1539 if(Lookup(tr, string, ph_tens) != 0)
1543 if((units != 0) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
1545 // Use the ordinal form of tens as well as units. Add the ordinal ending
1546 strcat(ph_tens, ph_ordinal2);
1550 if(found_ordinal == 0)
1552 sprintf(string,"_%dX", tens);
1553 Lookup(tr, string, ph_tens);
1556 if((ph_tens[0] == 0) && (tr->langopts.numbers & NUM_VIGESIMAL))
1558 // tens not found, (for example) 73 is 60+13
// use the next lower even number of tens, and speak the remainder (0 to 19) as the units
1559 units = (value % 20);
1560 sprintf(string,"_%dX", tens & 0xfe);
1561 Lookup(tr, string, ph_tens);
1569 if((control & 2) && (digit_lookup[0] != 0))
1571 // we have an entry for this digit (possibly together with the next word)
1572 strcpy(ph_digits, digit_lookup);
1580 // is there a variant form of this number?
1581 sprintf(string,"_%df",units);
1582 found = Lookup(tr, string, ph_digits);
1584 if((is_ordinal) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0))
1587 sprintf(string,"_%d%c",units,ord_type);
1588 if((found = Lookup(tr, string, ph_digits)) != 0)
1595 if((number_control & 1) && (control & 2))
1597 // look for 'e' variant
1598 sprintf(string,"_%de",units);
1599 found = Lookup(tr, string, ph_digits);
1601 else if(((control & 2) == 0) || ((tr->langopts.numbers & NUM_SWAP_TENS) != 0))
1603 // followed by hundreds or thousands (or tens)
1604 sprintf(string,"_%da",units);
1605 found = Lookup(tr, string, ph_digits);
1610 sprintf(string,"_%d",units);
1611 Lookup(tr, string, ph_digits);
// an ordinal number for which no ordinal entry was found: get a general ordinal suffix
1618 if((is_ordinal) && (found_ordinal == 0) && (ph_ordinal[0] == 0))
1620 if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
1621 Lookup(tr, "_ord20", ph_ordinal);
1622 if(ph_ordinal[0] == 0)
1623 Lookup(tr, "_ord", ph_ordinal);
// both tens and units are present, and the language joins them with "and" or speaks the units first
1626 if((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0))
1628 Lookup(tr, "_0and", ph_and);
1630 if((is_ordinal) && (tr->langopts.numbers2 & NUM2_ORDINAL_NO_AND))
1633 if(tr->langopts.numbers & NUM_SWAP_TENS)
1634 sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal);
1636 sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal);
1641 if(tr->langopts.numbers & NUM_SINGLE_VOWEL)
1643 // remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
1644 if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0))
// skip a stress mark at the start of the units to find the type of its first real phoneme
1646 if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
1647 next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
1649 if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
1653 sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal);
1657 if(tr->langopts.numbers & NUM_SINGLE_STRESS_L)
1659 // only one primary stress, on the first part (tens)
// scan forwards through the phonemes
1661 for(ix=0; ix < (signed)strlen(ph_out); ix++)
1663 if(ph_out[ix] == phonSTRESS_P)
1666 ph_out[ix] = phonSTRESS_3;
1672 else if(tr->langopts.numbers & NUM_SINGLE_STRESS)
1674 // only one primary stress
// scan backwards from the end of the phonemes
1676 for(ix=strlen(ph_out)-1; ix>=0; ix--)
1678 if(ph_out[ix] == phonSTRESS_P)
1681 ph_out[ix] = phonSTRESS_3;
1688 } // end of LookupNum2
1691 static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null, int thousandplex, int control)
1692 {//=============================================================================================================
1693 // Translate a 3 digit number
1694 // control bit 0, previous thousands
1695 // bit 1, ordinal number
1696 // bit 5 variant form of ordinal number
1697 // bit 8 followed by decimal fraction
// Writes the phoneme string for 'value' into ph_out.
// value is split into hundreds (value / 100) and tensunits (value % 100); hundreds of 10 or more
// are also handled, using hundreds / 10 for a thousands part.
// suppress_null: when non-zero, a zero tensunits does not satisfy the test that precedes the tens-and-units lookup.
// thousandplex: presumably the index of this digit group (0 = lowest) - TODO confirm against the caller;
//               here it selects bit (1 << thousandplex) of langopts.numbers2.
1706 int say_zero_hundred=0;
// NOTE(review): "_%dCo" / "_%dC0" need 4 bytes plus the digits of 'hundreds', so string[12]
// is only large enough while 'hundreds' has at most 8 digits.
1707 char string[12]; // for looking up entries in **_list
1713 char ph_thousands[50];
1714 char ph_hundred_and[12];
1715 char ph_thousand_and[12];
// 0x22 keeps control bit 1 (ordinal) and bit 5 (variant form of ordinal)
1717 ordinal = control & 0x22;
1718 hundreds = value / 100;
1719 tensunits = value % 100;
1722 ph_thousands[0] = 0;
1723 ph_thousand_and[0] = 0;
1725 if((tr->langopts.numbers & NUM_ZERO_HUNDRED) && ((control & 1) || (hundreds >= 10)))
1727 say_zero_hundred = 1; // lang=vi
// Look up the word for "hundred" into ph_100: ordinal form "_0Co", exact-hundreds form "_0C0", general form "_0C"
1730 if((hundreds > 0) || say_zero_hundred)
1733 if(ordinal && (tensunits == 0))
1735 // ordinal number, with no tens or units
1736 found = Lookup(tr, "_0Co", ph_100);
1742 // special form for exact hundreds?
1743 found = Lookup(tr, "_0C0", ph_100);
1747 Lookup(tr, "_0C", ph_100);
1751 if(((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19))
1753 // speak numbers such as 1984 as years: nineteen-eighty-four
1754 // ph_100[0] = 0; // don't say "hundred", we also need to suppress "and"
1756 else if(hundreds >= 10)
1761 if ((value % 1000) == 0)
// the thousands part belongs to the next higher group
1764 tplex = thousandplex+1;
1765 if(tr->langopts.numbers2 & NUM2_MYRIADS)
// if there is no specific entry for this number of thousands, speak the number (hundreds / 10) itself
1770 if(LookupThousands(tr, hundreds / 10, tplex, exact | ordinal, ph_10T) == 0)
1773 if(tr->langopts.numbers2 & (1 << tplex))
1774 x = 8; // use variant (feminine) for before thousands and millions
1775 LookupNum2(tr, hundreds/10, x, ph_digits);
1778 if(tr->langopts.numbers2 & 0x200)
1779 sprintf(ph_thousands,"%s%c%s%c",ph_10T,phonEND_WORD,ph_digits,phonEND_WORD); // say "thousands" before its number, not after
1781 sprintf(ph_thousands,"%s%c%s%c",ph_digits,phonEND_WORD,ph_10T,phonEND_WORD);
1784 if((hundreds == 0) && (say_zero_hundred == 0))
1791 if((hundreds > 0) || say_zero_hundred)
1793 if((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0)))
1795 Lookup(tr, "_0and", ph_thousand_and);
1802 && ((tensunits == 0) || (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL)))
// ordinal form of this specific number of hundreds
1805 sprintf(string, "_%dCo", hundreds);
1806 found = Lookup(tr, string, ph_digits);
1808 if((tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL) && (tensunits > 0))
1810 // Use ordinal form of hundreds, as well as for tens and units
1811 // Add ordinal suffix to the hundreds
1812 strcat(ph_digits, ph_ordinal2);
1816 if((hundreds == 0) && say_zero_hundred)
1818 Lookup(tr, "_0", ph_digits);
1822 if((!found) && (tensunits == 0))
1824 // is there a special pronunciation for exactly n00 ?
1825 sprintf(string,"_%dC0",hundreds);
1826 found = Lookup(tr, string, ph_digits);
1831 sprintf(string,"_%dC",hundreds);
1832 found = Lookup(tr, string, ph_digits); // is there a specific pronunciation for n-hundred ?
1841 if((hundreds > 1) || ((tr->langopts.numbers & NUM_OMIT_1_HUNDRED) == 0))
1843 LookupNum2(tr, hundreds, 0, ph_digits);
// buf1 = thousands + "and" after thousands + hundreds digit + "hundred" word
1849 sprintf(buf1,"%s%s%s%s",ph_thousands,ph_thousand_and,ph_digits,ph_100);
1852 ph_hundred_and[0] = 0;
1855 if((control & 2) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
1857 // Don't use "and" if we apply ordinal to both hundreds and units
1861 if((value > 100) || ((control & 1) && (thousandplex==0)))
1863 if((tr->langopts.numbers & NUM_HUNDRED_AND) || ((tr->langopts.numbers & NUM_HUNDRED_AND_DIGIT) && (tensunits < 10)))
1865 Lookup(tr, "_0and", ph_hundred_and);
1868 if((tr->langopts.numbers & NUM_THOUSAND_AND) && (hundreds == 0) && ((control & 1) || (ph_thousands[0] != 0)))
1870 Lookup(tr, "_0and", ph_hundred_and);
// condition: there are tens/units to speak, or a null value is not being suppressed
1878 if((tensunits != 0) || (suppress_null == 0))
// x collects the control flags passed to LookupNum2 for the tens and units
1883 x = 2; // allow "eins" for 1 rather than "ein"
1885 x = 3; // ordinal number
1886 if((value < 100) && !(control & 1))
1887 x |= 4; // tens and units only, no higher digits
1889 x |= 0x20; // variant form of ordinal number
1893 if(tr->langopts.numbers2 & (1 << thousandplex))
1894 x = 8; // use variant (feminine) for before thousands and millions
// control bit 8 (0x100, followed by decimal fraction) is passed through to LookupNum2
1897 if(LookupNum2(tr, tensunits, x | (control & 0x100), buf2) != 0)
1899 if(tr->langopts.numbers & NUM_SINGLE_AND)
1900 ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units
1905 if(ph_ordinal2[0] != 0)
1908 if((ix > 0) && (buf1[ix-1] == phonPAUSE_SHORT))
1909 buf1[ix-1] = 0; // remove pause before adding ordinal suffix
1910 strcpy(buf2, ph_ordinal2);
// final result: buf1, the "and" after hundreds, an end-of-word marker, then buf2
1914 sprintf(ph_out,"%s%s%c%s",buf1,ph_hundred_and,phonEND_WORD,buf2);
1917 } // end of LookupNum3
bool CheckThousandsGroup(char *word, int group_len)
{//================================================
// Is this a group of 3 digits which looks like a thousands group?
//   word       points at the first character of the candidate group. It must point
//              inside a longer buffer, because word[-1] is read.
//   group_len  number of digit characters expected in the group
// Returns true only when word[0] .. word[group_len-1] all pass IsDigit09() and the
// group is not part of a longer digit run, i.e. neither the character before it
// (word[-1]) nor the character after it (word[group_len]) passes IsDigit09().
	int ix;

	// Test the characters on both sides of the group. The "before" test must read
	// word[-1]; testing the literal value -1 would never examine the text at all.
	if(IsDigit09(word[group_len]) || IsDigit09(word[-1]))
		return(false);

	for(ix=0; ix < group_len; ix++)
	{
		if(!IsDigit09(word[ix]))
			return(false);
	}
	return(true);
}
1937 static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
1938 {//=====================================================================================================================
1939 // Number translation with various options
1940 // the "word" may be up to 4 digits
1941 // "words" of 3 digits may be preceded by another number "word" for thousands or millions
// Builds the phonemes for the number at 'word' into ph_out and ORs FLAG_FOUND into *flags.
// It may also set FLAG_SKIPWORDS in flags[0] when an ordinal suffix is found.
// Besides its outputs it updates file-scope state: digit_lookup, number_control,
// speak_missing_thousands and dictionary_skipwords.
// Returns 0 for a long number string with a leading zero, so that it is spoken as individual digits.
1948 int suppress_null = 0;
1949 int decimal_point = 0;
1950 int thousandplex = 0;
1951 int thousands_exact = 1;
1952 int thousands_inc = 0;
1953 int prev_thousands = 0;
1957 int max_decimal_count;
// NOTE(review): the size comment on suffix[] allows for a 2-character prefix ("_#");
// the "_x#" lookup further down uses a 3-character prefix - confirm suffix can never be long enough to overflow string[].
1964 char string[32]; // for looking up entries in **_list
1970 char suffix[30]; // string[] must be long enough for sizeof(suffix)+2
1971 char buf_digit_lookup[50];
1973 static const char str_pause[2] = {phonPAUSE_NOLINK,0};
1977 buf_digit_lookup[0] = 0;
// NOTE(review): digit_lookup is a file-scope pointer but is set to this function's local buffer,
// so it must not be dereferenced after this function returns.
1978 digit_lookup = buf_digit_lookup;
1979 number_control = control;
// ix = number of leading characters of word accepted by IsDigit09
1981 for(ix=0; IsDigit09(word[ix]); ix++) ;
1983 value = this_value = atoi(word);
1986 if(tr->langopts.numbers2 & NUM2_MYRIADS)
1989 // is there a previous thousands part (as a previous "word") ?
// looks behind the start of this word: word[-2] must be the thousands separator and word[-3] a digit
1990 if((n_digits == group_len) && (word[-2] == tr->langopts.thousands_sep) && IsDigit09(word[-3]))
1994 else if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE))
1996 // thousands groups can be separated by spaces
1997 if((n_digits == 3) && !(wtab->flags & FLAG_MULTIPLE_SPACES) && IsDigit09(word[-2]))
2002 if(prev_thousands == 0)
2004 speak_missing_thousands = 0;
2010 if(prev_thousands || (word[0] != '0'))
2012 // don't check for ordinal if the number has a leading zero
2013 if((ordinal = CheckDotOrdinal(tr, word, &word[ix], wtab, 0)) != 0)
2019 if((word[ix] == '.') && !IsDigit09(word[ix+1]) && !IsDigit09(word[ix+2]) && !(wtab[1].flags & FLAG_NOSPACE))
2021 // remove dot unless followed by another number
2025 if((ordinal == 0) || (tr->translator_name == L('h','u')))
2027 // NOTE lang=hu, allow both dot and ordinal suffix, eg. "december 21.-én"
2028 // look for an ordinal number suffix after the number
2031 if(wtab[0].flags & FLAG_HYPHEN_AFTER)
// the suffix scan stops at the end of the string, at a space, or when ix reaches sizeof(suffix)-1
2036 while((word[ix] != 0) && (word[ix] != ' ') && (ix < (int)(sizeof(suffix)-1)))
2044 if((tr->langopts.ordinal_indicator != NULL) && (strcmp(suffix, tr->langopts.ordinal_indicator) == 0))
2048 else if(!IsDigit09(suffix[0])) // not _#9 (tab)
// ordinal suffixes are dictionary entries named "_#" followed by the suffix text
2050 sprintf(string,"_#%s",suffix);
2051 if(Lookup(tr, string, ph_ordinal2))
2053 // this is an ordinal suffix
2055 flags[0] |= FLAG_SKIPWORDS;
2057 sprintf(string,"_x#%s",suffix);
2058 Lookup(tr, string, ph_ordinal2x); // is there an alternate pronunciation?
2064 if(wtab[0].flags & FLAG_ORDINAL)
2071 if((word[0] == '0') && (prev_thousands == 0) && (word[1] != ' ') && (word[1] != tr->langopts.decimal_sep))
2073 if((n_digits == 2) && (word[3] == ':') && IsDigit09(word[5]) && isspace(word[7]))
2075 // looks like a time 02:30, omit the leading zero
2081 flags[0] &= ~FLAG_SKIPWORDS;
2082 return(0); // long number string with leading zero, speak as individual digits
2085 // speak leading zeros
// each "_0" is appended at the current end of ph_zeros; the last digit is never treated as a leading zero
2086 for(ix=0; (word[ix] == '0') && (ix < (n_digits-1)); ix++)
2088 Lookup(tr, "_0", &ph_zeros[strlen(ph_zeros)]);
2093 if((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' '))
2095 else if(word[n_digits] == tr->langopts.thousands_sep)
2098 suffix_ix = n_digits+2;
2099 if(thousands_inc > 0)
2101 // if the following "words" are three-digit groups, count them and add
2102 // a "thousand"/"million" suffix to this one
2103 digix = n_digits + thousands_inc;
2105 while(((wtab[thousandplex+1].flags & FLAG_MULTIPLE_SPACES) == 0) && CheckThousandsGroup(&word[digix], group_len))
// any non-zero digit in a following group means this is not an exact number of thousands
2107 for(ix=0; ix<group_len; ix++)
2109 if(word[digix+ix] != '0')
2111 thousands_exact = 0;
2118 if((word[digix] == tr->langopts.thousands_sep) || ((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[digix] == ' ')))
2120 suffix_ix = digix+2;
2121 digix += thousands_inc;
2128 if((value == 0) && prev_thousands)
2133 if(tr->translator_name == L('h','u'))
2135 // variant form of numbers when followed by hyphen and a suffix starting with 'a' or 'e' (but not a, e, az, ez, azt, ezt)
2136 if((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact==1) && hu_number_e(&word[suffix_ix], thousandplex, value))
2138 number_control |= 1; // use _1e variant of number
2142 if((word[n_digits] == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1]))
2144 // this "word" ends with a decimal point
2145 Lookup(tr, "_dpt", ph_append);
// 0x100 is later OR-ed into the control argument of LookupNum3 (its bit 8: followed by decimal fraction)
2146 decimal_point = 0x100;
2148 else if(suppress_null == 0)
2150 if(thousands_inc > 0)
2152 if(thousandplex > 0)
2153 // if((thousandplex > 0) && (value < 1000))
2155 if((suppress_null == 0) && (LookupThousands(tr,value,thousandplex, thousands_exact, ph_append)))
2157 // found an exact match for N thousand
2166 if(speak_missing_thousands == 1)
2168 // speak this thousandplex if there was no word for the previous thousandplex
2169 sprintf(string,"_0M%d",thousandplex+1);
2170 if(Lookup(tr, string, buf1)==0)
2172 sprintf(string,"_0M%d",thousandplex);
2173 Lookup(tr, string, ph_append);
2177 if((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
2179 Lookup(tr, "_.", ph_append);
2182 if(thousandplex == 0)
2185 // look for combinations of the number with the next word
2187 while(IsDigit09(p[1])) p++; // just use the last digit
2188 if(IsDigit09(p[-1]))
2191 if(LookupDictList(tr, &p2, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // lookup 2 digits
2197 // if((buf_digit_lookup[0] == 0) && (*p != '0') && (dot_ordinal==0))
2198 if((buf_digit_lookup[0] == 0) && (*p != '0'))
2201 // not found, lookup only the last digit (?? but not if dot-ordinal has been found)
2202 if(LookupDictList(tr, &p, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // don't match '0', or entries with $only
2208 if(prev_thousands == 0)
2210 if((decimal_point == 0) && (ordinal == 0))
2212 // Look for special pronunciation for this number in isolation (LANG=kl)
2213 sprintf(string, "_%dn", value);
2214 if(Lookup(tr, string, ph_out))
2220 if(tr->langopts.numbers2 & NUM2_PERCENT_BEFORE)
2222 // LANG=si, say "percent" before the number
2224 while((*p2 != ' ') && (*p2 != 0))
2230 Lookup(tr, "%", ph_out);
// advance ph_out past the "percent" phonemes so that the number is written after them
2231 ph_out += strlen(ph_out);
// translate this group; the control argument combines the previous-thousands, ordinal and decimal-point flags
2239 LookupNum3(tr, value, ph_buf, suppress_null, thousandplex, prev_thousands | ordinal | decimal_point);
2240 if((thousandplex > 0) && (tr->langopts.numbers2 & 0x200))
2241 sprintf(ph_out,"%s%s%c%s%s",ph_zeros,ph_append,phonEND_WORD,ph_buf2,ph_buf); // say "thousands" before its number
2243 sprintf(ph_out,"%s%s%s%c%s",ph_zeros,ph_buf2,ph_buf,phonEND_WORD,ph_append);
2246 while(decimal_point)
2251 while(IsDigit09(word[n_digits+decimal_count]))
2254 // if(decimal_count > 1)
2256 max_decimal_count = 2;
// the decimal fraction mode is selected by the bits of langopts.numbers under mask 0xe000
2257 switch(decimal_mode = (tr->langopts.numbers & 0xe000))
2259 case NUM_DFRACTION_4:
2260 max_decimal_count = 5;
// no break here: NUM_DFRACTION_4 continues into the NUM_DFRACTION_2 code with the larger max_decimal_count
2261 case NUM_DFRACTION_2:
2262 // French/Polish decimal fraction
2263 while(word[n_digits] == '0')
2265 Lookup(tr, "_0", buf1);
2266 strcat(ph_out,buf1);
2270 if((decimal_count <= max_decimal_count) && IsDigit09(word[n_digits]))
2272 LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
2273 strcat(ph_out,buf1);
2274 n_digits += decimal_count;
2278 case NUM_DFRACTION_1: // italian, say "hundredths" if leading zero
2279 case NUM_DFRACTION_5: // hungarian, always say "tenths" etc.
2280 case NUM_DFRACTION_6: // kazakh, always say "tenths" etc, before the decimal fraction
2281 LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0);
2282 if((word[n_digits]=='0') || (decimal_mode != NUM_DFRACTION_1))
2284 // decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
2285 sprintf(string,"_0Z%d",decimal_count);
2286 if(Lookup(tr, string, buf1) == 0)
2287 break; // revert to speaking single digits
2289 if(decimal_mode == NUM_DFRACTION_6)
2290 strcat(ph_out, buf1);
2292 strcat(ph_buf, buf1);
2294 strcat(ph_out,ph_buf);
2295 n_digits += decimal_count;
2298 case NUM_DFRACTION_3:
2299 // Romanian decimal fractions
2300 if((decimal_count <= 4) && (word[n_digits] != '0'))
2302 LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
2303 strcat(ph_out,buf1);
2304 n_digits += decimal_count;
2308 case NUM_DFRACTION_7:
2309 // alternative form of decimal fraction digits, except the final digit
2310 while(decimal_count-- > 1)
2312 sprintf(string,"_%cd", word[n_digits]);
2313 if(Lookup(tr, string, buf1) == 0)
2316 strcat(ph_out, buf1);
// the loop also stops once ph_out is within 10 characters of N_WORD_PHONEMES
2321 while(IsDigit09(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10)))
2323 // speak any remaining decimal fraction digits individually
2324 value = word[n_digits++] - '0';
2325 LookupNum2(tr, value, 2, buf1);
2326 len = strlen(ph_out);
2327 sprintf(&ph_out[len],"%c%s", phonEND_WORD, buf1);
2330 // something after the decimal part ?
2331 if(Lookup(tr, "_dpt2", buf1))
2332 strcat(ph_out,buf1);
// c is the first non-digit character that ended the digit loop above; another decimal separator followed by a digit gets another "_dpt"
2334 if((c == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1]))
2336 Lookup(tr, "_dpt", buf1);
2337 strcat(ph_out,buf1);
2344 if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
// decode the character after the number, to decide whether to append a pause
2348 p = &word[n_digits+1];
2350 p += utf8_in(&next_char,p);
2351 if((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
2352 utf8_in(&next_char,p);
2354 if(!iswalpha2(next_char) && (thousands_exact==0))
2355 // if(!iswalpha2(next_char) && !((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact != 0)))
2356 strcat(ph_out,str_pause); // don't add pause for 100s, 6th, etc.
2359 *flags |= FLAG_FOUND;
2360 speak_missing_thousands--;
2363 dictionary_skipwords = skipwords;
2365 } // end of TranslateNumber_1
2369 int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
2370 {//=============================================================================================================
2371 if((option_sayas == SAYAS_DIGITS1) || (wtab[0].flags & FLAG_INDIVIDUAL_DIGITS))
2372 return(0); // speak digits individually
2374 if(tr->langopts.numbers != 0)
2376 return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control));
2379 } // end of TranslateNumber