OSDN Git Service

imported espeak-1.47.09
[nvdajp/nvdajpmiscdep.git] / include / espeak / src / numbers.cpp
1 /***************************************************************************
2  *   Copyright (C) 2005 to 2013 by Jonathan Duddington                     *
3  *   email: jonsd@users.sourceforge.net                                    *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 3 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, see:                                 *
17  *               <http://www.gnu.org/licenses/>.                           *
18  ***************************************************************************/
19
20 #include "StdAfx.h"
21
22 #include <stdio.h>
23 #include <ctype.h>
24 #include <stdlib.h>
25 #include <string.h>
26
27 #include <wctype.h>
28 #include <wchar.h>
29
30 #include "speak_lib.h"
31 #include "speech.h"
32 #include "phoneme.h"
33 #include "synthesize.h"
34 #include "voice.h"
35 #include "translate.h"
36
37
38
// Modifier / accent codes.  Each value is the index of the matching name
// token in accents_tab[], and is what LETTER() and LIGATURE() pack into the
// letter_accents_* tables.
enum {
        // modifiers of the letter shape
        M_NAME         = 0,
        M_SMALLCAP     = 1,
        M_TURNED       = 2,
        M_REVERSED     = 3,
        M_CURL         = 4,

        // accents
        M_ACUTE        = 5,
        M_BREVE        = 6,
        M_CARON        = 7,
        M_CEDILLA      = 8,
        M_CIRCUMFLEX   = 9,
        M_DIAERESIS    = 10,
        M_DOUBLE_ACUTE = 11,
        M_DOT_ABOVE    = 12,
        M_GRAVE        = 13,
        M_MACRON       = 14,
        M_OGONEK       = 15,
        M_RING         = 16,
        M_STROKE       = 17,
        M_TILDE        = 18,

        M_BAR          = 19,
        M_RETROFLEX    = 20,
        M_HOOK         = 21,

        // aliases which share the name token of another code
        M_MIDDLE_DOT   = M_DOT_ABOVE,
        M_IMPLOSIVE    = M_HOOK
};
67
// File-scope state for the number translation code.
// NOTE(review): nothing in this section of the file reads or writes these;
// their users are presumably further down - confirm before changing them.
static int n_digit_lookup = 0;
static char *digit_lookup = NULL;
static int speak_missing_thousands = 0;
static int number_control = 0;
72
73
// Description of one accent or letter modifier.
typedef struct {
        const char *name;   // token which is looked up in the *_list file to get the spoken name
        int  flags;         // bit 0: the name is spoken before the letter rather than after it
} ACCENTS;

// Indexed by the M_xxx modifier codes; the order here must match those values.
static ACCENTS accents_tab[] = {
        /*  0  M_NAME         */ {"_lig", 1},
        /*  1  M_SMALLCAP     */ {"_smc", 1},
        /*  2  M_TURNED       */ {"_tur", 1},
        /*  3  M_REVERSED     */ {"_rev", 1},
        /*  4  M_CURL         */ {"_crl", 0},

        /*  5  M_ACUTE        */ {"_acu", 0},
        /*  6  M_BREVE        */ {"_brv", 0},
        /*  7  M_CARON        */ {"_hac", 0},   // caron/hacek
        /*  8  M_CEDILLA      */ {"_ced", 0},
        /*  9  M_CIRCUMFLEX   */ {"_cir", 0},
        /* 10  M_DIAERESIS    */ {"_dia", 0},
        /* 11  M_DOUBLE_ACUTE */ {"_ac2", 0},
        /* 12  M_DOT_ABOVE    */ {"_dot", 0},
        /* 13  M_GRAVE        */ {"_grv", 0},
        /* 14  M_MACRON       */ {"_mcn", 0},
        /* 15  M_OGONEK       */ {"_ogo", 0},
        /* 16  M_RING         */ {"_rng", 0},
        /* 17  M_STROKE       */ {"_stk", 0},
        /* 18  M_TILDE        */ {"_tld", 0},

        /* 19  M_BAR          */ {"_bar", 0},
        /* 20  M_RETROFLEX    */ {"_rfx", 0},
        /* 21  M_HOOK         */ {"_hok", 0},
};
106
107
// Encoding of one entry of the letter_accents_* tables (16 bits).
//   LETTER:    bits 0-5    base letter, stored as (ch - 59)
//              bits 6-10   first modifier (M_xxx)
//              bits 11-14  second modifier (M_xxx), or 0
//   LIGATURE:  bits 0-5    first letter, stored as (ch1 - 59)
//              bits 6-11   second letter, stored as (ch2 - 59)
//              bits 12-14  modifier (M_xxx), or 0
//              bit 15      set, marks the entry as a ligature
// CAPITAL (no data) fills the table slots of upper-case code points.
// Arguments and results are fully parenthesised so that the macros expand
// correctly inside larger expressions and with compound arguments.
#define CAPITAL  0
#define LETTER(ch,mod1,mod2)   (((ch)-59) + ((mod1) << 6) + ((mod2) << 11))
#define LIGATURE(ch1,ch2,mod1) (((ch1)-59) + (((ch2)-59) << 6) + ((mod1) << 12) + 0x8000)
111
112
// Codes for base letters which are not ascii.  They lie below 'a', so that
// a decoded base letter less than 'a' is known to need conversion through
// non_ascii_tab[].
enum {
        L_ALPHA   = 60,   // U+3B1
        L_SCHWA   = 61,   // U+259
        L_OPEN_E  = 62,   // U+25B
        L_GAMMA   = 63,   // U+3B3
        L_IOTA    = 64,   // U+3B9
        L_OE      = 65,   // U+153
        L_OMEGA   = 66,   // U+3C9

        L_PHI     = 67,   // U+3C6
        L_ESH     = 68,   // U+283
        L_UPSILON = 69,   // U+3C5
        L_EZH     = 70,   // U+292
        L_GLOTTAL = 71,   // U+294
        L_RTAP    = 72    // U+27E
};


// Unicode code point for each L_xxx code, indexed by (code - 59).
static const short non_ascii_tab[] = {
        0,        // index 0 is not used
        0x3b1,    // L_ALPHA
        0x259,    // L_SCHWA
        0x25b,    // L_OPEN_E
        0x3b3,    // L_GAMMA
        0x3b9,    // L_IOTA
        0x153,    // L_OE
        0x3c9,    // L_OMEGA
        0x3c6,    // L_PHI
        0x283,    // L_ESH
        0x3c5,    // L_UPSILON
        0x292,    // L_EZH
        0x294,    // L_GLOTTAL
        0x27e     // L_RTAP
};
133
134
135 // characters U+00e0 to U+017f
136 static const unsigned short letter_accents_0e0[] = {
137         LETTER('a',M_GRAVE,0),   // U+00e0
138         LETTER('a',M_ACUTE,0),
139         LETTER('a',M_CIRCUMFLEX,0),
140         LETTER('a',M_TILDE,0),
141         LETTER('a',M_DIAERESIS,0),
142         LETTER('a',M_RING,0),
143         LIGATURE('a','e',0),
144         LETTER('c',M_CEDILLA,0),
145         LETTER('e',M_GRAVE,0),
146         LETTER('e',M_ACUTE,0),
147         LETTER('e',M_CIRCUMFLEX,0),
148         LETTER('e',M_DIAERESIS,0),
149         LETTER('i',M_GRAVE,0),
150         LETTER('i',M_ACUTE,0),
151         LETTER('i',M_CIRCUMFLEX,0),
152         LETTER('i',M_DIAERESIS,0),
153         LETTER('d',M_NAME,0),  // eth  // U+00f0
154         LETTER('n',M_TILDE,0),
155         LETTER('o',M_GRAVE,0),
156         LETTER('o',M_ACUTE,0),
157         LETTER('o',M_CIRCUMFLEX,0),
158         LETTER('o',M_TILDE,0),
159         LETTER('o',M_DIAERESIS,0),
160         0,     // division sign
161         LETTER('o',M_STROKE,0),
162         LETTER('u',M_GRAVE,0),
163         LETTER('u',M_ACUTE,0),
164         LETTER('u',M_CIRCUMFLEX,0),
165         LETTER('u',M_DIAERESIS,0),
166         LETTER('y',M_ACUTE,0),
167         LETTER('t',M_NAME,0),  // thorn
168         LETTER('y',M_DIAERESIS,0),
169         CAPITAL,                 // U+0100
170         LETTER('a',M_MACRON,0),
171         CAPITAL,
172         LETTER('a',M_BREVE,0),
173         CAPITAL,
174         LETTER('a',M_OGONEK,0),
175         CAPITAL,
176         LETTER('c',M_ACUTE,0),
177         CAPITAL,
178         LETTER('c',M_CIRCUMFLEX,0),
179         CAPITAL,
180         LETTER('c',M_DOT_ABOVE,0),
181         CAPITAL,
182         LETTER('c',M_CARON,0),
183         CAPITAL,
184         LETTER('d',M_CARON,0),
185         CAPITAL,                 // U+0110
186         LETTER('d',M_STROKE,0),
187         CAPITAL,
188         LETTER('e',M_MACRON,0),
189         CAPITAL,
190         LETTER('e',M_BREVE,0),
191         CAPITAL,
192         LETTER('e',M_DOT_ABOVE,0),
193         CAPITAL,
194         LETTER('e',M_OGONEK,0),
195         CAPITAL,
196         LETTER('e',M_CARON,0),
197         CAPITAL,
198         LETTER('g',M_CIRCUMFLEX,0),
199         CAPITAL,
200         LETTER('g',M_BREVE,0),
201         CAPITAL,                // U+0120
202         LETTER('g',M_DOT_ABOVE,0),
203         CAPITAL,
204         LETTER('g',M_CEDILLA,0),
205         CAPITAL,
206         LETTER('h',M_CIRCUMFLEX,0),
207         CAPITAL,
208         LETTER('h',M_STROKE,0),
209         CAPITAL,
210         LETTER('i',M_TILDE,0),
211         CAPITAL,
212         LETTER('i',M_MACRON,0),
213         CAPITAL,
214         LETTER('i',M_BREVE,0),
215         CAPITAL,
216         LETTER('i',M_OGONEK,0),
217         CAPITAL,               // U+0130
218         LETTER('i',M_NAME,0), // dotless i
219         CAPITAL,
220         LIGATURE('i','j',0),
221         CAPITAL,
222         LETTER('j',M_CIRCUMFLEX,0),
223         CAPITAL,
224         LETTER('k',M_CEDILLA,0),
225         LETTER('k',M_NAME,0),  // kra
226         CAPITAL,
227         LETTER('l',M_ACUTE,0),
228         CAPITAL,
229         LETTER('l',M_CEDILLA,0),
230         CAPITAL,
231         LETTER('l',M_CARON,0),
232         CAPITAL,
233         LETTER('l',M_MIDDLE_DOT,0),  // U+0140
234         CAPITAL,
235         LETTER('l',M_STROKE,0),
236         CAPITAL,
237         LETTER('n',M_ACUTE,0),
238         CAPITAL,
239         LETTER('n',M_CEDILLA,0),
240         CAPITAL,
241         LETTER('n',M_CARON,0),
242         LETTER('n',M_NAME,0),  // apostrophe n
243         CAPITAL,
244         LETTER('n',M_NAME,0),  // eng
245         CAPITAL,
246         LETTER('o',M_MACRON,0),
247         CAPITAL,
248         LETTER('o',M_BREVE,0),
249         CAPITAL,             // U+0150
250         LETTER('o',M_DOUBLE_ACUTE,0),
251         CAPITAL,
252         LIGATURE('o','e',0),
253         CAPITAL,
254         LETTER('r',M_ACUTE,0),
255         CAPITAL,
256         LETTER('r',M_CEDILLA,0),
257         CAPITAL,
258         LETTER('r',M_CARON,0),
259         CAPITAL,
260         LETTER('s',M_ACUTE,0),
261         CAPITAL,
262         LETTER('s',M_CIRCUMFLEX,0),
263         CAPITAL,
264         LETTER('s',M_CEDILLA,0),
265         CAPITAL,              // U+0160
266         LETTER('s',M_CARON,0),
267         CAPITAL,
268         LETTER('t',M_CEDILLA,0),
269         CAPITAL,
270         LETTER('t',M_CARON,0),
271         CAPITAL,
272         LETTER('t',M_STROKE,0),
273         CAPITAL,
274         LETTER('u',M_TILDE,0),
275         CAPITAL,
276         LETTER('u',M_MACRON,0),
277         CAPITAL,
278         LETTER('u',M_BREVE,0),
279         CAPITAL,
280         LETTER('u',M_RING,0),
281         CAPITAL,              // U+0170
282         LETTER('u',M_DOUBLE_ACUTE,0),
283         CAPITAL,
284         LETTER('u',M_OGONEK,0),
285         CAPITAL,
286         LETTER('w',M_CIRCUMFLEX,0),
287         CAPITAL,
288         LETTER('y',M_CIRCUMFLEX,0),
289         CAPITAL,   // Y-DIAERESIS
290         CAPITAL,
291         LETTER('z',M_ACUTE,0),
292         CAPITAL,
293         LETTER('z',M_DOT_ABOVE,0),
294         CAPITAL,
295         LETTER('z',M_CARON,0),
296         LETTER('s',M_NAME,0), // long-s  // U+17f
297 };
298
299
300 // characters U+0250 to U+029F
301 static const unsigned short letter_accents_250[] = {
302         LETTER('a',M_TURNED,0),         // U+250
303         LETTER(L_ALPHA,0,0),
304         LETTER(L_ALPHA,M_TURNED,0),
305         LETTER('b',M_IMPLOSIVE,0),
306         0,  // open-o
307         LETTER('c',M_CURL,0),
308         LETTER('d',M_RETROFLEX,0),
309         LETTER('d',M_IMPLOSIVE,0),
310         LETTER('e',M_REVERSED,0),       // U+258
311         0,   // schwa
312         LETTER(L_SCHWA,M_HOOK,0),
313         0,   // open-e
314         LETTER(L_OPEN_E,M_REVERSED,0),
315         LETTER(L_OPEN_E,M_HOOK,M_REVERSED),
316         0,//LETTER(L_OPEN_E,M_CLOSED,M_REVERSED),
317         LETTER('j',M_BAR,0),
318         LETTER('g',M_IMPLOSIVE,0),      // U+260
319         LETTER('g',0,0),
320         LETTER('g',M_SMALLCAP,0),
321         LETTER(L_GAMMA,0,0),
322         0,   // ramshorn
323         LETTER('h',M_TURNED,0),
324         LETTER('h',M_HOOK,0),
325         0,//LETTER(L_HENG,M_HOOK,0),
326         LETTER('i',M_BAR,0),            // U+268
327         LETTER(L_IOTA,0,0),
328         LETTER('i',M_SMALLCAP,0),
329         LETTER('l',M_TILDE,0),
330         LETTER('l',M_BAR,0),
331         LETTER('l',M_RETROFLEX,0),
332         LIGATURE('l','z',0),
333         LETTER('m',M_TURNED,0),
334         0,//LETTER('m',M_TURNED,M_LEG), // U+270
335         LETTER('m',M_HOOK,0),
336         0,//LETTER('n',M_LEFTHOOK,0),
337         LETTER('n',M_RETROFLEX,0),
338         LETTER('n',M_SMALLCAP,0),
339         LETTER('o',M_BAR,0),
340         LIGATURE('o','e',M_SMALLCAP),
341         0,//LETTER(L_OMEGA,M_CLOSED,0),
342         LETTER(L_PHI,0,0),              // U+278
343         LETTER('r',M_TURNED,0),
344         0,//LETTER('r',M_TURNED,M_LEG),
345         LETTER('r',M_RETROFLEX,M_TURNED),
346         0,//LETTER('r',M_LEG,0),
347         LETTER('r',M_RETROFLEX,0),
348         0,  // r-tap
349         LETTER(L_RTAP,M_REVERSED,0),
350         LETTER('r',M_SMALLCAP,0),       // U+280
351         LETTER('r',M_TURNED,M_SMALLCAP),
352         LETTER('s',M_RETROFLEX,0),
353         0,  // esh
354         0,//LETTER('j',M_BAR,L_IMPLOSIVE),
355         LETTER(L_ESH,M_REVERSED,0),
356         LETTER(L_ESH,M_CURL,0),
357         LETTER('t',M_TURNED,0),
358         LETTER('t',M_RETROFLEX,0),      // U+288
359         LETTER('u',M_BAR,0),
360         LETTER(L_UPSILON,0,0),
361         LETTER('v',M_HOOK,0),
362         LETTER('v',M_TURNED,0),
363         LETTER('w',M_TURNED,0),
364         LETTER('y',M_TURNED,0),
365         LETTER('y',M_SMALLCAP,0),
366         LETTER('z',M_RETROFLEX,0),      // U+290
367         LETTER('z',M_CURL,0),
368         0,  // ezh
369         LETTER(L_EZH,M_CURL,0),
370         0,  // glottal stop
371         LETTER(L_GLOTTAL,M_REVERSED,0),
372         LETTER(L_GLOTTAL,M_TURNED,0),
373         0,//LETTER('c',M_LONG,0),
374         0,  // bilabial click           // U+298
375         LETTER('b',M_SMALLCAP,0),
376         0,//LETTER(L_OPEN_E,M_CLOSED,0),
377         LETTER('g',M_IMPLOSIVE,M_SMALLCAP),
378         LETTER('h',M_SMALLCAP,0),
379         LETTER('j',M_CURL,0),
380         LETTER('k',M_TURNED,0),
381         LETTER('l',M_SMALLCAP,0),
382         LETTER('q',M_HOOK,0),      // U+2a0
383         LETTER(L_GLOTTAL,M_STROKE,0),
384         LETTER(L_GLOTTAL,M_STROKE,M_REVERSED),
385         LIGATURE('d','z',0),
386         0,   // dezh
387         LIGATURE('d','z',M_CURL),
388         LIGATURE('t','s',0),
389         0,   // tesh
390         LIGATURE('t','s',M_CURL),
391 };
392
393 static int LookupLetter2(Translator *tr, unsigned int letter, char *ph_buf)
394 {       //========================================================================
395         int len;
396         char single_letter[10];
397
398         single_letter[0] = 0;
399         single_letter[1] = '_';
400         len = utf8_out(letter, &single_letter[2]);
401         single_letter[len+2] = ' ';
402         single_letter[len+3] = 0;
403
404         if(Lookup(tr, &single_letter[1], ph_buf) == 0)
405         {
406                 single_letter[1] = ' ';
407                 if(Lookup(tr, &single_letter[2], ph_buf) == 0)
408                 {
409                         TranslateRules(tr, &single_letter[2], ph_buf, 20, NULL,0,NULL);
410                 }
411         }
412         return(ph_buf[0]);
413 }
414
415
416 void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf)
417 {//=========================================================================
418         // lookup the character in the accents table
419         int accent_data = 0;
420         int accent1 = 0;
421         int accent2 = 0;
422         int basic_letter;
423         int letter2=0;
424         char ph_letter1[30];
425         char ph_letter2[30];
426         char ph_accent1[30];
427         char ph_accent2[30];
428
429         ph_accent2[0] = 0;
430
431         if((letter >= 0xe0) && (letter < 0x17f))
432         {
433                 accent_data = letter_accents_0e0[letter - 0xe0];
434         }
435         else if((letter >= 0x250) && (letter <= 0x2a8))
436         {
437                 accent_data = letter_accents_250[letter - 0x250];
438         }
439
440         if(accent_data != 0)
441         {
442                 basic_letter = (accent_data & 0x3f) + 59;
443                 if(basic_letter < 'a')
444                         basic_letter = non_ascii_tab[basic_letter-59];
445
446                 if(accent_data & 0x8000)
447                 {
448                         letter2 = (accent_data >> 6) & 0x3f;
449                         letter2 += 59;
450                         accent2 = (accent_data >> 12) & 0x7;
451                 }
452                 else
453                 {
454                         accent1 = (accent_data >> 6) & 0x1f;
455                         accent2 = (accent_data >> 11) & 0xf;
456                 }
457
458
459                 if(Lookup(tr, accents_tab[accent1].name, ph_accent1) != 0)
460                 {
461
462                         if(LookupLetter2(tr, basic_letter, ph_letter1) != 0)
463                         {
464                                 if(accent2 != 0)
465                                 {
466                                         if(Lookup(tr, accents_tab[accent2].name, ph_accent2) == 0)
467                                         {
468 //                                              break;
469                                         }
470
471                                         if(accents_tab[accent2].flags & 1)
472                                         {
473                                                 strcpy(ph_buf,ph_accent2);
474                                                 ph_buf += strlen(ph_buf);
475                                                 ph_accent2[0] = 0;
476                                         }
477                                 }
478                                 if(letter2 != 0)
479                                 {
480                                         //ligature
481                                         LookupLetter2(tr, letter2, ph_letter2);
482                                         sprintf(ph_buf,"%s%c%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
483                                 }
484                                 else
485                                 {
486                                         if(accent1 == 0)
487                                                 strcpy(ph_buf, ph_letter1);
488                                         else if((tr->langopts.accents & 1) || (accents_tab[accent1].flags & 1))
489                                                 sprintf(ph_buf,"%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);
490                                         else
491                                                 sprintf(ph_buf,"%c%s%c%s%c", phonSTRESS_2, ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
492                                 }
493                         }
494                 }
495         }
496 }  // end of LookupAccentedLetter
497
498
499
500 void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf1, int control)
501 {//==============================================================================================
502 // control, bit 0:  not the first letter of a word
503
504         int len;
505         static char single_letter[10] = {0,0};
506         unsigned int dict_flags[2];
507         char ph_buf3[40];
508
509         ph_buf1[0] = 0;
510         len = utf8_out(letter,&single_letter[2]);
511         single_letter[len+2] = ' ';
512
513         if(next_byte == -1)
514         {
515                 // speaking normal text, not individual characters
516                 if(Lookup(tr, &single_letter[2], ph_buf1) != 0)
517                         return;
518
519                 single_letter[1] = '_';
520                 if(Lookup(tr, &single_letter[1], ph_buf3) != 0)
521                         return;   // the character is specified as _* so ignore it when speaking normal text
522
523                 // check whether this character is specified for English
524                 if(tr->translator_name == L('e','n'))
525                         return;   // we are already using English
526
527                 SetTranslator2("en");
528                 if(Lookup(translator2, &single_letter[2], ph_buf3) != 0)
529                 {
530                         // yes, switch to English and re-translate the word
531                         sprintf(ph_buf1,"%c",phonSWITCH);
532                 }
533                 SelectPhonemeTable(voice->phoneme_tab_ix);  // revert to original phoneme table
534                 return;
535         }
536
537         if((letter <= 32) || iswspace(letter))
538         {
539                 // lookup space as _&32 etc.
540                 sprintf(&single_letter[1],"_#%d ",letter);
541                 Lookup(tr, &single_letter[1], ph_buf1);
542                 return;
543         }
544
545         if(next_byte != ' ')
546                 next_byte = RULE_SPELLING;
547         single_letter[3+len] = next_byte;   // follow by space-space if the end of the word, or space-31
548
549         single_letter[1] = '_';
550
551         // if the $accent flag is set for this letter, use the accents table (below)
552         dict_flags[1] = 0;
553
554         if(Lookup(tr, &single_letter[1], ph_buf3) == 0)
555         {
556                 single_letter[1] = ' ';
557                 if(Lookup(tr, &single_letter[2], ph_buf3) == 0)
558                 {
559                         TranslateRules(tr, &single_letter[2], ph_buf3, sizeof(ph_buf3), NULL,FLAG_NO_TRACE,NULL);
560                 }
561         }
562
563         if(ph_buf3[0] == 0)
564         {
565                 LookupAccentedLetter(tr, letter, ph_buf3);
566         }
567
568         strcpy(ph_buf1, ph_buf3);
569         if((ph_buf1[0] == 0) || (ph_buf1[0] == phonSWITCH))
570         {
571                 return;
572         }
573
574         dict_flags[0] = 0;
575         dict_flags[1] = 0;
576         SetWordStress(tr, ph_buf1, dict_flags, -1, control & 1);
577
578 }  // end of LookupLetter
579
580
// The code point of digit zero in each unicode block of non-ascii decimal
// digits.  The list must be in ascending order and is terminated by 0.
static const int number_ranges[] = {
        0x660, 0x6f0,  // arabic
        0x966, 0x9e6, 0xa66, 0xae6, 0xb66, 0xbe6, 0xc66, 0xce6, 0xd66,  // indic
        0xe50, 0xed0, 0xf20, 0x1040, 0x1090,
        0
};


// Convert a non-ascii digit to the ascii digit '0' to '9'.
// Returns -1 if the character is not in one of the number_ranges blocks.
int NonAsciiNumber(int letter)
{
        int ix;
        int zero_digit;
        int value;

        for(ix = 0; number_ranges[ix] != 0; ix++)
        {
                zero_digit = number_ranges[ix];
                if(letter < zero_digit)
                        return(-1);   // the list is ascending, so no later block can match

                value = letter - zero_digit;
                if(value < 10)
                        return('0' + value);
        }
        return(-1);
}
604
// Flag bits which are added to the base character in derived_letters[].
// The base character is in the low 14 bits, and (value >> 14) is the index
// into modifiers[].
#define L_SUB 0x4000   // subscript
#define L_SUP 0x8000   // superscript

static const char *modifiers[] = {
        NULL,
        "_sub",
        "_sup",
        NULL
};

// Subscript and superscript characters, as pairs of:
//    unicode code point,  base character + L_SUB or L_SUP
// The pairs must be in ascending order of code point, terminated by 0,0.
static unsigned short derived_letters[] = {
        // U+00aa - U+00ba
        0x00aa, 'a'+L_SUP,    0x00b2, '2'+L_SUP,    0x00b3, '3'+L_SUP,
        0x00b9, '1'+L_SUP,    0x00ba, 'o'+L_SUP,

        // U+02b0 - U+02b8
        0x02b0, 'h'+L_SUP,    0x02b1, 0x266+L_SUP,  0x02b2, 'j'+L_SUP,
        0x02b3, 'r'+L_SUP,    0x02b4, 0x279+L_SUP,  0x02b5, 0x27b+L_SUP,
        0x02b6, 0x281+L_SUP,  0x02b7, 'w'+L_SUP,    0x02b8, 'y'+L_SUP,

        // U+02c0 - U+02c1
        0x02c0, 0x294+L_SUP,  0x02c1, 0x295+L_SUP,

        // U+02e0 - U+02e3
        0x02e0, 0x263+L_SUP,  0x02e1, 'l'+L_SUP,
        0x02e2, 's'+L_SUP,    0x02e3, 'x'+L_SUP,

        // U+2070 - U+207f, superscripts
        0x2070, '0'+L_SUP,    0x2071, 'i'+L_SUP,
        0x2074, '4'+L_SUP,    0x2075, '5'+L_SUP,    0x2076, '6'+L_SUP,
        0x2077, '7'+L_SUP,    0x2078, '8'+L_SUP,    0x2079, '9'+L_SUP,
        0x207a, '+'+L_SUP,    0x207b, '-'+L_SUP,    0x207c, '='+L_SUP,
        0x207d, '('+L_SUP,    0x207e, ')'+L_SUP,    0x207f, 'n'+L_SUP,

        // U+2080 - U+208e, subscripts
        0x2080, '0'+L_SUB,    0x2081, '1'+L_SUB,    0x2082, '2'+L_SUB,
        0x2083, '3'+L_SUB,    0x2084, '4'+L_SUB,    0x2085, '5'+L_SUB,
        0x2086, '6'+L_SUB,    0x2087, '7'+L_SUB,    0x2088, '8'+L_SUB,
        0x2089, '9'+L_SUB,
        0x208a, '+'+L_SUB,    0x208b, '-'+L_SUB,    0x208c, '='+L_SUB,
        0x208d, '('+L_SUB,    0x208e, ')'+L_SUB,

        // U+2090 - U+209c, subscript letters
        0x2090, 'a'+L_SUB,    0x2091, 'e'+L_SUB,    0x2092, 'o'+L_SUB,
        0x2093, 'x'+L_SUB,    0x2094, 0x259+L_SUB,  0x2095, 'h'+L_SUB,
        0x2096, 'k'+L_SUB,    0x2097, 'l'+L_SUB,    0x2098, 'm'+L_SUB,
        0x2099, 'n'+L_SUB,    0x209a, 'p'+L_SUB,    0x209b, 's'+L_SUB,
        0x209c, 't'+L_SUB,

        0,0
};
675
676
// Names of the six hexadecimal letter digits, using phonemes available to all languages
static const char *hex_letters[] = {
        "'e:j",
        "b'i:",
        "s'i:",
        "d'i:",
        "'i:",
        "'ef"
};
678
679 int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
680 {//=========================================================================
681 // get pronunciation for an isolated letter
682 // return number of bytes used by the letter
683 // control bit 0:  a non-initial letter in a word
684 //         bit 1:  say 'capital'
685 //         bit 2:  say character code for unknown letters
686
687         int n_bytes;
688         int letter;
689         int len;
690         int ix;
691         int c;
692         char *p2;
693         char *pbuf;
694         const char *modifier;
695         ALPHABET *alphabet;
696         int al_offset;
697         int al_flags;
698         int language;
699         int number;
700         int phontab_1;
701         int speak_letter_number;
702         char capital[30];
703         char ph_buf[80];
704         char ph_buf2[80];
705         char ph_alphabet[80];
706         char hexbuf[12];
707         static char pause_string[] = {phonPAUSE, 0};
708
709         ph_buf[0] = 0;
710         ph_alphabet[0] = 0;
711         capital[0] = 0;
712         phontab_1 = translator->phoneme_tab_ix;
713
714         n_bytes = utf8_in(&letter,word);
715
716         if((letter & 0xfff00) == 0x0e000)
717         {
718                 letter &= 0xff;   // uncode private usage area
719         }
720
721         if(control & 2)
722         {
723                 // include CAPITAL information
724                 if(iswupper2(letter))
725                 {
726                         Lookup(tr, "_cap", capital);
727                 }
728         }
729         letter = towlower2(letter);
730         LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
731
732         if(ph_buf[0] == 0)
733         {
734                 // is this a subscript or superscript letter ?
735                 for(ix=0; (c = derived_letters[ix]) != 0; ix+=2)
736                 {
737                         if(c > letter)
738                                 break;
739                         if(c == letter)
740                         {
741                                 c = derived_letters[ix+1];
742                                 letter = c & 0x3fff;
743                                 if((modifier = modifiers[c >> 14]) != NULL)
744                                 {
745                                         Lookup(tr, modifier, capital);
746                                         if(capital[0] == 0)
747                                         {
748                                                 capital[2] = SetTranslator2("en");   // overwrites previous contents of translator2
749                                                 Lookup(translator2, modifier, &capital[3]);
750                                                 if(capital[3] != 0)
751                                                 {
752                                                         capital[0] = phonPAUSE;
753                                                         capital[1] = phonSWITCH;
754                                                         len = strlen(&capital[3]);
755                                                         capital[len+3] = phonSWITCH;
756                                                         capital[len+4] = phontab_1;
757                                                         capital[len+5] = 0;
758                                                 }
759                                         }
760                                 }
761                         }
762                 }
763                 LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
764         }
765
766         if(ph_buf[0] == phonSWITCH)
767         {
768                 strcpy(phonemes,ph_buf);
769                 return(0);
770         }
771
772
773         if((ph_buf[0] == 0) && ((number = NonAsciiNumber(letter)) > 0))
774         {
775                 // convert a non-ascii number to 0-9
776                 LookupLetter(tr, number, 0, ph_buf, control & 1);
777         }
778
779         al_offset = 0;
780         al_flags = 0;
781         if((alphabet = AlphabetFromChar(letter)) != NULL)
782         {
783                 al_offset = alphabet->offset;
784                 al_flags = alphabet->flags;
785         }
786
787         if(alphabet != current_alphabet)
788         {
789                 // speak the name of the alphabet
790                 current_alphabet = alphabet;
791                 if((alphabet != NULL) && !(al_flags & AL_DONT_NAME) && (al_offset != translator->letter_bits_offset))
792                 {
793                         if((al_flags & AL_DONT_NAME) || (al_offset == translator->langopts.alt_alphabet) || (al_offset == translator->langopts.our_alphabet))
794                         {
795                                 // don't say the alphabet name
796                         }
797                         else
798                         {
799                                 ph_buf2[0] = 0;
800                                 if(Lookup(translator, alphabet->name, ph_alphabet) == 0)  // the original language for the current voice
801                                 {
802                                         // Can't find the local name for this alphabet, use the English name
803                                         ph_alphabet[2] = SetTranslator2("en");   // overwrites previous contents of translator2
804                                         Lookup(translator2, alphabet->name, ph_buf2);
805                                 }
806                                 else if(translator != tr)
807                                 {
808                                         phontab_1 = tr->phoneme_tab_ix;
809                                         strcpy(ph_buf2, ph_alphabet);
810                                         ph_alphabet[2] = translator->phoneme_tab_ix;
811                                 }
812
813                                 if(ph_buf2[0] != 0)
814                                 {
815                                         // we used a different language for the alphabet name (now in ph_buf2)
816                                         ph_alphabet[0] = phonPAUSE;
817                                         ph_alphabet[1] = phonSWITCH;
818                                         strcpy(&ph_alphabet[3], ph_buf2);
819                                         len = strlen(ph_buf2) + 3;
820                                         ph_alphabet[len] = phonSWITCH;
821                                         ph_alphabet[len+1] = phontab_1;
822                                         ph_alphabet[len+2] = 0;
823                                 }
824                         }
825                 }
826         }
827
828
829 // caution: SetWordStress() etc don't expect phonSWITCH + phoneme table number
830
831         if(ph_buf[0] == 0)
832         {
833                 if((al_offset != 0) && (al_offset == translator->langopts.alt_alphabet))
834                         language = translator->langopts.alt_alphabet_lang;
835                 else
836                 if((alphabet != NULL) && (alphabet->language != 0) && !(al_flags & AL_NOT_LETTERS))
837                         language = alphabet->language;
838                 else
839                         language = L('e','n');
840
841                 if((language != tr->translator_name) || (language == L('k','o')))
842                 {
843                         char *p3;
844                         int initial, code;
845                         char hangul_buf[12];
846
847                         // speak in the language for this alphabet (or English)
848                         ph_buf[2] = SetTranslator2(WordToString2(language));
849
850                         if(((code = letter - 0xac00) >= 0) && (letter <= 0xd7af))
851                         {
852                                 // Special case for Korean letters.
853                                 // break a syllable hangul into 2 or 3 individual jamo
854
855                                 hangul_buf[0] = ' ';
856                                 p3 = &hangul_buf[1];
857                                 if((initial = (code/28)/21) != 11)
858                                 {
859                                         p3 += utf8_out(initial + 0x1100, p3);
860                                 }
861                                 utf8_out(((code/28) % 21) + 0x1161, p3);  // medial
862                                 utf8_out((code % 28) + 0x11a7, &p3[3]);   // final
863                                 p3[6] = ' ';
864                                 p3[7] = 0;
865                                 ph_buf[3] = 0;
866                                 TranslateRules(translator2, &hangul_buf[1], &ph_buf[3], sizeof(ph_buf)-3, NULL, 0, NULL);
867                                 SetWordStress(translator2, &ph_buf[3], NULL, -1, 0);
868                         }
869                         else
870                         {
871                                 LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
872                         }
873
874                         if(ph_buf[3] == phonSWITCH)
875                         {
876                                 // another level of language change
877                                 ph_buf[2] = SetTranslator2(&ph_buf[4]);
878                                 LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
879                         }
880
881                         SelectPhonemeTable(voice->phoneme_tab_ix);  // revert to original phoneme table
882
883                         if(ph_buf[3] != 0)
884                         {
885                                 ph_buf[0] = phonPAUSE;
886                                 ph_buf[1] = phonSWITCH;
887                                 len = strlen(&ph_buf[3]) + 3;
888                                 ph_buf[len] = phonSWITCH;  // switch back
889                                 ph_buf[len+1] = tr->phoneme_tab_ix;
890                                 ph_buf[len+2] = 0;
891                         }
892                 }
893         }
894
895         if(ph_buf[0] == 0)
896         {
897                 // character name not found
898
899                 if(ph_buf[0]== 0)
900                 {
901                         speak_letter_number = 1;
902                         if(!(al_flags & AL_NO_SYMBOL))
903                         {
904                                 if(iswalpha2(letter))
905                                         Lookup(translator, "_?A", ph_buf);
906
907                                 if((ph_buf[0]==0) && !iswspace(letter))
908                                         Lookup(translator, "_??", ph_buf);
909
910                                 if(ph_buf[0] == 0)
911                                 {
912                                         EncodePhonemes("l'et@", ph_buf, NULL);
913                                 }
914                         }
915
916                         if(!(control & 4) && (al_flags & AL_NOT_CODE))
917                         {
918                                 // don't speak the character code number, unless we want full details of this character
919                                 speak_letter_number = 0;
920                         }
921
922 //                      if((ph_alphabet[0] != 0) && speak_letter_number)
923 //                              ph_buf[0] = 0;  // don't speak "letter" if we speak alphabet name
924
925                         if(speak_letter_number)
926                         {
927                                 if(al_offset == 0x2800)
928                                 {
929                                         // braille dots symbol, list the numbered dots
930                                         p2 = hexbuf;
931                                         for(ix=0; ix<8; ix++)
932                                         {
933                                                 if(letter & (1 << ix))
934                                                 {
935                                                         *p2++ = '1'+ix;
936                                                 }
937                                         }
938                                         *p2 = 0;
939                                 }
940                                 else
941                                 {
942                                         // speak the hexadecimal number of the character code
943                                         sprintf(hexbuf,"%x",letter);
944                                 }
945
946                                 pbuf = ph_buf;
947                                 for(p2 = hexbuf; *p2 != 0; p2++)
948                                 {
949                                         pbuf += strlen(pbuf);
950                                         *pbuf++ = phonPAUSE_VSHORT;
951                                         LookupLetter(translator, *p2, 0, pbuf, 1);
952                                         if(((pbuf[0] == 0) || (pbuf[0]==phonSWITCH)) && (*p2 >= 'a'))
953                                         {
954                                                 // This language has no translation for 'a' to 'f', speak English names using base phonemes
955                                                 EncodePhonemes(hex_letters[*p2 - 'a'], pbuf, NULL);
956                                         }
957                                 }
958                                 strcat(pbuf, pause_string);
959                         }
960                 }
961         }
962
963         len = strlen(phonemes);
964
965         if(tr->langopts.accents & 2)  // 'capital' before or after the word ?
966                 sprintf(ph_buf2,"%c%s%s%s",0xff,ph_alphabet,ph_buf,capital);
967         else
968                 sprintf(ph_buf2,"%c%s%s%s",0xff,ph_alphabet,capital,ph_buf);  // the 0xff marker will be removed or replaced in SetSpellingStress()
969         if((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
970         {
971                 strcpy(&phonemes[len],ph_buf2);
972         }
973         return(n_bytes);
974 }  // end of TranslateLetter
975
976
977
978 void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars)
979 {//=============================================================================
980 // Individual letter names, reduce the stress of some.
981         int ix;
982         unsigned int c;
983         int n_stress=0;
984         int prev = 0;
985         int count;
986         unsigned char buf[N_WORD_PHONEMES];
987
988         for(ix=0; (c = phonemes[ix]) != 0; ix++)
989         {
990                 if((c == phonSTRESS_P) && (prev != phonSWITCH))
991                 {
992                         n_stress++;
993                 }
994                 buf[ix] = prev = c;
995         }
996         buf[ix] = 0;
997
998         count = 0;
999         prev = 0;
1000         for(ix=0; (c = buf[ix]) != 0; ix++)
1001         {
1002                 if((c == phonSTRESS_P) && (n_chars > 1) && (prev != phonSWITCH))
1003                 {
1004                         count++;
1005
1006                         if(tr->langopts.spelling_stress == 1)
1007                         {
1008                                 // stress on initial letter when spelling
1009                                 if(count > 1)
1010                                         c = phonSTRESS_3;
1011                         }
1012                         else
1013                         {
1014                                 if(count != n_stress)
1015                                 {
1016                                         if(((count % 3) != 0) || (count == n_stress-1))
1017                                                 c = phonSTRESS_3;   // reduce to secondary stress
1018                                 }
1019                         }
1020                 }
1021                 else if(c == 0xff)
1022                 {
1023                         if((control < 2) || (ix==0))
1024                                 continue;   // don't insert pauses
1025
1026                         if(control == 4)
1027                                 c = phonPAUSE;    // pause after each character
1028                         if(((count % 3) == 0) || (control > 2))
1029                                 c = phonPAUSE_NOLINK;  // pause following a primary stress
1030                         else
1031                                 c = phonPAUSE_VSHORT;
1032                 }
1033                 *phonemes++ = prev = c;
1034         }
1035         if(control >= 2)
1036                 *phonemes++ = phonPAUSE_NOLINK;
1037         *phonemes = 0;
1038 }  // end of SetSpellingStress
1039
1040
1041
1042 // Numbers
1043
// Phonemes of the ordinal-number ending.  LookupNum2() copies ph_ordinal2 as the
// ending to add, and uses ph_ordinal2x instead (if it is not empty) when it finds
// a "_<number><type>x" dictionary entry.
static char ph_ordinal2[12] = {0};
static char ph_ordinal2x[12] = {0};
1046
1047
1048 static int CheckDotOrdinal(Translator *tr, char *word, char *word_end, WORD_TAB *wtab, int roman)
1049 {//==============================================================================================
1050
1051         int ordinal = 0;
1052         int c2;
1053         int nextflags;
1054
1055         if((tr->langopts.numbers & NUM_ORDINAL_DOT) && ((word_end[0] == '.') || (wtab[0].flags & FLAG_HAS_DOT)) && !(wtab[1].flags & FLAG_NOSPACE))
1056         {
1057                 if(roman || !(wtab[1].flags & FLAG_FIRST_UPPER))
1058                 {
1059                         if(word_end[0] == '.')
1060                                 utf8_in(&c2, &word_end[2]);
1061                         else
1062                                 utf8_in(&c2, &word_end[0]);
1063
1064                         if((word_end[0] != 0) && (word_end[1] != 0) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || IsAlpha(c2)))
1065                         {
1066                                 // ordinal number is indicated by dot after the number
1067                                 // but not if the next word starts with an upper-case letter
1068                                 // (c2 == 0) is for cases such as, "2.,"
1069                                 ordinal = 2;
1070                                 if(word_end[0] == '.')
1071                                         word_end[0] = ' ';
1072
1073                                 if((roman==0) && (tr->translator_name == L('h','u')))
1074                                 {
1075                                         // lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix.
1076                                         nextflags = 0;
1077                                         if(IsAlpha(c2))
1078                                         {
1079                                                 nextflags = TranslateWord(tr, &word_end[2], 0, NULL, NULL);
1080                                         }
1081
1082                                         if((tr->prev_dict_flags[0] & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2)))
1083                                                 ordinal = 0;   // TEST  09.02.10
1084
1085                                         if(nextflags & FLAG_ALT_TRANS)
1086                                                 ordinal = 0;
1087
1088                                         if(nextflags & FLAG_ALT3_TRANS)
1089                                         {
1090                                                 if(word[-2] == '-')
1091                                                         ordinal = 0;   // eg. december 2-5. között
1092
1093                                                 if(tr->prev_dict_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT3_TRANS))
1094                                                         ordinal = 0x22;
1095                                         }
1096                                 }
1097                         }
1098                 }
1099         }
1100         return(ordinal);
1101 }  // end of CheckDotOrdinal
1102
1103
static int hu_number_e(const char *word, int thousandplex, int value)
{//==================================================================
// lang-hu: should the variant ('e') form of a number be used?
// This is when the number is followed by a hyphen and a suffix which starts with
// 'a' or 'e' (but not: a, e, az, ez, azt, ezt, att, ett).
//
//   word          the suffix which follows the number
//   thousandplex  1 also excludes a suffix "al"/"el" (eg. 1000-el)
//   value         a multiple of 1000 also excludes a suffix "al"/"el"
//
// Returns 1 to use the variant form, 0 otherwise.
	const char first = word[0];

	if((first != 'a') && (first != 'e'))
		return(0);

	switch(word[1])
	{
	case ' ':   // a, e
	case 'z':   // az, ez, azt, ezt
		return(0);

	case 't':   // att, ett
		if(word[2] == 't')
			return(0);
		break;

	case 'l':
		if((thousandplex == 1) || ((value % 1000) == 0))
			return(0);   // 1000-el
		break;

	default:
		break;
	}
	return(1);
}  // end of hu_number_e
1120
1121
1122
1123 int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab)
1124 {//=========================================================================
1125         int c;
1126         char *p;
1127         const char *p2;
1128         int acc;
1129         int prev;
1130         int value;
1131         int subtract;
1132         int repeat = 0;
1133         int n_digits = 0;
1134         char *word_start;
1135         int num_control = 0;
1136         unsigned int flags[2];
1137         char ph_roman[30];
1138         char number_chars[N_WORD_BYTES];
1139
1140         static const char *roman_numbers = "ixcmvld";
1141         static int roman_values[] = {1,10,100,1000,5,50,500};
1142
1143         acc = 0;
1144         prev = 0;
1145         subtract = 0x7fff;
1146         ph_out[0] = 0;
1147         flags[0] = 0;
1148         flags[1] = 0;
1149
1150         if(((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && !(wtab[0].flags & FLAG_ALL_UPPER)) || IsDigit09(word[-2]))
1151                 return(0);    // not '2xx'
1152
1153         word_start = word;
1154         while((c = *word++) != ' ')
1155         {
1156                 if((p2 = strchr(roman_numbers,c)) == NULL)
1157                         return(0);
1158
1159                 value = roman_values[p2 - roman_numbers];
1160                 if(value == prev)
1161                 {
1162                         repeat++;
1163                         if(repeat >= 3)
1164                                 return(0);
1165                 }
1166                 else
1167                         repeat = 0;
1168
1169                 if((prev > 1) && (prev != 10) && (prev != 100))
1170                 {
1171                         if(value >= prev)
1172                                 return(0);
1173                 }
1174                 if((prev != 0) && (prev < value))
1175                 {
1176                         if(((acc % 10) != 0) || ((prev*10) < value))
1177                                 return(0);
1178                         subtract = prev;
1179                         value -= subtract;
1180                 }
1181                 else if(value >= subtract)
1182                         return(0);
1183                 else
1184                         acc += prev;
1185                 prev = value;
1186                 n_digits++;
1187         }
1188
1189         if(IsDigit09(word[0]))
1190                 return(0);      // eg. 'xx2'
1191
1192         acc += prev;
1193         if(acc < tr->langopts.min_roman)
1194                 return(0);
1195
1196         if(acc > tr->langopts.max_roman)
1197                 return(0);
1198
1199
1200         Lookup(tr, "_roman",ph_roman);   // precede by "roman" if _rom is defined in *_list
1201         p = &ph_out[0];
1202
1203         if((tr->langopts.numbers & NUM_ROMAN_AFTER) == 0)
1204         {
1205                 strcpy(ph_out,ph_roman);
1206                 p = &ph_out[strlen(ph_roman)];
1207         }
1208
1209         sprintf(number_chars,"  %d    ",acc);
1210
1211         if(word[0] == '.')
1212         {
1213                 // dot has not been removed.  This implies that there was no space after it
1214                 return(0);
1215         }
1216
1217         if(CheckDotOrdinal(tr, word_start, word, wtab, 1))
1218                 wtab[0].flags |= FLAG_ORDINAL;
1219
1220         if(tr->langopts.numbers & NUM_ROMAN_ORDINAL)
1221         {
1222                 if(tr->translator_name == L('h','u'))
1223                 {
1224                         if(!(wtab[0].flags & FLAG_ORDINAL))
1225                         {
1226                                 if((wtab[0].flags & FLAG_HYPHEN_AFTER) && hu_number_e(word, 0, acc))
1227                                 {
1228                                         // should use the 'e' form of the number
1229                                         num_control |= 1;
1230                                 }
1231                                 else
1232                                         return(0);
1233                         }
1234                 }
1235                 else
1236                 {
1237                         wtab[0].flags |= FLAG_ORDINAL;
1238                 }
1239         }
1240
1241         tr->prev_dict_flags[0] = 0;
1242         tr->prev_dict_flags[1] = 0;
1243         TranslateNumber(tr, &number_chars[2], p, flags, wtab, num_control);
1244
1245         if(tr->langopts.numbers & NUM_ROMAN_AFTER)
1246                 strcat(ph_out,ph_roman);
1247
1248         return(1);
1249 }  // end of TranslateRoman
1250
1251
1252 static const char *M_Variant(int value)
1253 {//====================================
1254         // returns M, or perhaps MA or MB for some cases
1255
1256         int teens = 0;
1257
1258         if(((value % 100) > 10) && ((value % 100) < 20))
1259                 teens = 1;
1260
1261         switch((translator->langopts.numbers2 >> 6) & 0x7)
1262         {
1263         case 1:  // lang=ru  use singular for xx1 except for x11
1264                 if((teens == 0) && ((value % 10) == 1))
1265                         return("1M");
1266                 break;
1267
1268         case 2:  // lang=cs,sk
1269                 if((value >= 2) && (value <= 4))
1270                         return("0MA");
1271                 break;
1272
1273         case 3:  // lang=pl
1274                 if((teens == 0) && (((value % 10) >= 2) && ((value % 10) <= 4)))
1275                         return("0MA");
1276                 break;
1277
1278         case 4:  // lang=lt
1279                 if((teens == 1) || ((value % 10) == 0))
1280                         return("0MB");
1281                 if((value % 10) == 1)
1282                         return("0MA");
1283                 break;
1284
1285         case 5:  // lang=bs,hr,sr
1286                 if(teens == 0)
1287                 {
1288                         if((value % 10) == 1)
1289                                 return("1M");
1290                         if(((value % 10) >= 2) && ((value % 10) <= 4))
1291                                 return("0MA");
1292                 }
1293                 break;
1294         }
1295         return("0M");
1296 }
1297
1298
1299 static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out)
1300 {//=======================================================================================================
1301 // thousands_exact:  bit 0  no hundreds,tens,or units,  bit 1  ordinal numberr
1302         int found;
1303         int found_value=0;
1304         char string[12];
1305         char ph_of[12];
1306         char ph_thousands[40];
1307         char ph_buf[40];
1308
1309         ph_of[0] = 0;
1310
1311         // first look for a match with the exact value of thousands
1312         if(value > 0)
1313         {
1314                 if(thousands_exact & 1)
1315                 {
1316                         if(thousands_exact & 2)
1317                         {
1318                                 // ordinal number
1319                                 sprintf(string,"_%dM%do",value,thousandplex);
1320                                 found_value = Lookup(tr, string, ph_thousands);
1321                         }
1322                         if(!found_value & (number_control & 1))
1323                         {
1324                                 // look for the 'e' variant
1325                                 sprintf(string,"_%dM%de",value,thousandplex);
1326                                 found_value = Lookup(tr, string, ph_thousands);
1327                         }
1328                         if(!found_value)
1329                         {
1330                                 // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta)
1331                                 sprintf(string,"_%dM%dx",value,thousandplex);
1332                                 found_value = Lookup(tr, string, ph_thousands);
1333                         }
1334                 }
1335                 if(found_value == 0)
1336                 {
1337                         sprintf(string,"_%dM%d",value,thousandplex);
1338                         found_value = Lookup(tr, string, ph_thousands);
1339                 }
1340         }
1341
1342         if(found_value == 0)
1343         {
1344                 if((value % 100) >= 20)
1345                 {
1346                         Lookup(tr, "_0of", ph_of);
1347                 }
1348
1349                 found = 0;
1350                 if(thousands_exact & 1)
1351                 {
1352                         if(thousands_exact & 2)
1353                         {
1354                                 // ordinal number
1355                                 sprintf(string,"_%s%do",M_Variant(value), thousandplex);
1356                                 found = Lookup(tr, string, ph_thousands);
1357                         }
1358                         if(!found && (number_control & 1))
1359                         {
1360                                 // look for the 'e' variant
1361                                 sprintf(string,"_%s%de",M_Variant(value), thousandplex);
1362                                 found = Lookup(tr, string, ph_thousands);
1363                         }
1364                         if(!found)
1365                         {
1366                                 // is there a different pronunciation if there are no hundreds,tens,or units ?
1367                                 sprintf(string,"_%s%dx",M_Variant(value), thousandplex);
1368                                 found = Lookup(tr, string, ph_thousands);
1369                         }
1370                 }
1371                 if(found == 0)
1372                 {
1373                         sprintf(string,"_%s%d",M_Variant(value), thousandplex);
1374
1375                         if(Lookup(tr, string, ph_thousands) == 0)
1376                         {
1377                                 if(thousandplex > 3)
1378                                 {
1379                                         sprintf(string,"_0M%d", thousandplex-1);
1380                                         if(Lookup(tr, string, ph_buf) == 0)
1381                                         {
1382                                                 // say "millions" if this name is not available and neither is the next lower
1383                                                 Lookup(tr, "_0M2", ph_thousands);
1384                                                 speak_missing_thousands = 3;
1385                                         }
1386                                 }
1387                                 if(ph_thousands[0] == 0)
1388                                 {
1389                                         // repeat "thousand" if higher order names are not available
1390                                         sprintf(string,"_%dM1",value);
1391                                         if((found_value = Lookup(tr, string, ph_thousands)) == 0)
1392                                                 Lookup(tr, "_0M1", ph_thousands);
1393                                         speak_missing_thousands = 2;
1394                                 }
1395                         }
1396                 }
1397         }
1398         sprintf(ph_out,"%s%s",ph_of,ph_thousands);
1399
1400         if((value == 1) && (thousandplex == 1) && (tr->langopts.numbers & NUM_OMIT_1_THOUSAND))
1401                 return(1);
1402
1403         return(found_value);
1404 }  // end f LookupThousands
1405
1406
1407 static int LookupNum2(Translator *tr, int value, const int control, char *ph_out)
1408 {//=============================================================================
1409 // Lookup a 2 digit number
1410 // control bit 0: ordinal number
1411 // control bit 1: final tens and units (not number of thousands) (use special form of '1', LANG=de "eins")
1412 // control bit 2: tens and units only, no higher digits
1413 // control bit 3: use feminine form of '2' (for thousands
1414 // control bit 4: speak zero tens
1415 // control bit 5: variant of ordinal number (lang=hu)
1416 //         bit 8   followed by decimal fraction
1417
1418         int found;
1419         int ix;
1420         int units;
1421         int tens;
1422         int is_ordinal;
1423         int used_and=0;
1424         int found_ordinal = 0;
1425         int next_phtype;
1426         int ord_type = 'o';
1427         char string[12];  // for looking up entries in *_list
1428         char ph_ordinal[20];
1429         char ph_tens[50];
1430         char ph_digits[50];
1431         char ph_and[12];
1432
1433         units = value % 10;
1434         tens = value / 10;
1435
1436         found = 0;
1437         ph_ordinal[0] = 0;
1438         ph_tens[0] = 0;
1439         ph_digits[0] = 0;
1440         ph_and[0] = 0;
1441
1442         if(control & 0x20)
1443         {
1444                 ord_type = 'q';
1445         }
1446
1447         is_ordinal = control & 1;
1448
1449         if((control & 2) && (n_digit_lookup == 2))
1450         {
1451                 // pronunciation of the final 2 digits has already been found
1452                 strcpy(ph_out, digit_lookup);
1453         }
1454         else
1455         {
1456                 if(digit_lookup[0] == 0)
1457                 {
1458                         // is there a special pronunciation for this 2-digit number
1459                         if(control & 8)
1460                         {
1461                                 // is there a feminine form?
1462                                 sprintf(string,"_%df",value);
1463                                 found = Lookup(tr, string, ph_digits);
1464                         }
1465                         else if(is_ordinal)
1466                         {
1467                                 strcpy(ph_ordinal, ph_ordinal2);
1468
1469                                 if(control & 4)
1470                                 {
1471                                         sprintf(string,"_%d%cx",value,ord_type);  // LANG=hu, special word for 1. 2. when there are no higher digits
1472                                         if((found = Lookup(tr, string, ph_digits)) != 0)
1473                                         {
1474                                                 if(ph_ordinal2x[0] != 0)
1475                                                         strcpy(ph_ordinal, ph_ordinal2x);  // alternate pronunciation (lang=an)
1476                                         }
1477                                 }
1478                                 if(found == 0)
1479                                 {
1480                                         sprintf(string,"_%d%c",value,ord_type);
1481                                         found = Lookup(tr, string, ph_digits);
1482                                 }
1483                                 found_ordinal = found;
1484                         }
1485
1486                         if(found == 0)
1487                         {
1488                                 if(control & 2)
1489                                 {
1490                                         // the final tens and units of a number
1491                                         if(number_control & 1)
1492                                         {
1493                                                 // look for 'e' variant
1494                                                 sprintf(string,"_%de",value);
1495                                                 found = Lookup(tr, string, ph_digits);
1496                                         }
1497                                 }
1498                                 else
1499                                 {
1500                                         // followed by hundreds or thousands etc
1501                                         sprintf(string,"_%da",value);
1502                                         found = Lookup(tr, string, ph_digits);
1503                                 }
1504
1505                                 if(!found)
1506                                 {
1507                                         if((is_ordinal) && (tr->langopts.numbers2 & NUM2_NO_TEEN_ORDINALS))
1508                                         {
1509                                                 // don't use numbers 10-99 to make ordinals, always use _1Xo etc (lang=pt)
1510                                         }
1511                                         else
1512                                         {
1513                                                 sprintf(string,"_%d",value);
1514                                                 found = Lookup(tr, string, ph_digits);
1515                                         }
1516                                 }
1517                         }
1518                 }
1519
1520                 // no, speak as tens+units
1521
1522                 if((control & 0x10) && (value < 10))
1523                 {
1524                         // speak leading zero
1525                         Lookup(tr, "_0", ph_tens);
1526                 }
1527                 else
1528                 {
1529                         if(found)
1530                         {
1531                                 ph_tens[0] = 0;
1532                         }
1533                         else
1534                         {
1535
1536                                 if(is_ordinal)
1537                                 {
1538                                         sprintf(string,"_%dX%c", tens, ord_type);
1539                                         if(Lookup(tr, string, ph_tens) != 0)
1540                                         {
1541                                                 found_ordinal = 1;
1542
1543                                                 if((units != 0) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
1544                                                 {
1545                                                         // Use the ordinal form of tens as well as units. Add the ordinal ending
1546                                                         strcat(ph_tens, ph_ordinal2);
1547                                                 }
1548                                         }
1549                                 }
1550                                 if(found_ordinal == 0)
1551                                 {
1552                                         sprintf(string,"_%dX", tens);
1553                                         Lookup(tr, string, ph_tens);
1554                                 }
1555
1556                                 if((ph_tens[0] == 0) && (tr->langopts.numbers & NUM_VIGESIMAL))
1557                                 {
1558                                         // tens not found,  (for example) 73 is 60+13
1559                                         units = (value % 20);
1560                                         sprintf(string,"_%dX", tens & 0xfe);
1561                                         Lookup(tr, string, ph_tens);
1562                                 }
1563
1564                                 ph_digits[0] = 0;
1565                                 if(units > 0)
1566                                 {
1567                                         found = 0;
1568
1569                                         if((control & 2) && (digit_lookup[0] != 0))
1570                                         {
1571                                                 // we have an entry for this digit (possibly together with the next word)
1572                                                 strcpy(ph_digits, digit_lookup);
1573                                                 found_ordinal = 1;
1574                                                 ph_ordinal[0] = 0;
1575                                         }
1576                                         else
1577                                         {
1578                                                 if(control & 8)
1579                                                 {
1580                                                         // is there a variant form of this number?
1581                                                         sprintf(string,"_%df",units);
1582                                                         found = Lookup(tr, string, ph_digits);
1583                                                 }
1584                                                 if((is_ordinal) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0))
1585                                                 {
1586                                                         // ordinal
1587                                                         sprintf(string,"_%d%c",units,ord_type);
1588                                                         if((found = Lookup(tr, string, ph_digits)) != 0)
1589                                                         {
1590                                                                 found_ordinal = 1;
1591                                                         }
1592                                                 }
1593                                                 if(found == 0)
1594                                                 {
1595                                                         if((number_control & 1) && (control & 2))
1596                                                         {
1597                                                                 // look for 'e' variant
1598                                                                 sprintf(string,"_%de",units);
1599                                                                 found = Lookup(tr, string, ph_digits);
1600                                                         }
1601                                                         else if(((control & 2) == 0) || ((tr->langopts.numbers & NUM_SWAP_TENS) != 0))
1602                                                         {
1603                                                                 // followed by hundreds or thousands (or tens)
1604                                                                 sprintf(string,"_%da",units);
1605                                                                 found = Lookup(tr, string, ph_digits);
1606                                                         }
1607                                                 }
1608                                                 if(found == 0)
1609                                                 {
1610                                                         sprintf(string,"_%d",units);
1611                                                         Lookup(tr, string, ph_digits);
1612                                                 }
1613                                         }
1614                                 }
1615                         }
1616                 }
1617
1618                 if((is_ordinal) && (found_ordinal == 0) && (ph_ordinal[0] == 0))
1619                 {
1620                         if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
1621                                 Lookup(tr, "_ord20", ph_ordinal);
1622                         if(ph_ordinal[0] == 0)
1623                                 Lookup(tr, "_ord", ph_ordinal);
1624                 }
1625
1626                 if((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0))
1627                 {
1628                         Lookup(tr, "_0and", ph_and);
1629
1630                         if((is_ordinal) && (tr->langopts.numbers2 & NUM2_ORDINAL_NO_AND))
1631                                 ph_and[0] = 0;
1632
1633                         if(tr->langopts.numbers & NUM_SWAP_TENS)
1634                                 sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal);
1635                         else
1636                                 sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal);
1637                         used_and = 1;
1638                 }
1639                 else
1640                 {
1641                         if(tr->langopts.numbers & NUM_SINGLE_VOWEL)
1642                         {
1643                                 // remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
1644                                 if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0))
1645                                 {
1646                                         if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
1647                                                 next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
1648
1649                                         if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
1650                                                 ph_tens[ix] = 0;
1651                                 }
1652                         }
1653                         sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal);
1654                 }
1655         }
1656
1657         if(tr->langopts.numbers & NUM_SINGLE_STRESS_L)
1658         {
1659                 // only one primary stress, on the first part (tens)
1660                 found = 0;
1661                 for(ix=0; ix < (signed)strlen(ph_out); ix++)
1662                 {
1663                         if(ph_out[ix] == phonSTRESS_P)
1664                         {
1665                                 if(found)
1666                                         ph_out[ix] = phonSTRESS_3;
1667                                 else
1668                                         found = 1;
1669                         }
1670                 }
1671         }
1672         else if(tr->langopts.numbers & NUM_SINGLE_STRESS)
1673         {
1674                 // only one primary stress
1675                 found = 0;
1676                 for(ix=strlen(ph_out)-1; ix>=0; ix--)
1677                 {
1678                         if(ph_out[ix] == phonSTRESS_P)
1679                         {
1680                                 if(found)
1681                                         ph_out[ix] = phonSTRESS_3;
1682                                 else
1683                                         found = 1;
1684                         }
1685                 }
1686         }
1687         return(used_and);
1688 }  // end of LookupNum2
1689
1690
1691 static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null, int thousandplex, int control)
1692 {//=============================================================================================================
1693 // Translate a 3 digit number
1694 //  control  bit 0,  previous thousands
1695 //           bit 1,  ordinal number
1696 //           bit 5   variant form of ordinal number
1697 //           bit 8   followed by decimal fraction
1698         int found;
1699         int hundreds;
1700         int tensunits;
1701         int x;
1702         int ix;
1703         int exact;
1704         int ordinal;
1705         int tplex;
1706         int say_zero_hundred=0;
1707         char string[12];  // for looking up entries in **_list
1708         char buf1[100];
1709         char buf2[100];
1710         char ph_100[20];
1711         char ph_10T[20];
1712         char ph_digits[50];
1713         char ph_thousands[50];
1714         char ph_hundred_and[12];
1715         char ph_thousand_and[12];
1716
1717         ordinal = control & 0x22;
1718         hundreds = value / 100;
1719         tensunits = value % 100;
1720         buf1[0] = 0;
1721
1722         ph_thousands[0] = 0;
1723         ph_thousand_and[0] = 0;
1724
1725         if((tr->langopts.numbers & NUM_ZERO_HUNDRED) && ((control & 1) || (hundreds >= 10)))
1726         {
1727                 say_zero_hundred = 1;  // lang=vi
1728         }
1729
1730         if((hundreds > 0) || say_zero_hundred)
1731         {
1732                 found = 0;
1733                 if(ordinal && (tensunits == 0))
1734                 {
1735                         // ordinal number, with no tens or units
1736                         found = Lookup(tr, "_0Co", ph_100);
1737                 }
1738                 if(found == 0)
1739                 {
1740                         if(tensunits==0)
1741                         {
1742                                 // special form for exact hundreds?
1743                                 found = Lookup(tr, "_0C0", ph_100);
1744                         }
1745                         if(!found)
1746                         {
1747                                 Lookup(tr, "_0C", ph_100);
1748                         }
1749                 }
1750
1751                 if(((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19))
1752                 {
1753                         // speak numbers such as 1984 as years: nineteen-eighty-four
1754 //                      ph_100[0] = 0;   // don't say "hundred", we also need to surpess "and"
1755                 }
1756                 else if(hundreds >= 10)
1757                 {
1758                         ph_digits[0] = 0;
1759
1760                         exact = 0;
1761                         if ((value % 1000) == 0)
1762                                 exact = 1;
1763
1764                         tplex = thousandplex+1;
1765                         if(tr->langopts.numbers2 & NUM2_MYRIADS)
1766                         {
1767                                 tplex = 0;
1768                         }
1769
1770                         if(LookupThousands(tr, hundreds / 10, tplex, exact | ordinal, ph_10T) == 0)
1771                         {
1772                                 x = 0;
1773                                 if(tr->langopts.numbers2 & (1 << tplex))
1774                                         x = 8;   // use variant (feminine) for before thousands and millions
1775                                 LookupNum2(tr, hundreds/10, x, ph_digits);
1776                         }
1777
1778                         if(tr->langopts.numbers2 & 0x200)
1779                                 sprintf(ph_thousands,"%s%c%s%c",ph_10T,phonEND_WORD,ph_digits,phonEND_WORD);  // say "thousands" before its number, not after
1780                         else
1781                                 sprintf(ph_thousands,"%s%c%s%c",ph_digits,phonEND_WORD,ph_10T,phonEND_WORD);
1782
1783                         hundreds %= 10;
1784                         if((hundreds == 0) && (say_zero_hundred == 0))
1785                                 ph_100[0] = 0;
1786                         suppress_null = 1;
1787                 }
1788
1789                 ph_digits[0] = 0;
1790
1791                 if((hundreds > 0) || say_zero_hundred)
1792                 {
1793                         if((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0)))
1794                         {
1795                                 Lookup(tr, "_0and", ph_thousand_and);
1796                         }
1797
1798                         suppress_null = 1;
1799
1800                         found = 0;
1801                         if((ordinal)
1802                                         && ((tensunits == 0) || (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL)))
1803                         {
1804                                 // ordinal number
1805                                 sprintf(string, "_%dCo", hundreds);
1806                                 found = Lookup(tr, string, ph_digits);
1807
1808                                 if((tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL) && (tensunits > 0))
1809                                 {
1810                                         // Use ordinal form of hundreds, as well as for tens and units
1811                                         // Add ordinal suffix to the hundreds
1812                                         strcat(ph_digits, ph_ordinal2);
1813                                 }
1814                         }
1815
1816                         if((hundreds == 0) && say_zero_hundred)
1817                         {
1818                                 Lookup(tr, "_0", ph_digits);
1819                         }
1820                         else
1821                         {
1822                                 if((!found) && (tensunits == 0))
1823                                 {
1824                                         // is there a special pronunciation for exactly n00 ?
1825                                         sprintf(string,"_%dC0",hundreds);
1826                                         found = Lookup(tr, string, ph_digits);
1827                                 }
1828
1829                                 if(!found)
1830                                 {
1831                                         sprintf(string,"_%dC",hundreds);
1832                                         found = Lookup(tr, string, ph_digits);  // is there a specific pronunciation for n-hundred ?
1833                                 }
1834
1835                                 if(found)
1836                                 {
1837                                         ph_100[0] = 0;
1838                                 }
1839                                 else
1840                                 {
1841                                         if((hundreds > 1) || ((tr->langopts.numbers & NUM_OMIT_1_HUNDRED) == 0))
1842                                         {
1843                                                 LookupNum2(tr, hundreds, 0, ph_digits);
1844                                         }
1845                                 }
1846                         }
1847                 }
1848
1849                 sprintf(buf1,"%s%s%s%s",ph_thousands,ph_thousand_and,ph_digits,ph_100);
1850         }
1851
1852         ph_hundred_and[0] = 0;
1853         if(tensunits > 0)
1854         {
1855                 if((control & 2) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
1856                 {
1857                         // Don't use "and" if we apply ordinal to both hundreds and units
1858                 }
1859                 else
1860                 {
1861                         if((value > 100) || ((control & 1) && (thousandplex==0)))
1862                         {
1863                                 if((tr->langopts.numbers & NUM_HUNDRED_AND) || ((tr->langopts.numbers & NUM_HUNDRED_AND_DIGIT) && (tensunits < 10)))
1864                                 {
1865                                         Lookup(tr, "_0and", ph_hundred_and);
1866                                 }
1867                         }
1868                         if((tr->langopts.numbers & NUM_THOUSAND_AND) && (hundreds == 0) && ((control & 1) || (ph_thousands[0] != 0)))
1869                         {
1870                                 Lookup(tr, "_0and", ph_hundred_and);
1871                         }
1872                 }
1873         }
1874
1875
1876         buf2[0] = 0;
1877
1878         if((tensunits != 0) || (suppress_null == 0))
1879         {
1880                 x = 0;
1881                 if(thousandplex==0)
1882                 {
1883                         x = 2;   // allow "eins" for 1 rather than "ein"
1884                         if(ordinal)
1885                                 x = 3;   // ordinal number
1886                         if((value < 100) && !(control & 1))
1887                                 x |= 4;   // tens and units only, no higher digits
1888                         if(ordinal & 0x20)
1889                                 x |= 0x20;  // variant form of ordinal number
1890                 }
1891                 else
1892                 {
1893                         if(tr->langopts.numbers2 & (1 << thousandplex))
1894                                 x = 8;   // use variant (feminine) for before thousands and millions
1895                 }
1896
1897                 if(LookupNum2(tr, tensunits, x | (control & 0x100), buf2) != 0)
1898                 {
1899                         if(tr->langopts.numbers & NUM_SINGLE_AND)
1900                                 ph_hundred_and[0] = 0;  // don't put 'and' after 'hundred' if there's 'and' between tens and units
1901                 }
1902         }
1903         else
1904         {
1905                 if(ph_ordinal2[0] != 0)
1906                 {
1907                         ix = strlen(buf1);
1908                         if((ix > 0) && (buf1[ix-1] == phonPAUSE_SHORT))
1909                                 buf1[ix-1] = 0;   // remove pause before addding ordinal suffix
1910                         strcpy(buf2, ph_ordinal2);
1911                 }
1912         }
1913
1914         sprintf(ph_out,"%s%s%c%s",buf1,ph_hundred_and,phonEND_WORD,buf2);
1915
1916         return(0);
1917 }  // end of LookupNum3
1918
1919
1920 bool CheckThousandsGroup(char *word, int group_len)
1921 {//================================================
1922 // Is this a group of 3 digits which looks like a thousands group?
1923         int ix;
1924
1925         if(IsDigit09(word[group_len]) || IsDigit09(-1))
1926                 return(false);
1927
1928         for(ix=0; ix < group_len; ix++)
1929         {
1930                 if(!IsDigit09(word[ix]))
1931                         return(false);
1932         }
1933         return(true);
1934 }
1935
1936
1937 static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
1938 {//=====================================================================================================================
1939 //  Number translation with various options
1940 // the "word" may be up to 4 digits
1941 // "words" of 3 digits may be preceded by another number "word" for thousands or millions
1942
1943         int n_digits;
1944         int value;
1945         int ix;
1946         int digix;
1947         unsigned char c;
1948         int suppress_null = 0;
1949         int decimal_point = 0;
1950         int thousandplex = 0;
1951         int thousands_exact = 1;
1952         int thousands_inc = 0;
1953         int prev_thousands = 0;
1954         int ordinal = 0;
1955         int this_value;
1956         int decimal_count;
1957         int max_decimal_count;
1958         int decimal_mode;
1959         int suffix_ix;
1960         int skipwords = 0;
1961         int group_len;
1962         int len;
1963         char *p;
1964         char string[32];  // for looking up entries in **_list
1965         char buf1[100];
1966         char ph_append[50];
1967         char ph_buf[200];
1968         char ph_buf2[50];
1969         char ph_zeros[50];
1970         char suffix[30];  // string[] must be long enough for sizeof(suffix)+2
1971         char buf_digit_lookup[50];
1972
1973         static const char str_pause[2] = {phonPAUSE_NOLINK,0};
1974
1975         *flags = 0;
1976         n_digit_lookup = 0;
1977         buf_digit_lookup[0] = 0;
1978         digit_lookup = buf_digit_lookup;
1979         number_control = control;
1980
1981         for(ix=0; IsDigit09(word[ix]); ix++) ;
1982         n_digits = ix;
1983         value = this_value = atoi(word);
1984
1985         group_len = 3;
1986         if(tr->langopts.numbers2 & NUM2_MYRIADS)
1987                 group_len = 4;
1988
1989         // is there a previous thousands part (as a previous "word") ?
1990         if((n_digits == group_len) && (word[-2] == tr->langopts.thousands_sep) && IsDigit09(word[-3]))
1991         {
1992                 prev_thousands = 1;
1993         }
1994         else if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE))
1995         {
1996                 // thousands groups can be separated by spaces
1997                 if((n_digits == 3) && !(wtab->flags & FLAG_MULTIPLE_SPACES) && IsDigit09(word[-2]))
1998                 {
1999                         prev_thousands = 1;
2000                 }
2001         }
2002         if(prev_thousands == 0)
2003         {
2004                 speak_missing_thousands = 0;
2005         }
2006
2007         ph_ordinal2[0] = 0;
2008         ph_zeros[0] = 0;
2009
2010         if(prev_thousands || (word[0] != '0'))
2011         {
2012                 // don't check for ordinal if the number has a leading zero
2013                 if((ordinal = CheckDotOrdinal(tr, word, &word[ix], wtab, 0)) != 0)
2014                 {
2015 //                      dot_ordinal = 1;
2016                 }
2017         }
2018
2019         if((word[ix] == '.') && !IsDigit09(word[ix+1]) && !IsDigit09(word[ix+2]) && !(wtab[1].flags & FLAG_NOSPACE))
2020         {
2021                 // remove dot unless followed by another number
2022                 word[ix] = 0;
2023         }
2024
2025         if((ordinal == 0) || (tr->translator_name == L('h','u')))
2026         {
2027 // NOTE lang=hu, allow both dot and ordinal suffix, eg. "december 21.-én"
2028                 // look for an ordinal number suffix after the number
2029                 ix++;
2030                 p = suffix;
2031                 if(wtab[0].flags & FLAG_HYPHEN_AFTER)
2032                 {
2033                         *p++ = '-';
2034                         ix++;
2035                 }
2036                 while((word[ix] != 0) && (word[ix] != ' ') && (ix < (int)(sizeof(suffix)-1)))
2037                 {
2038                         *p++ = word[ix++];
2039                 }
2040                 *p = 0;
2041
2042                 if(suffix[0] != 0)
2043                 {
2044                         if((tr->langopts.ordinal_indicator != NULL) && (strcmp(suffix, tr->langopts.ordinal_indicator) == 0))
2045                         {
2046                                 ordinal = 2;
2047                         }
2048                         else if(!IsDigit09(suffix[0]))  // not _#9 (tab)
2049                         {
2050                                 sprintf(string,"_#%s",suffix);
2051                                 if(Lookup(tr, string, ph_ordinal2))
2052                                 {
2053                                         // this is an ordinal suffix
2054                                         ordinal = 2;
2055                                         flags[0] |= FLAG_SKIPWORDS;
2056                                         skipwords = 1;
2057                                         sprintf(string,"_x#%s",suffix);
2058                                         Lookup(tr, string, ph_ordinal2x);  // is there an alternate pronunciation?
2059                                 }
2060                         }
2061                 }
2062         }
2063
2064         if(wtab[0].flags & FLAG_ORDINAL)
2065                 ordinal = 2;
2066
2067         ph_append[0] = 0;
2068         ph_buf2[0] = 0;
2069
2070
2071         if((word[0] == '0') && (prev_thousands == 0) && (word[1] != ' ') && (word[1] != tr->langopts.decimal_sep))
2072         {
2073                 if((n_digits == 2) && (word[3] == ':') && IsDigit09(word[5]) && isspace(word[7]))
2074                 {
2075                         // looks like a time 02:30, omit the leading zero
2076                 }
2077                 else
2078                 {
2079                         if(n_digits > 3)
2080                         {
2081                                 flags[0] &= ~FLAG_SKIPWORDS;
2082                                 return(0);     // long number string with leading zero, speak as individual digits
2083                         }
2084
2085                         // speak leading zeros
2086                         for(ix=0; (word[ix] == '0') && (ix < (n_digits-1)); ix++)
2087                         {
2088                                 Lookup(tr, "_0", &ph_zeros[strlen(ph_zeros)]);
2089                         }
2090                 }
2091         }
2092
2093         if((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' '))
2094                 thousands_inc = 1;
2095         else if(word[n_digits] == tr->langopts.thousands_sep)
2096                 thousands_inc = 2;
2097
2098         suffix_ix = n_digits+2;
2099         if(thousands_inc > 0)
2100         {
2101                 // if the following "words" are three-digit groups, count them and add
2102                 // a "thousand"/"million" suffix to this one
2103                 digix = n_digits + thousands_inc;
2104
2105                 while(((wtab[thousandplex+1].flags & FLAG_MULTIPLE_SPACES) == 0) && CheckThousandsGroup(&word[digix], group_len))
2106                 {
2107                         for(ix=0; ix<group_len; ix++)
2108                         {
2109                                 if(word[digix+ix] != '0')
2110                                 {
2111                                         thousands_exact = 0;
2112                                         break;
2113                                 }
2114                         }
2115
2116                         thousandplex++;
2117                         digix += group_len;
2118                         if((word[digix] == tr->langopts.thousands_sep) || ((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[digix] == ' ')))
2119                         {
2120                                 suffix_ix = digix+2;
2121                                 digix += thousands_inc;
2122                         }
2123                         else
2124                                 break;
2125                 }
2126         }
2127
2128         if((value == 0) && prev_thousands)
2129         {
2130                 suppress_null = 1;
2131         }
2132
2133         if(tr->translator_name == L('h','u'))
2134         {
2135                 // variant form of numbers when followed by hyphen and a suffix starting with 'a' or 'e' (but not a, e, az, ez, azt, ezt
2136                 if((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact==1) && hu_number_e(&word[suffix_ix], thousandplex, value))
2137                 {
2138                         number_control |= 1;  // use _1e variant of number
2139                 }
2140         }
2141
2142         if((word[n_digits] == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1]))
2143         {
2144                 // this "word" ends with a decimal point
2145                 Lookup(tr, "_dpt", ph_append);
2146                 decimal_point = 0x100;
2147         }
2148         else if(suppress_null == 0)
2149         {
2150                 if(thousands_inc > 0)
2151                 {
2152                         if(thousandplex > 0)
2153 //                      if((thousandplex > 0) && (value < 1000))
2154                         {
2155                                 if((suppress_null == 0) && (LookupThousands(tr,value,thousandplex, thousands_exact, ph_append)))
2156                                 {
2157                                         // found an exact match for N thousand
2158                                         value = 0;
2159                                         suppress_null = 1;
2160                                 }
2161                         }
2162                 }
2163         }
2164         else
2165
2166                 if(speak_missing_thousands == 1)
2167                 {
2168                         // speak this thousandplex if there was no word for the previous thousandplex
2169                         sprintf(string,"_0M%d",thousandplex+1);
2170                         if(Lookup(tr, string, buf1)==0)
2171                         {
2172                                 sprintf(string,"_0M%d",thousandplex);
2173                                 Lookup(tr, string, ph_append);
2174                         }
2175                 }
2176
2177         if((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
2178         {
2179                 Lookup(tr, "_.", ph_append);
2180         }
2181
2182         if(thousandplex == 0)
2183         {
2184                 char *p2;
2185                 // look for combinations of the number with the next word
2186                 p = word;
2187                 while(IsDigit09(p[1])) p++;  // just use the last digit
2188                 if(IsDigit09(p[-1]))
2189                 {
2190                         p2 = p - 1;
2191                         if(LookupDictList(tr, &p2, buf_digit_lookup, flags, FLAG_SUFX, wtab))  // lookup 2 digits
2192                         {
2193                                 n_digit_lookup = 2;
2194                         }
2195                 }
2196
2197 //              if((buf_digit_lookup[0] == 0) && (*p != '0') && (dot_ordinal==0))
2198                 if((buf_digit_lookup[0] == 0) && (*p != '0'))
2199                 {
2200                         // LANG=hu ?
2201                         // not found, lookup only the last digit (?? but not if dot-ordinal has been found)
2202                         if(LookupDictList(tr, &p, buf_digit_lookup, flags, FLAG_SUFX, wtab))  // don't match '0', or entries with $only
2203                         {
2204                                 n_digit_lookup = 1;
2205                         }
2206                 }
2207
2208                 if(prev_thousands == 0)
2209                 {
2210                         if((decimal_point == 0) && (ordinal == 0))
2211                         {
2212                                 // Look for special pronunciation for this number in isolation (LANG=kl)
2213                                 sprintf(string, "_%dn", value);
2214                                 if(Lookup(tr, string, ph_out))
2215                                 {
2216                                         return(1);
2217                                 }
2218                         }
2219
2220                         if(tr->langopts.numbers2 & NUM2_PERCENT_BEFORE)
2221                         {
2222                                 // LANG=si, say "percent" before the number
2223                                 p2 = word;
2224                                 while((*p2 != ' ') && (*p2 != 0))
2225                                 {
2226                                         p2++;
2227                                 }
2228                                 if(p2[1] == '%')
2229                                 {
2230                                         Lookup(tr, "%", ph_out);
2231                                         ph_out += strlen(ph_out);
2232                                         p2[1] = ' ';
2233                                 }
2234                         }
2235                 }
2236
2237         }
2238
2239         LookupNum3(tr, value, ph_buf, suppress_null, thousandplex, prev_thousands | ordinal | decimal_point);
2240         if((thousandplex > 0) && (tr->langopts.numbers2 & 0x200))
2241                 sprintf(ph_out,"%s%s%c%s%s",ph_zeros,ph_append,phonEND_WORD,ph_buf2,ph_buf);  // say "thousands" before its number
2242         else
2243                 sprintf(ph_out,"%s%s%s%c%s",ph_zeros,ph_buf2,ph_buf,phonEND_WORD,ph_append);
2244
2245
2246         while(decimal_point)
2247         {
2248                 n_digits++;
2249
2250                 decimal_count = 0;
2251                 while(IsDigit09(word[n_digits+decimal_count]))
2252                         decimal_count++;
2253
2254 //              if(decimal_count > 1)
2255                 {
2256                         max_decimal_count = 2;
2257                         switch(decimal_mode = (tr->langopts.numbers & 0xe000))
2258                         {
2259                         case NUM_DFRACTION_4:
2260                                 max_decimal_count = 5;
2261                         case NUM_DFRACTION_2:
2262                                 // French/Polish decimal fraction
2263                                 while(word[n_digits] == '0')
2264                                 {
2265                                         Lookup(tr, "_0", buf1);
2266                                         strcat(ph_out,buf1);
2267                                         decimal_count--;
2268                                         n_digits++;
2269                                 }
2270                                 if((decimal_count <= max_decimal_count) && IsDigit09(word[n_digits]))
2271                                 {
2272                                         LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
2273                                         strcat(ph_out,buf1);
2274                                         n_digits += decimal_count;
2275                                 }
2276                                 break;
2277
2278                         case NUM_DFRACTION_1:   // italian, say "hundredths" if leading zero
2279                         case NUM_DFRACTION_5:   // hungarian, always say "tenths" etc.
2280                         case NUM_DFRACTION_6:   // kazakh, always say "tenths" etc, before the decimal fraction
2281                                 LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0);
2282                                 if((word[n_digits]=='0') || (decimal_mode != NUM_DFRACTION_1))
2283                                 {
2284                                         // decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
2285                                         sprintf(string,"_0Z%d",decimal_count);
2286                                         if(Lookup(tr, string, buf1) == 0)
2287                                                 break;   // revert to speaking single digits
2288
2289                                         if(decimal_mode == NUM_DFRACTION_6)
2290                                                 strcat(ph_out, buf1);
2291                                         else
2292                                                 strcat(ph_buf, buf1);
2293                                 }
2294                                 strcat(ph_out,ph_buf);
2295                                 n_digits += decimal_count;
2296                                 break;
2297
2298                         case NUM_DFRACTION_3:
2299                                 // Romanian decimal fractions
2300                                 if((decimal_count <= 4) && (word[n_digits] != '0'))
2301                                 {
2302                                         LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
2303                                         strcat(ph_out,buf1);
2304                                         n_digits += decimal_count;
2305                                 }
2306                                 break;
2307
2308                         case NUM_DFRACTION_7:
2309                                 // alternative form of decimal fraction digits, except the final digit
2310                                 while(decimal_count-- > 1)
2311                                 {
2312                                         sprintf(string,"_%cd", word[n_digits]);
2313                                         if(Lookup(tr, string, buf1) == 0)
2314                                                 break;
2315                                         n_digits++;
2316                                         strcat(ph_out, buf1);
2317                                 }
2318                         }
2319                 }
2320
2321                 while(IsDigit09(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10)))
2322                 {
2323                         // speak any remaining decimal fraction digits individually
2324                         value = word[n_digits++] - '0';
2325                         LookupNum2(tr, value, 2, buf1);
2326                         len = strlen(ph_out);
2327                         sprintf(&ph_out[len],"%c%s", phonEND_WORD, buf1);
2328                 }
2329
2330                 // something after the decimal part ?
2331                 if(Lookup(tr, "_dpt2", buf1))
2332                         strcat(ph_out,buf1);
2333
2334                 if((c == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1]))
2335                 {
2336                         Lookup(tr, "_dpt", buf1);
2337                         strcat(ph_out,buf1);
2338                 }
2339                 else
2340                 {
2341                         decimal_point = 0;
2342                 }
2343         }
2344         if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
2345         {
2346                 int next_char;
2347                 char *p;
2348                 p = &word[n_digits+1];
2349
2350                 p += utf8_in(&next_char,p);
2351                 if((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
2352                         utf8_in(&next_char,p);
2353
2354                 if(!iswalpha2(next_char) && (thousands_exact==0))
2355 //              if(!iswalpha2(next_char) && !((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact != 0)))
2356                         strcat(ph_out,str_pause);  // don't add pause for 100s,  6th, etc.
2357         }
2358
2359         *flags |= FLAG_FOUND;
2360         speak_missing_thousands--;
2361
2362         if(skipwords)
2363                 dictionary_skipwords = skipwords;
2364         return(1);
2365 }  // end of TranslateNumber_1
2366
2367
2368
2369 int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
2370 {//=============================================================================================================
2371         if((option_sayas == SAYAS_DIGITS1) || (wtab[0].flags & FLAG_INDIVIDUAL_DIGITS))
2372                 return(0);  // speak digits individually
2373
2374         if(tr->langopts.numbers != 0)
2375         {
2376                 return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control));
2377         }
2378         return(0);
2379 }  // end of TranslateNumber
2380