include/espeak/src/numbers.cpp

   1 /***************************************************************************
   2  *   Copyright (C) 2005 to 2013 by Jonathan Duddington                     *
   3  *   email: jonsd@users.sourceforge.net                                    *
   4  *                                                                         *
   5  *   This program is free software; you can redistribute it and/or modify  *
   6  *   it under the terms of the GNU General Public License as published by  *
   7  *   the Free Software Foundation; either version 3 of the License, or     *
   8  *   (at your option) any later version.                                   *
   9  *                                                                         *
  10  *   This program is distributed in the hope that it will be useful,       *
  11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
  13  *   GNU General Public License for more details.                          *
  14  *                                                                         *
  15  *   You should have received a copy of the GNU General Public License     *
  16  *   along with this program; if not, see:                                 *
  17  *               <http://www.gnu.org/licenses/>.                           *
  18  ***************************************************************************/
  19
  20 #include "StdAfx.h"
  21
  22 #include <stdio.h>
  23 #include <ctype.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26
  27 #include <wctype.h>
  28 #include <wchar.h>
  29
  30 #include "speak_lib.h"
  31 #include "speech.h"
  32 #include "phoneme.h"
  33 #include "synthesize.h"
  34 #include "voice.h"
  35 #include "translate.h"
  36
  37
  38
  39 #define M_NAME      0
  40 #define M_SMALLCAP  1
  41 #define M_TURNED    2
  42 #define M_REVERSED  3
  43 #define M_CURL      4
  44
  45 #define M_ACUTE     5
  46 #define M_BREVE     6
  47 #define M_CARON     7
  48 #define M_CEDILLA   8
  49 #define M_CIRCUMFLEX 9
  50 #define M_DIAERESIS 10
  51 #define M_DOUBLE_ACUTE 11
  52 #define M_DOT_ABOVE 12
  53 #define M_GRAVE     13
  54 #define M_MACRON    14
  55 #define M_OGONEK    15
  56 #define M_RING      16
  57 #define M_STROKE    17
  58 #define M_TILDE     18
  59
  60 #define M_BAR       19
  61 #define M_RETROFLEX 20
  62 #define M_HOOK      21
  63
  64
  65 #define M_MIDDLE_DOT  M_DOT_ABOVE  // duplicate of M_DOT_ABOVE
  66 #define M_IMPLOSIVE   M_HOOK
  67
  68 static int n_digit_lookup;
  69 static char *digit_lookup;
  70 static int speak_missing_thousands;
  71 static int number_control;
  72
  73
   74 typedef struct {  // describes one modifier (accent, or a named form of a letter)
   75         const char *name;  // token which is passed to Lookup() to get the spoken name of the modifier
   76         int  flags;  // bit 0: LookupAccentedLetter() puts the modifier name before the letter
   77 } ACCENTS;
   78
   79 // These are tokens to look up in the *_list file. The table is indexed by the M_* modifier codes, so the order must match them.
   80 static ACCENTS accents_tab[] = {
   81         {"_lig", 1},  // index 0 (M_NAME); also the entry used for ligatures
   82         {"_smc", 1},  // smallcap
   83         {"_tur", 1},  // turned
   84         {"_rev", 1},  // reversed
   85         {"_crl", 0},  // curl
   86
   87         {"_acu", 0},  // acute
   88         {"_brv", 0},  // breve
   89         {"_hac", 0},  // caron/hacek
   90         {"_ced", 0},  // cedilla
   91         {"_cir", 0},  // circumflex
   92         {"_dia", 0},  // diaeresis
   93         {"_ac2", 0},  // double acute
   94         {"_dot", 0},  // dot
   95         {"_grv", 0},  // grave
   96         {"_mcn", 0},  // macron
   97         {"_ogo", 0},  // ogonek
   98         {"_rng", 0},  // ring
   99         {"_stk", 0},  // stroke
  100         {"_tld", 0},  // tilde
  101
  102         {"_bar", 0},  // bar
  103         {"_rfx", 0},  // retroflex
  104         {"_hok", 0},  // hook
  105 };
 106
 107
 108 #define CAPITAL  0
 109 #define LETTER(ch,mod1,mod2) (ch-59)+(mod1 << 6)+(mod2 << 11)
 110 #define LIGATURE(ch1,ch2,mod1) (ch1-59)+((ch2-59) << 6)+(mod1 << 12)+0x8000
 111
 112
 113 #define L_ALPHA  60   // U+3B1
 114 #define L_SCHWA  61   // U+259
 115 #define L_OPEN_E 62   // U+25B
 116 #define L_GAMMA  63   // U+3B3
 117 #define L_IOTA   64   // U+3B9
 118 #define L_OE     65   // U+153
 119 #define L_OMEGA  66   // U+3C9
 120
 121 #define L_PHI    67   // U+3C6
 122 #define L_ESH    68   // U+283
 123 #define L_UPSILON 69 // U+3C5
 124 #define L_EZH     70 // U+292
 125 #define L_GLOTTAL 71 // U+294
 126 #define L_RTAP    72 // U+27E
 127
 128
 129 static const short non_ascii_tab[] = {
 130         0, 0x3b1, 0x259, 0x25b, 0x3b3, 0x3b9, 0x153, 0x3c9,
 131         0x3c6, 0x283, 0x3c5, 0x292, 0x294, 0x27e
 132 };
 133
 134
  135 // characters U+00e0 to U+017f: one entry per code point (index = code point - 0xe0), built with LETTER()/LIGATURE(); CAPITAL (0) fills the upper-case code points, which have no data here
  136 static const unsigned short letter_accents_0e0[] = {
  137         LETTER('a',M_GRAVE,0),   // U+00e0
  138         LETTER('a',M_ACUTE,0),
  139         LETTER('a',M_CIRCUMFLEX,0),
  140         LETTER('a',M_TILDE,0),
  141         LETTER('a',M_DIAERESIS,0),
  142         LETTER('a',M_RING,0),
  143         LIGATURE('a','e',0),
  144         LETTER('c',M_CEDILLA,0),
  145         LETTER('e',M_GRAVE,0),
  146         LETTER('e',M_ACUTE,0),
  147         LETTER('e',M_CIRCUMFLEX,0),
  148         LETTER('e',M_DIAERESIS,0),
  149         LETTER('i',M_GRAVE,0),
  150         LETTER('i',M_ACUTE,0),
  151         LETTER('i',M_CIRCUMFLEX,0),
  152         LETTER('i',M_DIAERESIS,0),
  153         LETTER('d',M_NAME,0),  // eth  // U+00f0
  154         LETTER('n',M_TILDE,0),
  155         LETTER('o',M_GRAVE,0),
  156         LETTER('o',M_ACUTE,0),
  157         LETTER('o',M_CIRCUMFLEX,0),
  158         LETTER('o',M_TILDE,0),
  159         LETTER('o',M_DIAERESIS,0),
  160         0,     // division sign
  161         LETTER('o',M_STROKE,0),
  162         LETTER('u',M_GRAVE,0),
  163         LETTER('u',M_ACUTE,0),
  164         LETTER('u',M_CIRCUMFLEX,0),
  165         LETTER('u',M_DIAERESIS,0),
  166         LETTER('y',M_ACUTE,0),
  167         LETTER('t',M_NAME,0),  // thorn
  168         LETTER('y',M_DIAERESIS,0),
  169         CAPITAL,                 // U+0100
  170         LETTER('a',M_MACRON,0),
  171         CAPITAL,
  172         LETTER('a',M_BREVE,0),
  173         CAPITAL,
  174         LETTER('a',M_OGONEK,0),
  175         CAPITAL,
  176         LETTER('c',M_ACUTE,0),
  177         CAPITAL,
  178         LETTER('c',M_CIRCUMFLEX,0),
  179         CAPITAL,
  180         LETTER('c',M_DOT_ABOVE,0),
  181         CAPITAL,
  182         LETTER('c',M_CARON,0),
  183         CAPITAL,
  184         LETTER('d',M_CARON,0),
  185         CAPITAL,                 // U+0110
  186         LETTER('d',M_STROKE,0),
  187         CAPITAL,
  188         LETTER('e',M_MACRON,0),
  189         CAPITAL,
  190         LETTER('e',M_BREVE,0),
  191         CAPITAL,
  192         LETTER('e',M_DOT_ABOVE,0),
  193         CAPITAL,
  194         LETTER('e',M_OGONEK,0),
  195         CAPITAL,
  196         LETTER('e',M_CARON,0),
  197         CAPITAL,
  198         LETTER('g',M_CIRCUMFLEX,0),
  199         CAPITAL,
  200         LETTER('g',M_BREVE,0),
  201         CAPITAL,                // U+0120
  202         LETTER('g',M_DOT_ABOVE,0),
  203         CAPITAL,
  204         LETTER('g',M_CEDILLA,0),
  205         CAPITAL,
  206         LETTER('h',M_CIRCUMFLEX,0),
  207         CAPITAL,
  208         LETTER('h',M_STROKE,0),
  209         CAPITAL,
  210         LETTER('i',M_TILDE,0),
  211         CAPITAL,
  212         LETTER('i',M_MACRON,0),
  213         CAPITAL,
  214         LETTER('i',M_BREVE,0),
  215         CAPITAL,
  216         LETTER('i',M_OGONEK,0),
  217         CAPITAL,               // U+0130
  218         LETTER('i',M_NAME,0), // dotless i
  219         CAPITAL,
  220         LIGATURE('i','j',0),
  221         CAPITAL,
  222         LETTER('j',M_CIRCUMFLEX,0),
  223         CAPITAL,
  224         LETTER('k',M_CEDILLA,0),
  225         LETTER('k',M_NAME,0),  // kra
  226         CAPITAL,
  227         LETTER('l',M_ACUTE,0),
  228         CAPITAL,
  229         LETTER('l',M_CEDILLA,0),
  230         CAPITAL,
  231         LETTER('l',M_CARON,0),
  232         CAPITAL,
  233         LETTER('l',M_MIDDLE_DOT,0),  // U+0140
  234         CAPITAL,
  235         LETTER('l',M_STROKE,0),
  236         CAPITAL,
  237         LETTER('n',M_ACUTE,0),
  238         CAPITAL,
  239         LETTER('n',M_CEDILLA,0),
  240         CAPITAL,
  241         LETTER('n',M_CARON,0),
  242         LETTER('n',M_NAME,0),  // apostrophe n
  243         CAPITAL,
  244         LETTER('n',M_NAME,0),  // eng
  245         CAPITAL,
  246         LETTER('o',M_MACRON,0),
  247         CAPITAL,
  248         LETTER('o',M_BREVE,0),
  249         CAPITAL,             // U+0150
  250         LETTER('o',M_DOUBLE_ACUTE,0),
  251         CAPITAL,
  252         LIGATURE('o','e',0),
  253         CAPITAL,
  254         LETTER('r',M_ACUTE,0),
  255         CAPITAL,
  256         LETTER('r',M_CEDILLA,0),
  257         CAPITAL,
  258         LETTER('r',M_CARON,0),
  259         CAPITAL,
  260         LETTER('s',M_ACUTE,0),
  261         CAPITAL,
  262         LETTER('s',M_CIRCUMFLEX,0),
  263         CAPITAL,
  264         LETTER('s',M_CEDILLA,0),
  265         CAPITAL,              // U+0160
  266         LETTER('s',M_CARON,0),
  267         CAPITAL,
  268         LETTER('t',M_CEDILLA,0),
  269         CAPITAL,
  270         LETTER('t',M_CARON,0),
  271         CAPITAL,
  272         LETTER('t',M_STROKE,0),
  273         CAPITAL,
  274         LETTER('u',M_TILDE,0),
  275         CAPITAL,
  276         LETTER('u',M_MACRON,0),
  277         CAPITAL,
  278         LETTER('u',M_BREVE,0),
  279         CAPITAL,
  280         LETTER('u',M_RING,0),
  281         CAPITAL,              // U+0170
  282         LETTER('u',M_DOUBLE_ACUTE,0),
  283         CAPITAL,
  284         LETTER('u',M_OGONEK,0),
  285         CAPITAL,
  286         LETTER('w',M_CIRCUMFLEX,0),
  287         CAPITAL,
  288         LETTER('y',M_CIRCUMFLEX,0),
  289         CAPITAL,   // Y-DIAERESIS
  290         CAPITAL,
  291         LETTER('z',M_ACUTE,0),
  292         CAPITAL,
  293         LETTER('z',M_DOT_ABOVE,0),
  294         CAPITAL,
  295         LETTER('z',M_CARON,0),
  296         LETTER('s',M_NAME,0), // long-s  // U+17f
  297 };
 298
 299
  300 // characters U+0250 to U+02A8: one entry per code point (index = code point - 0x250), built with LETTER()/LIGATURE(); 0 means that there is no data for the character
  301 static const unsigned short letter_accents_250[] = {
  302         LETTER('a',M_TURNED,0),         // U+250
  303         LETTER(L_ALPHA,0,0),
  304         LETTER(L_ALPHA,M_TURNED,0),
  305         LETTER('b',M_IMPLOSIVE,0),
  306         0,  // open-o
  307         LETTER('c',M_CURL,0),
  308         LETTER('d',M_RETROFLEX,0),
  309         LETTER('d',M_IMPLOSIVE,0),
  310         LETTER('e',M_REVERSED,0),       // U+258
  311         0,   // schwa
  312         LETTER(L_SCHWA,M_HOOK,0),
  313         0,   // open-e
  314         LETTER(L_OPEN_E,M_REVERSED,0),
  315         LETTER(L_OPEN_E,M_HOOK,M_REVERSED),
  316         0,//LETTER(L_OPEN_E,M_CLOSED,M_REVERSED),
  317         LETTER('j',M_BAR,0),
  318         LETTER('g',M_IMPLOSIVE,0),      // U+260
  319         LETTER('g',0,0),
  320         LETTER('g',M_SMALLCAP,0),
  321         LETTER(L_GAMMA,0,0),
  322         0,   // ramshorn
  323         LETTER('h',M_TURNED,0),
  324         LETTER('h',M_HOOK,0),
  325         0,//LETTER(L_HENG,M_HOOK,0),
  326         LETTER('i',M_BAR,0),            // U+268
  327         LETTER(L_IOTA,0,0),
  328         LETTER('i',M_SMALLCAP,0),
  329         LETTER('l',M_TILDE,0),
  330         LETTER('l',M_BAR,0),
  331         LETTER('l',M_RETROFLEX,0),
  332         LIGATURE('l','z',0),
  333         LETTER('m',M_TURNED,0),
  334         0,//LETTER('m',M_TURNED,M_LEG), // U+270
  335         LETTER('m',M_HOOK,0),
  336         0,//LETTER('n',M_LEFTHOOK,0),
  337         LETTER('n',M_RETROFLEX,0),
  338         LETTER('n',M_SMALLCAP,0),
  339         LETTER('o',M_BAR,0),
  340         LIGATURE('o','e',M_SMALLCAP),
  341         0,//LETTER(L_OMEGA,M_CLOSED,0),
  342         LETTER(L_PHI,0,0),              // U+278
  343         LETTER('r',M_TURNED,0),
  344         0,//LETTER('r',M_TURNED,M_LEG),
  345         LETTER('r',M_RETROFLEX,M_TURNED),
  346         0,//LETTER('r',M_LEG,0),
  347         LETTER('r',M_RETROFLEX,0),
  348         0,  // r-tap
  349         LETTER(L_RTAP,M_REVERSED,0),
  350         LETTER('r',M_SMALLCAP,0),       // U+280
  351         LETTER('r',M_TURNED,M_SMALLCAP),
  352         LETTER('s',M_RETROFLEX,0),
  353         0,  // esh
  354         0,//LETTER('j',M_BAR,L_IMPLOSIVE),
  355         LETTER(L_ESH,M_REVERSED,0),
  356         LETTER(L_ESH,M_CURL,0),
  357         LETTER('t',M_TURNED,0),
  358         LETTER('t',M_RETROFLEX,0),      // U+288
  359         LETTER('u',M_BAR,0),
  360         LETTER(L_UPSILON,0,0),
  361         LETTER('v',M_HOOK,0),
  362         LETTER('v',M_TURNED,0),
  363         LETTER('w',M_TURNED,0),
  364         LETTER('y',M_TURNED,0),
  365         LETTER('y',M_SMALLCAP,0),
  366         LETTER('z',M_RETROFLEX,0),      // U+290
  367         LETTER('z',M_CURL,0),
  368         0,  // ezh
  369         LETTER(L_EZH,M_CURL,0),
  370         0,  // glottal stop
  371         LETTER(L_GLOTTAL,M_REVERSED,0),
  372         LETTER(L_GLOTTAL,M_TURNED,0),
  373         0,//LETTER('c',M_LONG,0),
  374         0,  // bilabial click           // U+298
  375         LETTER('b',M_SMALLCAP,0),
  376         0,//LETTER(L_OPEN_E,M_CLOSED,0),
  377         LETTER('g',M_IMPLOSIVE,M_SMALLCAP),
  378         LETTER('h',M_SMALLCAP,0),
  379         LETTER('j',M_CURL,0),
  380         LETTER('k',M_TURNED,0),
  381         LETTER('l',M_SMALLCAP,0),
  382         LETTER('q',M_HOOK,0),      // U+2a0
  383         LETTER(L_GLOTTAL,M_STROKE,0),
  384         LETTER(L_GLOTTAL,M_STROKE,M_REVERSED),
  385         LIGATURE('d','z',0),
  386         0,   // dezh
  387         LIGATURE('d','z',M_CURL),
  388         LIGATURE('t','s',0),
  389         0,   // tesh
  390         LIGATURE('t','s',M_CURL),
  391 };
 392
 393 static int LookupLetter2(Translator *tr, unsigned int letter, char *ph_buf)
 394 {       //========================================================================
 395         int len;
 396         char single_letter[10];
 397
 398         single_letter[0] = 0;
 399         single_letter[1] = '_';
 400         len = utf8_out(letter, &single_letter[2]);
 401         single_letter[len+2] = ' ';
 402         single_letter[len+3] = 0;
 403
 404         if(Lookup(tr, &single_letter[1], ph_buf) == 0)
 405         {
 406                 single_letter[1] = ' ';
 407                 if(Lookup(tr, &single_letter[2], ph_buf) == 0)
 408                 {
 409                         TranslateRules(tr, &single_letter[2], ph_buf, 20, NULL,0,NULL);
 410                 }
 411         }
 412         return(ph_buf[0]);
 413 }
 414
 415
 416 void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf)
 417 {//=========================================================================
 418         // lookup the character in the accents table
 419         int accent_data = 0;
 420         int accent1 = 0;
 421         int accent2 = 0;
 422         int basic_letter;
 423         int letter2=0;
 424         char ph_letter1[30];
 425         char ph_letter2[30];
 426         char ph_accent1[30];
 427         char ph_accent2[30];
 428
 429         ph_accent2[0] = 0;
 430
 431         if((letter >= 0xe0) && (letter < 0x17f))
 432         {
 433                 accent_data = letter_accents_0e0[letter - 0xe0];
 434         }
 435         else if((letter >= 0x250) && (letter <= 0x2a8))
 436         {
 437                 accent_data = letter_accents_250[letter - 0x250];
 438         }
 439
 440         if(accent_data != 0)
 441         {
 442                 basic_letter = (accent_data & 0x3f) + 59;
 443                 if(basic_letter < 'a')
 444                         basic_letter = non_ascii_tab[basic_letter-59];
 445
 446                 if(accent_data & 0x8000)
 447                 {
 448                         letter2 = (accent_data >> 6) & 0x3f;
 449                         letter2 += 59;
 450                         accent2 = (accent_data >> 12) & 0x7;
 451                 }
 452                 else
 453                 {
 454                         accent1 = (accent_data >> 6) & 0x1f;
 455                         accent2 = (accent_data >> 11) & 0xf;
 456                 }
 457
 458
 459                 if(Lookup(tr, accents_tab[accent1].name, ph_accent1) != 0)
 460                 {
 461
 462                         if(LookupLetter2(tr, basic_letter, ph_letter1) != 0)
 463                         {
 464                                 if(accent2 != 0)
 465                                 {
 466                                         if(Lookup(tr, accents_tab[accent2].name, ph_accent2) == 0)
 467                                         {
 468 //                                              break;
 469                                         }
 470
 471                                         if(accents_tab[accent2].flags & 1)
 472                                         {
 473                                                 strcpy(ph_buf,ph_accent2);
 474                                                 ph_buf += strlen(ph_buf);
 475                                                 ph_accent2[0] = 0;
 476                                         }
 477                                 }
 478                                 if(letter2 != 0)
 479                                 {
 480                                         //ligature
 481                                         LookupLetter2(tr, letter2, ph_letter2);
 482                                         sprintf(ph_buf,"%s%c%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
 483                                 }
 484                                 else
 485                                 {
 486                                         if(accent1 == 0)
 487                                                 strcpy(ph_buf, ph_letter1);
 488                                         else if((tr->langopts.accents & 1) || (accents_tab[accent1].flags & 1))
 489                                                 sprintf(ph_buf,"%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);
 490                                         else
 491                                                 sprintf(ph_buf,"%c%s%c%s%c", phonSTRESS_2, ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
 492                                 }
 493                         }
 494                 }
 495         }
 496 }  // end of LookupAccentedLetter
 497
 498
 499
 500 void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf1, int control)
 501 {//==============================================================================================
 502 // control, bit 0:  not the first letter of a word
 503
 504         int len;
 505         static char single_letter[10] = {0,0};
 506         unsigned int dict_flags[2];
 507         char ph_buf3[40];
 508
 509         ph_buf1[0] = 0;
 510         len = utf8_out(letter,&single_letter[2]);
 511         single_letter[len+2] = ' ';
 512
 513         if(next_byte == -1)
 514         {
 515                 // speaking normal text, not individual characters
 516                 if(Lookup(tr, &single_letter[2], ph_buf1) != 0)
 517                         return;
 518
 519                 single_letter[1] = '_';
 520                 if(Lookup(tr, &single_letter[1], ph_buf3) != 0)
 521                         return;   // the character is specified as _* so ignore it when speaking normal text
 522
 523                 // check whether this character is specified for English
 524                 if(tr->translator_name == L('e','n'))
 525                         return;   // we are already using English
 526
 527                 SetTranslator2("en");
 528                 if(Lookup(translator2, &single_letter[2], ph_buf3) != 0)
 529                 {
 530                         // yes, switch to English and re-translate the word
 531                         sprintf(ph_buf1,"%c",phonSWITCH);
 532                 }
 533                 SelectPhonemeTable(voice->phoneme_tab_ix);  // revert to original phoneme table
 534                 return;
 535         }
 536
 537         if((letter <= 32) || iswspace(letter))
 538         {
 539                 // lookup space as _&32 etc.
 540                 sprintf(&single_letter[1],"_#%d ",letter);
 541                 Lookup(tr, &single_letter[1], ph_buf1);
 542                 return;
 543         }
 544
 545         if(next_byte != ' ')
 546                 next_byte = RULE_SPELLING;
 547         single_letter[3+len] = next_byte;   // follow by space-space if the end of the word, or space-31
 548
 549         single_letter[1] = '_';
 550
 551         // if the $accent flag is set for this letter, use the accents table (below)
 552         dict_flags[1] = 0;
 553
 554         if(Lookup(tr, &single_letter[1], ph_buf3) == 0)
 555         {
 556                 single_letter[1] = ' ';
 557                 if(Lookup(tr, &single_letter[2], ph_buf3) == 0)
 558                 {
 559                         TranslateRules(tr, &single_letter[2], ph_buf3, sizeof(ph_buf3), NULL,FLAG_NO_TRACE,NULL);
 560                 }
 561         }
 562
 563         if(ph_buf3[0] == 0)
 564         {
 565                 LookupAccentedLetter(tr, letter, ph_buf3);
 566         }
 567
 568         strcpy(ph_buf1, ph_buf3);
 569         if((ph_buf1[0] == 0) || (ph_buf1[0] == phonSWITCH))
 570         {
 571                 return;
 572         }
 573
 574         dict_flags[0] = 0;
 575         dict_flags[1] = 0;
 576         SetWordStress(tr, ph_buf1, dict_flags, -1, control & 1);
 577
 578 }  // end of LookupLetter
 579
 580
 581 // unicode ranges for non-ascii digits 0-9
 582 static const int number_ranges[] = {
 583         0x660, 0x6f0,  // arabic
 584         0x966, 0x9e6, 0xa66, 0xae6, 0xb66, 0xbe6, 0xc66, 0xce6, 0xd66,  // indic
 585         0xe50, 0xed0, 0xf20, 0x1040, 0x1090,
 586         0 };  // these must be in ascending order
 587
 588
 589 int NonAsciiNumber(int letter)
 590 {//============================
 591 // Change non-ascii digit into ascii digit '0' to '9', (or -1 if not)
 592         const int *p;
 593         int base;
 594
 595         for(p=number_ranges; (base = *p) != 0; p++)
 596         {
 597                 if(letter < base)
 598                         break;  // not found
 599                 if(letter < (base+10))
 600                         return(letter-base+'0');
 601         }
 602         return(-1);
 603 }
 604
  605 #define L_SUB 0x4000   // subscript   (flag added to the base character in derived_letters)
  606 #define L_SUP 0x8000   // superscript (flag added to the base character in derived_letters)
  607
  608 static const char *modifiers[] = {NULL, "_sub", "_sup", NULL};  // tokens to look up for the modifier name, indexed by (value >> 14)
  609
  610 // Pairs of: unicode character, base character + L_SUB or L_SUP. The list ends with 0,0. This list must be in ascending order.
  611 static unsigned short derived_letters[] = {
  612         0x00aa, 'a'+L_SUP,
  613         0x00b2, '2'+L_SUP,
  614         0x00b3, '3'+L_SUP,
  615         0x00b9, '1'+L_SUP,
  616         0x00ba, 'o'+L_SUP,
  617         0x02b0, 'h'+L_SUP,
  618         0x02b1, 0x266+L_SUP,
  619         0x02b2, 'j'+L_SUP,
  620         0x02b3, 'r'+L_SUP,
  621         0x02b4, 0x279+L_SUP,
  622         0x02b5, 0x27b+L_SUP,
  623         0x02b6, 0x281+L_SUP,
  624         0x02b7, 'w'+L_SUP,
  625         0x02b8, 'y'+L_SUP,
  626         0x02c0, 0x294+L_SUP,
  627         0x02c1, 0x295+L_SUP,
  628         0x02e0, 0x263+L_SUP,
  629         0x02e1, 'l'+L_SUP,
  630         0x02e2, 's'+L_SUP,
  631         0x02e3, 'x'+L_SUP,
  632         0x2070, '0'+L_SUP,
  633         0x2071, 'i'+L_SUP,
  634         0x2074, '4'+L_SUP,
  635         0x2075, '5'+L_SUP,
  636         0x2076, '6'+L_SUP,
  637         0x2077, '7'+L_SUP,
  638         0x2078, '8'+L_SUP,
  639         0x2079, '9'+L_SUP,
  640         0x207a, '+'+L_SUP,
  641         0x207b, '-'+L_SUP,
  642         0x207c, '='+L_SUP,
  643         0x207d, '('+L_SUP,
  644         0x207e, ')'+L_SUP,
  645         0x207f, 'n'+L_SUP,
  646         0x2080, '0'+L_SUB,
  647         0x2081, '1'+L_SUB,
  648         0x2082, '2'+L_SUB,
  649         0x2083, '3'+L_SUB,
  650         0x2084, '4'+L_SUB,
  651         0x2085, '5'+L_SUB,
  652         0x2086, '6'+L_SUB,
  653         0x2087, '7'+L_SUB,
  654         0x2088, '8'+L_SUB,
  655         0x2089, '9'+L_SUB,
  656         0x208a, '+'+L_SUB,
  657         0x208b, '-'+L_SUB,
  658         0x208c, '='+L_SUB,
  659         0x208d, '('+L_SUB,
  660         0x208e, ')'+L_SUB,
  661         0x2090, 'a'+L_SUB,
  662         0x2091, 'e'+L_SUB,
  663         0x2092, 'o'+L_SUB,
  664         0x2093, 'x'+L_SUB,
  665         0x2094, 0x259+L_SUB,
  666         0x2095, 'h'+L_SUB,
  667         0x2096, 'k'+L_SUB,
  668         0x2097, 'l'+L_SUB,
  669         0x2098, 'm'+L_SUB,
  670         0x2099, 'n'+L_SUB,
  671         0x209a, 'p'+L_SUB,
  672         0x209b, 's'+L_SUB,
  673         0x209c, 't'+L_SUB,
  674         0,0};
 675
 676
  677 static const char *hex_letters[] = {"'e:j","b'i:","s'i:","d'i:","'i:","'ef"};  // six letter names as phoneme strings, using phonemes available to all languages (presumably the hex digits a to f - confirm at the point of use)
 678
 679 int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
 680 {//=========================================================================
 681 // get pronunciation for an isolated letter
 682 // return number of bytes used by the letter
 683 // control bit 0:  a non-initial letter in a word
 684 //         bit 1:  say 'capital'
 685 //         bit 2:  say character code for unknown letters
 686
 687         int n_bytes;
 688         int letter;
 689         int len;
 690         int ix;
 691         int c;
 692         char *p2;
 693         char *pbuf;
 694         const char *modifier;
 695         ALPHABET *alphabet;
 696         int al_offset;
 697         int al_flags;
 698         int language;
 699         int number;
 700         int phontab_1;
 701         int speak_letter_number;
 702         char capital[30];
 703         char ph_buf[80];
 704         char ph_buf2[80];
 705         char ph_alphabet[80];
 706         char hexbuf[12];
 707         static char pause_string[] = {phonPAUSE, 0};
 708
 709         ph_buf[0] = 0;
 710         ph_alphabet[0] = 0;
 711         capital[0] = 0;
 712         phontab_1 = translator->phoneme_tab_ix;
 713
 714         n_bytes = utf8_in(&letter,word);
 715
 716         if((letter & 0xfff00) == 0x0e000)
 717         {
 718                 letter &= 0xff;   // uncode private usage area
 719         }
 720
 721         if(control & 2)
 722         {
 723                 // include CAPITAL information
 724                 if(iswupper2(letter))
 725                 {
 726                         Lookup(tr, "_cap", capital);
 727                 }
 728         }
 729         letter = towlower2(letter);
 730         LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
 731
 732         if(ph_buf[0] == 0)
 733         {
 734                 // is this a subscript or superscript letter ?
 735                 for(ix=0; (c = derived_letters[ix]) != 0; ix+=2)
 736                 {
 737                         if(c > letter)
 738                                 break;
 739                         if(c == letter)
 740                         {
 741                                 c = derived_letters[ix+1];
 742                                 letter = c & 0x3fff;
 743                                 if((modifier = modifiers[c >> 14]) != NULL)
 744                                 {
 745                                         Lookup(tr, modifier, capital);
 746                                         if(capital[0] == 0)
 747                                         {
 748                                                 capital[2] = SetTranslator2("en");   // overwrites previous contents of translator2
 749                                                 Lookup(translator2, modifier, &capital[3]);
 750                                                 if(capital[3] != 0)
 751                                                 {
 752                                                         capital[0] = phonPAUSE;
 753                                                         capital[1] = phonSWITCH;
 754                                                         len = strlen(&capital[3]);
 755                                                         capital[len+3] = phonSWITCH;
 756                                                         capital[len+4] = phontab_1;
 757                                                         capital[len+5] = 0;
 758                                                 }
 759                                         }
 760                                 }
 761                         }
 762                 }
 763                 LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
 764         }
 765
 766         if(ph_buf[0] == phonSWITCH)
 767         {
 768                 strcpy(phonemes,ph_buf);
 769                 return(0);
 770         }
 771
 772
 773         if((ph_buf[0] == 0) && ((number = NonAsciiNumber(letter)) > 0))
 774         {
 775                 // convert a non-ascii number to 0-9
 776                 LookupLetter(tr, number, 0, ph_buf, control & 1);
 777         }
 778
 779         al_offset = 0;
 780         al_flags = 0;
 781         if((alphabet = AlphabetFromChar(letter)) != NULL)
 782         {
 783                 al_offset = alphabet->offset;
 784                 al_flags = alphabet->flags;
 785         }
 786
 787         if(alphabet != current_alphabet)
 788         {
 789                 // speak the name of the alphabet
 790                 current_alphabet = alphabet;
 791                 if((alphabet != NULL) && !(al_flags & AL_DONT_NAME) && (al_offset != translator->letter_bits_offset))
 792                 {
 793                         if((al_flags & AL_DONT_NAME) || (al_offset == translator->langopts.alt_alphabet) || (al_offset == translator->langopts.our_alphabet))
 794                         {
 795                                 // don't say the alphabet name
 796                         }
 797                         else
 798                         {
 799                                 ph_buf2[0] = 0;
 800                                 if(Lookup(translator, alphabet->name, ph_alphabet) == 0)  // the original language for the current voice
 801                                 {
 802                                         // Can't find the local name for this alphabet, use the English name
 803                                         ph_alphabet[2] = SetTranslator2("en");   // overwrites previous contents of translator2
 804                                         Lookup(translator2, alphabet->name, ph_buf2);
 805                                 }
 806                                 else if(translator != tr)
 807                                 {
 808                                         phontab_1 = tr->phoneme_tab_ix;
 809                                         strcpy(ph_buf2, ph_alphabet);
 810                                         ph_alphabet[2] = translator->phoneme_tab_ix;
 811                                 }
 812
 813                                 if(ph_buf2[0] != 0)
 814                                 {
 815                                         // we used a different language for the alphabet name (now in ph_buf2)
 816                                         ph_alphabet[0] = phonPAUSE;
 817                                         ph_alphabet[1] = phonSWITCH;
 818                                         strcpy(&ph_alphabet[3], ph_buf2);
 819                                         len = strlen(ph_buf2) + 3;
 820                                         ph_alphabet[len] = phonSWITCH;
 821                                         ph_alphabet[len+1] = phontab_1;
 822                                         ph_alphabet[len+2] = 0;
 823                                 }
 824                         }
 825                 }
 826         }
 827
 828
 829 // caution: SetWordStress() etc don't expect phonSWITCH + phoneme table number
 830
 831         if(ph_buf[0] == 0)
 832         {
 833                 if((al_offset != 0) && (al_offset == translator->langopts.alt_alphabet))
 834                         language = translator->langopts.alt_alphabet_lang;
 835                 else
 836                 if((alphabet != NULL) && (alphabet->language != 0) && !(al_flags & AL_NOT_LETTERS))
 837                         language = alphabet->language;
 838                 else
 839                         language = L('e','n');
 840
 841                 if((language != tr->translator_name) || (language == L('k','o')))
 842                 {
 843                         char *p3;
 844                         int initial, code;
 845                         char hangul_buf[12];
 846
 847                         // speak in the language for this alphabet (or English)
 848                         ph_buf[2] = SetTranslator2(WordToString2(language));
 849
 850                         if(((code = letter - 0xac00) >= 0) && (letter <= 0xd7af))
 851                         {
 852                                 // Special case for Korean letters.
 853                                 // break a syllable hangul into 2 or 3 individual jamo
 854
 855                                 hangul_buf[0] = ' ';
 856                                 p3 = &hangul_buf[1];
 857                                 if((initial = (code/28)/21) != 11)
 858                                 {
 859                                         p3 += utf8_out(initial + 0x1100, p3);
 860                                 }
 861                                 utf8_out(((code/28) % 21) + 0x1161, p3);  // medial
 862                                 utf8_out((code % 28) + 0x11a7, &p3[3]);   // final
 863                                 p3[6] = ' ';
 864                                 p3[7] = 0;
 865                                 ph_buf[3] = 0;
 866                                 TranslateRules(translator2, &hangul_buf[1], &ph_buf[3], sizeof(ph_buf)-3, NULL, 0, NULL);
 867                                 SetWordStress(translator2, &ph_buf[3], NULL, -1, 0);
 868                         }
 869                         else
 870                         {
 871                                 LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
 872                         }
 873
 874                         if(ph_buf[3] == phonSWITCH)
 875                         {
 876                                 // another level of language change
 877                                 ph_buf[2] = SetTranslator2(&ph_buf[4]);
 878                                 LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
 879                         }
 880
 881                         SelectPhonemeTable(voice->phoneme_tab_ix);  // revert to original phoneme table
 882
 883                         if(ph_buf[3] != 0)
 884                         {
 885                                 ph_buf[0] = phonPAUSE;
 886                                 ph_buf[1] = phonSWITCH;
 887                                 len = strlen(&ph_buf[3]) + 3;
 888                                 ph_buf[len] = phonSWITCH;  // switch back
 889                                 ph_buf[len+1] = tr->phoneme_tab_ix;
 890                                 ph_buf[len+2] = 0;
 891                         }
 892                 }
 893         }
 894
 895         if(ph_buf[0] == 0)
 896         {
 897                 // character name not found
 898
 899                 if(ph_buf[0]== 0)
 900                 {
 901                         speak_letter_number = 1;
 902                         if(!(al_flags & AL_NO_SYMBOL))
 903                         {
 904                                 if(iswalpha2(letter))
 905                                         Lookup(translator, "_?A", ph_buf);
 906
 907                                 if((ph_buf[0]==0) && !iswspace(letter))
 908                                         Lookup(translator, "_??", ph_buf);
 909
 910                                 if(ph_buf[0] == 0)
 911                                 {
 912                                         EncodePhonemes("l'et@", ph_buf, NULL);
 913                                 }
 914                         }
 915
 916                         if(!(control & 4) && (al_flags & AL_NOT_CODE))
 917                         {
 918                                 // don't speak the character code number, unless we want full details of this character
 919                                 speak_letter_number = 0;
 920                         }
 921
 922 //                      if((ph_alphabet[0] != 0) && speak_letter_number)
 923 //                              ph_buf[0] = 0;  // don't speak "letter" if we speak alphabet name
 924
 925                         if(speak_letter_number)
 926                         {
 927                                 if(al_offset == 0x2800)
 928                                 {
 929                                         // braille dots symbol, list the numbered dots
 930                                         p2 = hexbuf;
 931                                         for(ix=0; ix<8; ix++)
 932                                         {
 933                                                 if(letter & (1 << ix))
 934                                                 {
 935                                                         *p2++ = '1'+ix;
 936                                                 }
 937                                         }
 938                                         *p2 = 0;
 939                                 }
 940                                 else
 941                                 {
 942                                         // speak the hexadecimal number of the character code
 943                                         sprintf(hexbuf,"%x",letter);
 944                                 }
 945
 946                                 pbuf = ph_buf;
 947                                 for(p2 = hexbuf; *p2 != 0; p2++)
 948                                 {
 949                                         pbuf += strlen(pbuf);
 950                                         *pbuf++ = phonPAUSE_VSHORT;
 951                                         LookupLetter(translator, *p2, 0, pbuf, 1);
 952                                         if(((pbuf[0] == 0) || (pbuf[0]==phonSWITCH)) && (*p2 >= 'a'))
 953                                         {
 954                                                 // This language has no translation for 'a' to 'f', speak English names using base phonemes
 955                                                 EncodePhonemes(hex_letters[*p2 - 'a'], pbuf, NULL);
 956                                         }
 957                                 }
 958                                 strcat(pbuf, pause_string);
 959                         }
 960                 }
 961         }
 962
 963         len = strlen(phonemes);
 964
 965         if(tr->langopts.accents & 2)  // 'capital' before or after the word ?
 966                 sprintf(ph_buf2,"%c%s%s%s",0xff,ph_alphabet,ph_buf,capital);
 967         else
 968                 sprintf(ph_buf2,"%c%s%s%s",0xff,ph_alphabet,capital,ph_buf);  // the 0xff marker will be removed or replaced in SetSpellingStress()
 969         if((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
 970         {
 971                 strcpy(&phonemes[len],ph_buf2);
 972         }
 973         return(n_bytes);
 974 }  // end of TranslateLetter
 975
 976
 977
 978 void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars)
 979 {//=============================================================================
 980 // Individual letter names, reduce the stress of some.
 981         int ix;
 982         unsigned int c;
 983         int n_stress=0;
 984         int prev = 0;
 985         int count;
 986         unsigned char buf[N_WORD_PHONEMES];
 987
 988         for(ix=0; (c = phonemes[ix]) != 0; ix++)
 989         {
 990                 if((c == phonSTRESS_P) && (prev != phonSWITCH))
 991                 {
 992                         n_stress++;
 993                 }
 994                 buf[ix] = prev = c;
 995         }
 996         buf[ix] = 0;
 997
 998         count = 0;
 999         prev = 0;
1000         for(ix=0; (c = buf[ix]) != 0; ix++)
1001         {
1002                 if((c == phonSTRESS_P) && (n_chars > 1) && (prev != phonSWITCH))
1003                 {
1004                         count++;
1005
1006                         if(tr->langopts.spelling_stress == 1)
1007                         {
1008                                 // stress on initial letter when spelling
1009                                 if(count > 1)
1010                                         c = phonSTRESS_3;
1011                         }
1012                         else
1013                         {
1014                                 if(count != n_stress)
1015                                 {
1016                                         if(((count % 3) != 0) || (count == n_stress-1))
1017                                                 c = phonSTRESS_3;   // reduce to secondary stress
1018                                 }
1019                         }
1020                 }
1021                 else if(c == 0xff)
1022                 {
1023                         if((control < 2) || (ix==0))
1024                                 continue;   // don't insert pauses
1025
1026                         if(control == 4)
1027                                 c = phonPAUSE;    // pause after each character
1028                         if(((count % 3) == 0) || (control > 2))
1029                                 c = phonPAUSE_NOLINK;  // pause following a primary stress
1030                         else
1031                                 c = phonPAUSE_VSHORT;
1032                 }
1033                 *phonemes++ = prev = c;
1034         }
1035         if(control >= 2)
1036                 *phonemes++ = phonPAUSE_NOLINK;
1037         *phonemes = 0;
1038 }  // end of SetSpellingStress
1039
1040
1041
1042 // Numbers
1043
// Phonemes of the ordinal ending.  LookupNum2() copies this when it speaks an ordinal number,
// and appends it to the tens when NUM2_MULTIPLE_ORDINAL is set.
static char ph_ordinal2[12];
// Alternative ordinal ending.  LookupNum2() uses it instead of ph_ordinal2 (when it is not empty)
// if the "x" form of the ordinal, used when there are no higher digits, was found (lang=an).
static char ph_ordinal2x[12];
1046
1047
1048 static int CheckDotOrdinal(Translator *tr, char *word, char *word_end, WORD_TAB *wtab, int roman)
1049 {//==============================================================================================
1050
1051         int ordinal = 0;
1052         int c2;
1053         int nextflags;
1054
1055         if((tr->langopts.numbers & NUM_ORDINAL_DOT) && ((word_end[0] == '.') || (wtab[0].flags & FLAG_HAS_DOT)) && !(wtab[1].flags & FLAG_NOSPACE))
1056         {
1057                 if(roman || !(wtab[1].flags & FLAG_FIRST_UPPER))
1058                 {
1059                         if(word_end[0] == '.')
1060                                 utf8_in(&c2, &word_end[2]);
1061                         else
1062                                 utf8_in(&c2, &word_end[0]);
1063
1064                         if((word_end[0] != 0) && (word_end[1] != 0) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || IsAlpha(c2)))
1065                         {
1066                                 // ordinal number is indicated by dot after the number
1067                                 // but not if the next word starts with an upper-case letter
1068                                 // (c2 == 0) is for cases such as, "2.,"
1069                                 ordinal = 2;
1070                                 if(word_end[0] == '.')
1071                                         word_end[0] = ' ';
1072
1073                                 if((roman==0) && (tr->translator_name == L('h','u')))
1074                                 {
1075                                         // lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix.
1076                                         nextflags = 0;
1077                                         if(IsAlpha(c2))
1078                                         {
1079                                                 nextflags = TranslateWord(tr, &word_end[2], 0, NULL, NULL);
1080                                         }
1081
1082                                         if((tr->prev_dict_flags[0] & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2)))
1083                                                 ordinal = 0;   // TEST  09.02.10
1084
1085                                         if(nextflags & FLAG_ALT_TRANS)
1086                                                 ordinal = 0;
1087
1088                                         if(nextflags & FLAG_ALT3_TRANS)
1089                                         {
1090                                                 if(word[-2] == '-')
1091                                                         ordinal = 0;   // eg. december 2-5. között
1092
1093                                                 if(tr->prev_dict_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT3_TRANS))
1094                                                         ordinal = 0x22;
1095                                         }
1096                                 }
1097                         }
1098                 }
1099         }
1100         return(ordinal);
1101 }  // end of CheckDotOrdinal
1102
1103
static int hu_number_e(const char *word, int thousandplex, int value)
{//==================================================================
// lang-hu: returns 1 if the variant ('e') form of a number should be used, otherwise 0.
// The variant is used when the number is followed by a hyphen and a suffix which starts
// with 'a' or 'e' (but not a, e, az, ez, azt, ezt, att, ett).
// word:          the suffix which follows the number
// thousandplex:  with value 1, or with a value which is a multiple of 1000, a suffix "al"/"el" does not use the variant
// value:         the value of the number

        if((word[0] != 'a') && (word[0] != 'e'))
                return(0);

        switch(word[1])
        {
        case ' ':
        case 'z':
                return(0);

        case 't':
                if(word[2] == 't')
                        return(0);
                break;

        case 'l':
                if((thousandplex == 1) || ((value % 1000) == 0))
                        return(0);   // 1000-el
                break;
        }
        return(1);
}  // end of hu_number_e
1120
1121
1122
1123 int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab)
1124 {//=========================================================================
1125         int c;
1126         char *p;
1127         const char *p2;
1128         int acc;
1129         int prev;
1130         int value;
1131         int subtract;
1132         int repeat = 0;
1133         int n_digits = 0;
1134         char *word_start;
1135         int num_control = 0;
1136         unsigned int flags[2];
1137         char ph_roman[30];
1138         char number_chars[N_WORD_BYTES];
1139
1140         static const char *roman_numbers = "ixcmvld";
1141         static int roman_values[] = {1,10,100,1000,5,50,500};
1142
1143         acc = 0;
1144         prev = 0;
1145         subtract = 0x7fff;
1146         ph_out[0] = 0;
1147         flags[0] = 0;
1148         flags[1] = 0;
1149
1150         if(((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && !(wtab[0].flags & FLAG_ALL_UPPER)) || IsDigit09(word[-2]))
1151                 return(0);    // not '2xx'
1152
1153         word_start = word;
1154         while((c = *word++) != ' ')
1155         {
1156                 if((p2 = strchr(roman_numbers,c)) == NULL)
1157                         return(0);
1158
1159                 value = roman_values[p2 - roman_numbers];
1160                 if(value == prev)
1161                 {
1162                         repeat++;
1163                         if(repeat >= 3)
1164                                 return(0);
1165                 }
1166                 else
1167                         repeat = 0;
1168
1169                 if((prev > 1) && (prev != 10) && (prev != 100))
1170                 {
1171                         if(value >= prev)
1172                                 return(0);
1173                 }
1174                 if((prev != 0) && (prev < value))
1175                 {
1176                         if(((acc % 10) != 0) || ((prev*10) < value))
1177                                 return(0);
1178                         subtract = prev;
1179                         value -= subtract;
1180                 }
1181                 else if(value >= subtract)
1182                         return(0);
1183                 else
1184                         acc += prev;
1185                 prev = value;
1186                 n_digits++;
1187         }
1188
1189         if(IsDigit09(word[0]))
1190                 return(0);      // eg. 'xx2'
1191
1192         acc += prev;
1193         if(acc < tr->langopts.min_roman)
1194                 return(0);
1195
1196         if(acc > tr->langopts.max_roman)
1197                 return(0);
1198
1199
1200         Lookup(tr, "_roman",ph_roman);   // precede by "roman" if _rom is defined in *_list
1201         p = &ph_out[0];
1202
1203         if((tr->langopts.numbers & NUM_ROMAN_AFTER) == 0)
1204         {
1205                 strcpy(ph_out,ph_roman);
1206                 p = &ph_out[strlen(ph_roman)];
1207         }
1208
1209         sprintf(number_chars,"  %d    ",acc);
1210
1211         if(word[0] == '.')
1212         {
1213                 // dot has not been removed.  This implies that there was no space after it
1214                 return(0);
1215         }
1216
1217         if(CheckDotOrdinal(tr, word_start, word, wtab, 1))
1218                 wtab[0].flags |= FLAG_ORDINAL;
1219
1220         if(tr->langopts.numbers & NUM_ROMAN_ORDINAL)
1221         {
1222                 if(tr->translator_name == L('h','u'))
1223                 {
1224                         if(!(wtab[0].flags & FLAG_ORDINAL))
1225                         {
1226                                 if((wtab[0].flags & FLAG_HYPHEN_AFTER) && hu_number_e(word, 0, acc))
1227                                 {
1228                                         // should use the 'e' form of the number
1229                                         num_control |= 1;
1230                                 }
1231                                 else
1232                                         return(0);
1233                         }
1234                 }
1235                 else
1236                 {
1237                         wtab[0].flags |= FLAG_ORDINAL;
1238                 }
1239         }
1240
1241         tr->prev_dict_flags[0] = 0;
1242         tr->prev_dict_flags[1] = 0;
1243         TranslateNumber(tr, &number_chars[2], p, flags, wtab, num_control);
1244
1245         if(tr->langopts.numbers & NUM_ROMAN_AFTER)
1246                 strcat(ph_out,ph_roman);
1247
1248         return(1);
1249 }  // end of TranslateRoman
1250
1251
1252 static const char *M_Variant(int value)
1253 {//====================================
1254         // returns M, or perhaps MA or MB for some cases
1255
1256         int teens = 0;
1257
1258         if(((value % 100) > 10) && ((value % 100) < 20))
1259                 teens = 1;
1260
1261         switch((translator->langopts.numbers2 >> 6) & 0x7)
1262         {
1263         case 1:  // lang=ru  use singular for xx1 except for x11
1264                 if((teens == 0) && ((value % 10) == 1))
1265                         return("1M");
1266                 break;
1267
1268         case 2:  // lang=cs,sk
1269                 if((value >= 2) && (value <= 4))
1270                         return("0MA");
1271                 break;
1272
1273         case 3:  // lang=pl
1274                 if((teens == 0) && (((value % 10) >= 2) && ((value % 10) <= 4)))
1275                         return("0MA");
1276                 break;
1277
1278         case 4:  // lang=lt
1279                 if((teens == 1) || ((value % 10) == 0))
1280                         return("0MB");
1281                 if((value % 10) == 1)
1282                         return("0MA");
1283                 break;
1284
1285         case 5:  // lang=bs,hr,sr
1286                 if(teens == 0)
1287                 {
1288                         if((value % 10) == 1)
1289                                 return("1M");
1290                         if(((value % 10) >= 2) && ((value % 10) <= 4))
1291                                 return("0MA");
1292                 }
1293                 break;
1294         }
1295         return("0M");
1296 }
1297
1298
1299 static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out)
1300 {//=======================================================================================================
1301 // thousands_exact:  bit 0  no hundreds,tens,or units,  bit 1  ordinal numberr
1302         int found;
1303         int found_value=0;
1304         char string[12];
1305         char ph_of[12];
1306         char ph_thousands[40];
1307         char ph_buf[40];
1308
1309         ph_of[0] = 0;
1310
1311         // first look for a match with the exact value of thousands
1312         if(value > 0)
1313         {
1314                 if(thousands_exact & 1)
1315                 {
1316                         if(thousands_exact & 2)
1317                         {
1318                                 // ordinal number
1319                                 sprintf(string,"_%dM%do",value,thousandplex);
1320                                 found_value = Lookup(tr, string, ph_thousands);
1321                         }
1322                         if(!found_value & (number_control & 1))
1323                         {
1324                                 // look for the 'e' variant
1325                                 sprintf(string,"_%dM%de",value,thousandplex);
1326                                 found_value = Lookup(tr, string, ph_thousands);
1327                         }
1328                         if(!found_value)
1329                         {
1330                                 // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta)
1331                                 sprintf(string,"_%dM%dx",value,thousandplex);
1332                                 found_value = Lookup(tr, string, ph_thousands);
1333                         }
1334                 }
1335                 if(found_value == 0)
1336                 {
1337                         sprintf(string,"_%dM%d",value,thousandplex);
1338                         found_value = Lookup(tr, string, ph_thousands);
1339                 }
1340         }
1341
1342         if(found_value == 0)
1343         {
1344                 if((value % 100) >= 20)
1345                 {
1346                         Lookup(tr, "_0of", ph_of);
1347                 }
1348
1349                 found = 0;
1350                 if(thousands_exact & 1)
1351                 {
1352                         if(thousands_exact & 2)
1353                         {
1354                                 // ordinal number
1355                                 sprintf(string,"_%s%do",M_Variant(value), thousandplex);
1356                                 found = Lookup(tr, string, ph_thousands);
1357                         }
1358                         if(!found && (number_control & 1))
1359                         {
1360                                 // look for the 'e' variant
1361                                 sprintf(string,"_%s%de",M_Variant(value), thousandplex);
1362                                 found = Lookup(tr, string, ph_thousands);
1363                         }
1364                         if(!found)
1365                         {
1366                                 // is there a different pronunciation if there are no hundreds,tens,or units ?
1367                                 sprintf(string,"_%s%dx",M_Variant(value), thousandplex);
1368                                 found = Lookup(tr, string, ph_thousands);
1369                         }
1370                 }
1371                 if(found == 0)
1372                 {
1373                         sprintf(string,"_%s%d",M_Variant(value), thousandplex);
1374
1375                         if(Lookup(tr, string, ph_thousands) == 0)
1376                         {
1377                                 if(thousandplex > 3)
1378                                 {
1379                                         sprintf(string,"_0M%d", thousandplex-1);
1380                                         if(Lookup(tr, string, ph_buf) == 0)
1381                                         {
1382                                                 // say "millions" if this name is not available and neither is the next lower
1383                                                 Lookup(tr, "_0M2", ph_thousands);
1384                                                 speak_missing_thousands = 3;
1385                                         }
1386                                 }
1387                                 if(ph_thousands[0] == 0)
1388                                 {
1389                                         // repeat "thousand" if higher order names are not available
1390                                         sprintf(string,"_%dM1",value);
1391                                         if((found_value = Lookup(tr, string, ph_thousands)) == 0)
1392                                                 Lookup(tr, "_0M1", ph_thousands);
1393                                         speak_missing_thousands = 2;
1394                                 }
1395                         }
1396                 }
1397         }
1398         sprintf(ph_out,"%s%s",ph_of,ph_thousands);
1399
1400         if((value == 1) && (thousandplex == 1) && (tr->langopts.numbers & NUM_OMIT_1_THOUSAND))
1401                 return(1);
1402
1403         return(found_value);
1404 }  // end f LookupThousands
1405
1406
1407 static int LookupNum2(Translator *tr, int value, const int control, char *ph_out)
1408 {//=============================================================================
1409 // Lookup a 2 digit number
1410 // control bit 0: ordinal number
1411 // control bit 1: final tens and units (not number of thousands) (use special form of '1', LANG=de "eins")
1412 // control bit 2: tens and units only, no higher digits
1413 // control bit 3: use feminine form of '2' (for thousands
1414 // control bit 4: speak zero tens
1415 // control bit 5: variant of ordinal number (lang=hu)
1416 //         bit 8   followed by decimal fraction
1417
1418         int found;
1419         int ix;
1420         int units;
1421         int tens;
1422         int is_ordinal;
1423         int used_and=0;
1424         int found_ordinal = 0;
1425         int next_phtype;
1426         int ord_type = 'o';
1427         char string[12];  // for looking up entries in *_list
1428         char ph_ordinal[20];
1429         char ph_tens[50];
1430         char ph_digits[50];
1431         char ph_and[12];
1432
1433         units = value % 10;
1434         tens = value / 10;
1435
1436         found = 0;
1437         ph_ordinal[0] = 0;
1438         ph_tens[0] = 0;
1439         ph_digits[0] = 0;
1440         ph_and[0] = 0;
1441
1442         if(control & 0x20)
1443         {
1444                 ord_type = 'q';
1445         }
1446
1447         is_ordinal = control & 1;
1448
1449         if((control & 2) && (n_digit_lookup == 2))
1450         {
1451                 // pronunciation of the final 2 digits has already been found
1452                 strcpy(ph_out, digit_lookup);
1453         }
1454         else
1455         {
1456                 if(digit_lookup[0] == 0)
1457                 {
1458                         // is there a special pronunciation for this 2-digit number
1459                         if(control & 8)
1460                         {
1461                                 // is there a feminine form?
1462                                 sprintf(string,"_%df",value);
1463                                 found = Lookup(tr, string, ph_digits);
1464                         }
1465                         else if(is_ordinal)
1466                         {
1467                                 strcpy(ph_ordinal, ph_ordinal2);
1468
1469                                 if(control & 4)
1470                                 {
1471                                         sprintf(string,"_%d%cx",value,ord_type);  // LANG=hu, special word for 1. 2. when there are no higher digits
1472                                         if((found = Lookup(tr, string, ph_digits)) != 0)
1473                                         {
1474                                                 if(ph_ordinal2x[0] != 0)
1475                                                         strcpy(ph_ordinal, ph_ordinal2x);  // alternate pronunciation (lang=an)
1476                                         }
1477                                 }
1478                                 if(found == 0)
1479                                 {
1480                                         sprintf(string,"_%d%c",value,ord_type);
1481                                         found = Lookup(tr, string, ph_digits);
1482                                 }
1483                                 found_ordinal = found;
1484                         }
1485
1486                         if(found == 0)
1487                         {
1488                                 if(control & 2)
1489                                 {
1490                                         // the final tens and units of a number
1491                                         if(number_control & 1)
1492                                         {
1493                                                 // look for 'e' variant
1494                                                 sprintf(string,"_%de",value);
1495                                                 found = Lookup(tr, string, ph_digits);
1496                                         }
1497                                 }
1498                                 else
1499                                 {
1500                                         // followed by hundreds or thousands etc
1501                                         sprintf(string,"_%da",value);
1502                                         found = Lookup(tr, string, ph_digits);
1503                                 }
1504
1505                                 if(!found)
1506                                 {
1507                                         if((is_ordinal) && (tr->langopts.numbers2 & NUM2_NO_TEEN_ORDINALS))
1508                                         {
1509                                                 // don't use numbers 10-99 to make ordinals, always use _1Xo etc (lang=pt)
1510                                         }
1511                                         else
1512                                         {
1513                                                 sprintf(string,"_%d",value);
1514                                                 found = Lookup(tr, string, ph_digits);
1515                                         }
1516                                 }
1517                         }
1518                 }
1519
1520                 // no, speak as tens+units
1521
1522                 if((control & 0x10) && (value < 10))
1523                 {
1524                         // speak leading zero
1525                         Lookup(tr, "_0", ph_tens);
1526                 }
1527                 else
1528                 {
1529                         if(found)
1530                         {
1531                                 ph_tens[0] = 0;
1532                         }
1533                         else
1534                         {
1535
1536                                 if(is_ordinal)
1537                                 {
1538                                         sprintf(string,"_%dX%c", tens, ord_type);
1539                                         if(Lookup(tr, string, ph_tens) != 0)
1540                                         {
1541                                                 found_ordinal = 1;
1542
1543                                                 if((units != 0) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
1544                                                 {
1545                                                         // Use the ordinal form of tens as well as units. Add the ordinal ending
1546                                                         strcat(ph_tens, ph_ordinal2);
1547                                                 }
1548                                         }
1549                                 }
1550                                 if(found_ordinal == 0)
1551                                 {
1552                                         sprintf(string,"_%dX", tens);
1553                                         Lookup(tr, string, ph_tens);
1554                                 }
1555
1556                                 if((ph_tens[0] == 0) && (tr->langopts.numbers & NUM_VIGESIMAL))
1557                                 {
1558                                         // tens not found,  (for example) 73 is 60+13
1559                                         units = (value % 20);
1560                                         sprintf(string,"_%dX", tens & 0xfe);
1561                                         Lookup(tr, string, ph_tens);
1562                                 }
1563
1564                                 ph_digits[0] = 0;
1565                                 if(units > 0)
1566                                 {
1567                                         found = 0;
1568
1569                                         if((control & 2) && (digit_lookup[0] != 0))
1570                                         {
1571                                                 // we have an entry for this digit (possibly together with the next word)
1572                                                 strcpy(ph_digits, digit_lookup);
1573                                                 found_ordinal = 1;
1574                                                 ph_ordinal[0] = 0;
1575                                         }
1576                                         else
1577                                         {
1578                                                 if(control & 8)
1579                                                 {
1580                                                         // is there a variant form of this number?
1581                                                         sprintf(string,"_%df",units);
1582                                                         found = Lookup(tr, string, ph_digits);
1583                                                 }
1584                                                 if((is_ordinal) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0))
1585                                                 {
1586                                                         // ordinal
1587                                                         sprintf(string,"_%d%c",units,ord_type);
1588                                                         if((found = Lookup(tr, string, ph_digits)) != 0)
1589                                                         {
1590                                                                 found_ordinal = 1;
1591                                                         }
1592                                                 }
1593                                                 if(found == 0)
1594                                                 {
1595                                                         if((number_control & 1) && (control & 2))
1596                                                         {
1597                                                                 // look for 'e' variant
1598                                                                 sprintf(string,"_%de",units);
1599                                                                 found = Lookup(tr, string, ph_digits);
1600                                                         }
1601                                                         else if(((control & 2) == 0) || ((tr->langopts.numbers & NUM_SWAP_TENS) != 0))
1602                                                         {
1603                                                                 // followed by hundreds or thousands (or tens)
1604                                                                 sprintf(string,"_%da",units);
1605                                                                 found = Lookup(tr, string, ph_digits);
1606                                                         }
1607                                                 }
1608                                                 if(found == 0)
1609                                                 {
1610                                                         sprintf(string,"_%d",units);
1611                                                         Lookup(tr, string, ph_digits);
1612                                                 }
1613                                         }
1614                                 }
1615                         }
1616                 }
1617
1618                 if((is_ordinal) && (found_ordinal == 0) && (ph_ordinal[0] == 0))
1619                 {
1620                         if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
1621                                 Lookup(tr, "_ord20", ph_ordinal);
1622                         if(ph_ordinal[0] == 0)
1623                                 Lookup(tr, "_ord", ph_ordinal);
1624                 }
1625
1626                 if((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0))
1627                 {
1628                         Lookup(tr, "_0and", ph_and);
1629
1630                         if((is_ordinal) && (tr->langopts.numbers2 & NUM2_ORDINAL_NO_AND))
1631                                 ph_and[0] = 0;
1632
1633                         if(tr->langopts.numbers & NUM_SWAP_TENS)
1634                                 sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal);
1635                         else
1636                                 sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal);
1637                         used_and = 1;
1638                 }
1639                 else
1640                 {
1641                         if(tr->langopts.numbers & NUM_SINGLE_VOWEL)
1642                         {
1643                                 // remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
1644                                 if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0))
1645                                 {
1646                                         if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
1647                                                 next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
1648
1649                                         if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
1650                                                 ph_tens[ix] = 0;
1651                                 }
1652                         }
1653                         sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal);
1654                 }
1655         }
1656
1657         if(tr->langopts.numbers & NUM_SINGLE_STRESS_L)
1658         {
1659                 // only one primary stress, on the first part (tens)
1660                 found = 0;
1661                 for(ix=0; ix < (signed)strlen(ph_out); ix++)
1662                 {
1663                         if(ph_out[ix] == phonSTRESS_P)
1664                         {
1665                                 if(found)
1666                                         ph_out[ix] = phonSTRESS_3;
1667                                 else
1668                                         found = 1;
1669                         }
1670                 }
1671         }
1672         else if(tr->langopts.numbers & NUM_SINGLE_STRESS)
1673         {
1674                 // only one primary stress
1675                 found = 0;
1676                 for(ix=strlen(ph_out)-1; ix>=0; ix--)
1677                 {
1678                         if(ph_out[ix] == phonSTRESS_P)
1679                         {
1680                                 if(found)
1681                                         ph_out[ix] = phonSTRESS_3;
1682                                 else
1683                                         found = 1;
1684                         }
1685                 }
1686         }
1687         return(used_and);
1688 }  // end of LookupNum2
1689
1690
1691 static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null, int thousandplex, int control)
1692 {//=============================================================================================================
1693 // Translate a 3 digit number
1694 //  control  bit 0,  previous thousands
1695 //           bit 1,  ordinal number
1696 //           bit 5   variant form of ordinal number
1697 //           bit 8   followed by decimal fraction
     //
     // Output layout written to ph_out (see the final sprintf):
     //    buf1 + ph_hundred_and + phonEND_WORD + buf2
     //  where buf1 = thousands + "and" + hundreds-digit + "hundred"
     //    and buf2 = tens and units from LookupNum2(), or the ordinal suffix ph_ordinal2
     //
     //  value          normally 0-999; larger values are accepted: when value/100 >= 10 the
     //                 leading part (value/1000) is spoken as a thousands group first
     //                 (not done for 19xx when NUM_1900 is set)
     //  ph_out         receives the phoneme string; written with sprintf, no size check is made here
     //  suppress_null  non-zero: produce nothing for the tens+units part when it is zero.
     //                 It is forced to 1 locally once a hundreds or thousands part has been produced
     //  thousandplex   0 selects the units-group handling (ordinal and "eins" controls passed to
     //                 LookupNum2); otherwise it is used as a bit index into langopts.numbers2 to
     //                 select the variant form.  Presumably the index of this digit group - confirm with caller
     //  Returns 0 in all cases.
     //  Also reads the file-level buffer ph_ordinal2 (ordinal suffix phonemes), which is set outside this function.
1698         int found;
1699         int hundreds;
1700         int tensunits;
1701         int x;
1702         int ix;
1703         int exact;
1704         int ordinal;
1705         int tplex;
1706         int say_zero_hundred=0;
1707         char string[12];  // for looking up entries in **_list
1708         char buf1[100];   // thousands + hundreds part
1709         char buf2[100];   // tens + units part, or the ordinal suffix
1710         char ph_100[20];  // result of the _0C / _0C0 / _0Co lookup (the word "hundred")
1711         char ph_10T[20];  // filled by LookupThousands() when value >= 1000
1712         char ph_digits[50];  // reused: first the thousands number, then the hundreds digit
1713         char ph_thousands[50];
1714         char ph_hundred_and[12];   // "and" placed after the hundreds part
1715         char ph_thousand_and[12];  // "and" placed between thousands and hundreds
1716
1717         ordinal = control & 0x22;  // keep only bit 1 (ordinal) and bit 5 (variant ordinal)
1718         hundreds = value / 100;
1719         tensunits = value % 100;
1720         buf1[0] = 0;
1721
1722         ph_thousands[0] = 0;
1723         ph_thousand_and[0] = 0;
1724
1725         if((tr->langopts.numbers & NUM_ZERO_HUNDRED) && ((control & 1) || (hundreds >= 10)))
1726         {
1727                 say_zero_hundred = 1;  // lang=vi
1728         }
1729
     //      --- hundreds part (and thousands part when value >= 1000), assembled into buf1 ---
1730         if((hundreds > 0) || say_zero_hundred)
1731         {
     //              choose the "hundred" word: ordinal form _0Co, exact-hundreds form _0C0, otherwise _0C
1732                 found = 0;
1733                 if(ordinal && (tensunits == 0))
1734                 {
1735                         // ordinal number, with no tens or units
1736                         found = Lookup(tr, "_0Co", ph_100);
1737                 }
1738                 if(found == 0)
1739                 {
1740                         if(tensunits==0)
1741                         {
1742                                 // special form for exact hundreds?
1743                                 found = Lookup(tr, "_0C0", ph_100);
1744                         }
1745                         if(!found)
1746                         {
1747                                 Lookup(tr, "_0C", ph_100);
1748                         }
1749                 }
1750
1751                 if(((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19))
1752                 {
1753                         // speak numbers such as 1984 as years: nineteen-eighty-four
     //                      This branch is intentionally empty: taking it skips the thousands split below,
     //                      so 'hundreds' stays 19.
1754 //                      ph_100[0] = 0;   // don't say "hundred", we also need to suppress "and"
1755                 }
1756                 else if(hundreds >= 10)
1757                 {
     //                      value >= 1000: speak value/1000 together with its "thousand" word into ph_thousands
1758                         ph_digits[0] = 0;
1759
1760                         exact = 0;
1761                         if ((value % 1000) == 0)
1762                                 exact = 1;   // nothing follows the thousands
1763
1764                         tplex = thousandplex+1;
1765                         if(tr->langopts.numbers2 & NUM2_MYRIADS)
1766                         {
1767                                 tplex = 0;
1768                         }
1769
     //                      If LookupThousands() returns 0 the number itself is looked up separately with LookupNum2()
1770                         if(LookupThousands(tr, hundreds / 10, tplex, exact | ordinal, ph_10T) == 0)
1771                         {
1772                                 x = 0;
1773                                 if(tr->langopts.numbers2 & (1 << tplex))
1774                                         x = 8;   // use variant (feminine) for before thousands and millions
1775                                 LookupNum2(tr, hundreds/10, x, ph_digits);
1776                         }
1777
1778                         if(tr->langopts.numbers2 & 0x200)
1779                                 sprintf(ph_thousands,"%s%c%s%c",ph_10T,phonEND_WORD,ph_digits,phonEND_WORD);  // say "thousands" before its number, not after
1780                         else
1781                                 sprintf(ph_thousands,"%s%c%s%c",ph_digits,phonEND_WORD,ph_10T,phonEND_WORD);
1782
1783                         hundreds %= 10;   // keep only the hundreds digit
1784                         if((hundreds == 0) && (say_zero_hundred == 0))
1785                                 ph_100[0] = 0;   // no hundreds digit, so don't say "hundred"
1786                         suppress_null = 1;
1787                 }
1788
1789                 ph_digits[0] = 0;   // from here on ph_digits holds the hundreds digit
1790
1791                 if((hundreds > 0) || say_zero_hundred)
1792                 {
     //                      "and" between the thousands and the hundreds
1793                         if((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0)))
1794                         {
1795                                 Lookup(tr, "_0and", ph_thousand_and);
1796                         }
1797
1798                         suppress_null = 1;
1799
1800                         found = 0;
1801                         if((ordinal)
1802                                         && ((tensunits == 0) || (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL)))
1803                         {
1804                                 // ordinal number
1805                                 sprintf(string, "_%dCo", hundreds);
1806                                 found = Lookup(tr, string, ph_digits);
1807
1808                                 if((tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL) && (tensunits > 0))
1809                                 {
1810                                         // Use ordinal form of hundreds, as well as for tens and units
1811                                         // Add ordinal suffix to the hundreds
1812                                         strcat(ph_digits, ph_ordinal2);
1813                                 }
1814                         }
1815
1816                         if((hundreds == 0) && say_zero_hundred)
1817                         {
     //                              explicit "zero" in the hundreds position
1818                                 Lookup(tr, "_0", ph_digits);
1819                         }
1820                         else
1821                         {
1822                                 if((!found) && (tensunits == 0))
1823                                 {
1824                                         // is there a special pronunciation for exactly n00 ?
1825                                         sprintf(string,"_%dC0",hundreds);
1826                                         found = Lookup(tr, string, ph_digits);
1827                                 }
1828
1829                                 if(!found)
1830                                 {
1831                                         sprintf(string,"_%dC",hundreds);
1832                                         found = Lookup(tr, string, ph_digits);  // is there a specific pronunciation for n-hundred ?
1833                                 }
1834
1835                                 if(found)
1836                                 {
     //                                      the entry found covers the whole "n hundred", so drop the separate "hundred" word
1837                                         ph_100[0] = 0;
1838                                 }
1839                                 else
1840                                 {
     //                                      speak the digit followed by "hundred"; the digit 1 is omitted when NUM_OMIT_1_HUNDRED is set
1841                                         if((hundreds > 1) || ((tr->langopts.numbers & NUM_OMIT_1_HUNDRED) == 0))
1842                                         {
1843                                                 LookupNum2(tr, hundreds, 0, ph_digits);
1844                                         }
1845                                 }
1846                         }
1847                 }
1848
1849                 sprintf(buf1,"%s%s%s%s",ph_thousands,ph_thousand_and,ph_digits,ph_100);
1850         }
1851
     //      --- decide whether "and" follows the hundreds part ---
1852         ph_hundred_and[0] = 0;
1853         if(tensunits > 0)
1854         {
1855                 if((control & 2) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
1856                 {
1857                         // Don't use "and" if we apply ordinal to both hundreds and units
1858                 }
1859                 else
1860                 {
1861                         if((value > 100) || ((control & 1) && (thousandplex==0)))
1862                         {
1863                                 if((tr->langopts.numbers & NUM_HUNDRED_AND) || ((tr->langopts.numbers & NUM_HUNDRED_AND_DIGIT) && (tensunits < 10)))
1864                                 {
1865                                         Lookup(tr, "_0and", ph_hundred_and);
1866                                 }
1867                         }
1868                         if((tr->langopts.numbers & NUM_THOUSAND_AND) && (hundreds == 0) && ((control & 1) || (ph_thousands[0] != 0)))
1869                         {
1870                                 Lookup(tr, "_0and", ph_hundred_and);
1871                         }
1872                 }
1873         }
1874
1875
     //      --- tens and units part, built into buf2 ---
1876         buf2[0] = 0;
1877
1878         if((tensunits != 0) || (suppress_null == 0))
1879         {
     //              x is the control word passed to LookupNum2()
1880                 x = 0;
1881                 if(thousandplex==0)
1882                 {
1883                         x = 2;   // allow "eins" for 1 rather than "ein"
1884                         if(ordinal)
1885                                 x = 3;   // ordinal number
1886                         if((value < 100) && !(control & 1))
1887                                 x |= 4;   // tens and units only, no higher digits
1888                         if(ordinal & 0x20)
1889                                 x |= 0x20;  // variant form of ordinal number
1890                 }
1891                 else
1892                 {
1893                         if(tr->langopts.numbers2 & (1 << thousandplex))
1894                                 x = 8;   // use variant (feminine) for before thousands and millions
1895                 }
1896
     //              bit 8 of control (followed by decimal fraction) is passed through to LookupNum2()
1897                 if(LookupNum2(tr, tensunits, x | (control & 0x100), buf2) != 0)
1898                 {
1899                         if(tr->langopts.numbers & NUM_SINGLE_AND)
1900                                 ph_hundred_and[0] = 0;  // don't put 'and' after 'hundred' if there's 'and' between tens and units
1901                 }
1902         }
1903         else
1904         {
     //              no tens/units are spoken; if an ordinal suffix is pending, attach it to the hundreds part
1905                 if(ph_ordinal2[0] != 0)
1906                 {
1907                         ix = strlen(buf1);
1908                         if((ix > 0) && (buf1[ix-1] == phonPAUSE_SHORT))
1909                                 buf1[ix-1] = 0;   // remove pause before adding ordinal suffix
1910                         strcpy(buf2, ph_ordinal2);
1911                 }
1912         }
1913
     //      final result: hundreds part, optional "and", word boundary, tens+units part
1914         sprintf(ph_out,"%s%s%c%s",buf1,ph_hundred_and,phonEND_WORD,buf2);
1915
1916         return(0);
1917 }  // end of LookupNum3
1918
1919
1920 bool CheckThousandsGroup(char *word, int group_len)
1921 {//================================================
1922 // Is this a group of 3 digits which looks like a thousands group?
1923         int ix;
1924
1925         if(IsDigit09(word[group_len]) || IsDigit09(-1))
1926                 return(false);
1927
1928         for(ix=0; ix < group_len; ix++)
1929         {
1930                 if(!IsDigit09(word[ix]))
1931                         return(false);
1932         }
1933         return(true);
1934 }
1935
1936
1937 static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
1938 {//=====================================================================================================================
1939 //  Number translation with various options
1940 // the "word" may be up to 4 digits
1941 // "words" of 3 digits may be preceded by another number "word" for thousands or millions
1942
1943         int n_digits;
1944         int value;
1945         int ix;
1946         int digix;
1947         unsigned char c;
1948         int suppress_null = 0;
1949         int decimal_point = 0;
1950         int thousandplex = 0;
1951         int thousands_exact = 1;
1952         int thousands_inc = 0;
1953         int prev_thousands = 0;
1954         int ordinal = 0;
1955         int this_value;
1956         int decimal_count;
1957         int max_decimal_count;
1958         int decimal_mode;
1959         int suffix_ix;
1960         int skipwords = 0;
1961         int group_len;
1962         int len;
1963         char *p;
1964         char string[32];  // for looking up entries in **_list
1965         char buf1[100];
1966         char ph_append[50];
1967         char ph_buf[200];
1968         char ph_buf2[50];
1969         char ph_zeros[50];
1970         char suffix[30];  // string[] must be long enough for sizeof(suffix)+2
1971         char buf_digit_lookup[50];
1972
1973         static const char str_pause[2] = {phonPAUSE_NOLINK,0};
1974
1975         *flags = 0;
1976         n_digit_lookup = 0;
1977         buf_digit_lookup[0] = 0;
1978         digit_lookup = buf_digit_lookup;
1979         number_control = control;
1980
1981         for(ix=0; IsDigit09(word[ix]); ix++) ;
1982         n_digits = ix;
1983         value = this_value = atoi(word);
1984
1985         group_len = 3;
1986         if(tr->langopts.numbers2 & NUM2_MYRIADS)
1987                 group_len = 4;
1988
1989         // is there a previous thousands part (as a previous "word") ?
1990         if((n_digits == group_len) && (word[-2] == tr->langopts.thousands_sep) && IsDigit09(word[-3]))
1991         {
1992                 prev_thousands = 1;
1993         }
1994         else if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE))
1995         {
1996                 // thousands groups can be separated by spaces
1997                 if((n_digits == 3) && !(wtab->flags & FLAG_MULTIPLE_SPACES) && IsDigit09(word[-2]))
1998                 {
1999                         prev_thousands = 1;
2000                 }
2001         }
2002         if(prev_thousands == 0)
2003         {
2004                 speak_missing_thousands = 0;
2005         }
2006
2007         ph_ordinal2[0] = 0;
2008         ph_zeros[0] = 0;
2009
2010         if(prev_thousands || (word[0] != '0'))
2011         {
2012                 // don't check for ordinal if the number has a leading zero
2013                 if((ordinal = CheckDotOrdinal(tr, word, &word[ix], wtab, 0)) != 0)
2014                 {
2015 //                      dot_ordinal = 1;
2016                 }
2017         }
2018
2019         if((word[ix] == '.') && !IsDigit09(word[ix+1]) && !IsDigit09(word[ix+2]) && !(wtab[1].flags & FLAG_NOSPACE))
2020         {
2021                 // remove dot unless followed by another number
2022                 word[ix] = 0;
2023         }
2024
2025         if((ordinal == 0) || (tr->translator_name == L('h','u')))
2026         {
2027 // NOTE lang=hu, allow both dot and ordinal suffix, eg. "december 21.-én"
2028                 // look for an ordinal number suffix after the number
2029                 ix++;
2030                 p = suffix;
2031                 if(wtab[0].flags & FLAG_HYPHEN_AFTER)
2032                 {
2033                         *p++ = '-';
2034                         ix++;
2035                 }
2036                 while((word[ix] != 0) && (word[ix] != ' ') && (ix < (int)(sizeof(suffix)-1)))
2037                 {
2038                         *p++ = word[ix++];
2039                 }
2040                 *p = 0;
2041
2042                 if(suffix[0] != 0)
2043                 {
2044                         if((tr->langopts.ordinal_indicator != NULL) && (strcmp(suffix, tr->langopts.ordinal_indicator) == 0))
2045                         {
2046                                 ordinal = 2;
2047                         }
2048                         else if(!IsDigit09(suffix[0]))  // not _#9 (tab)
2049                         {
2050                                 sprintf(string,"_#%s",suffix);
2051                                 if(Lookup(tr, string, ph_ordinal2))
2052                                 {
2053                                         // this is an ordinal suffix
2054                                         ordinal = 2;
2055                                         flags[0] |= FLAG_SKIPWORDS;
2056                                         skipwords = 1;
2057                                         sprintf(string,"_x#%s",suffix);
2058                                         Lookup(tr, string, ph_ordinal2x);  // is there an alternate pronunciation?
2059                                 }
2060                         }
2061                 }
2062         }
2063
2064         if(wtab[0].flags & FLAG_ORDINAL)
2065                 ordinal = 2;
2066
2067         ph_append[0] = 0;
2068         ph_buf2[0] = 0;
2069
2070
2071         if((word[0] == '0') && (prev_thousands == 0) && (word[1] != ' ') && (word[1] != tr->langopts.decimal_sep))
2072         {
2073                 if((n_digits == 2) && (word[3] == ':') && IsDigit09(word[5]) && isspace(word[7]))
2074                 {
2075                         // looks like a time 02:30, omit the leading zero
2076                 }
2077                 else
2078                 {
2079                         if(n_digits > 3)
2080                         {
2081                                 flags[0] &= ~FLAG_SKIPWORDS;
2082                                 return(0);     // long number string with leading zero, speak as individual digits
2083                         }
2084
2085                         // speak leading zeros
2086                         for(ix=0; (word[ix] == '0') && (ix < (n_digits-1)); ix++)
2087                         {
2088                                 Lookup(tr, "_0", &ph_zeros[strlen(ph_zeros)]);
2089                         }
2090                 }
2091         }
2092
2093         if((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' '))
2094                 thousands_inc = 1;
2095         else if(word[n_digits] == tr->langopts.thousands_sep)
2096                 thousands_inc = 2;
2097
2098         suffix_ix = n_digits+2;
2099         if(thousands_inc > 0)
2100         {
2101                 // if the following "words" are three-digit groups, count them and add
2102                 // a "thousand"/"million" suffix to this one
2103                 digix = n_digits + thousands_inc;
2104
2105                 while(((wtab[thousandplex+1].flags & FLAG_MULTIPLE_SPACES) == 0) && CheckThousandsGroup(&word[digix], group_len))
2106                 {
2107                         for(ix=0; ix<group_len; ix++)
2108                         {
2109                                 if(word[digix+ix] != '0')
2110                                 {
2111                                         thousands_exact = 0;
2112                                         break;
2113                                 }
2114                         }
2115
2116                         thousandplex++;
2117                         digix += group_len;
2118                         if((word[digix] == tr->langopts.thousands_sep) || ((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[digix] == ' ')))
2119                         {
2120                                 suffix_ix = digix+2;
2121                                 digix += thousands_inc;
2122                         }
2123                         else
2124                                 break;
2125                 }
2126         }
2127
2128         if((value == 0) && prev_thousands)
2129         {
2130                 suppress_null = 1;
2131         }
2132
2133         if(tr->translator_name == L('h','u'))
2134         {
2135                 // variant form of numbers when followed by hyphen and a suffix starting with 'a' or 'e' (but not a, e, az, ez, azt, ezt
2136                 if((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact==1) && hu_number_e(&word[suffix_ix], thousandplex, value))
2137                 {
2138                         number_control |= 1;  // use _1e variant of number
2139                 }
2140         }
2141
2142         if((word[n_digits] == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1]))
2143         {
2144                 // this "word" ends with a decimal point
2145                 Lookup(tr, "_dpt", ph_append);
2146                 decimal_point = 0x100;
2147         }
2148         else if(suppress_null == 0)
2149         {
2150                 if(thousands_inc > 0)
2151                 {
2152                         if(thousandplex > 0)
2153 //                      if((thousandplex > 0) && (value < 1000))
2154                         {
2155                                 if((suppress_null == 0) && (LookupThousands(tr,value,thousandplex, thousands_exact, ph_append)))
2156                                 {
2157                                         // found an exact match for N thousand
2158                                         value = 0;
2159                                         suppress_null = 1;
2160                                 }
2161                         }
2162                 }
2163         }
2164         else
2165
2166                 if(speak_missing_thousands == 1)
2167                 {
2168                         // speak this thousandplex if there was no word for the previous thousandplex
2169                         sprintf(string,"_0M%d",thousandplex+1);
2170                         if(Lookup(tr, string, buf1)==0)
2171                         {
2172                                 sprintf(string,"_0M%d",thousandplex);
2173                                 Lookup(tr, string, ph_append);
2174                         }
2175                 }
2176
2177         if((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
2178         {
2179                 Lookup(tr, "_.", ph_append);
2180         }
2181
2182         if(thousandplex == 0)
2183         {
2184                 char *p2;
2185                 // look for combinations of the number with the next word
2186                 p = word;
2187                 while(IsDigit09(p[1])) p++;  // just use the last digit
2188                 if(IsDigit09(p[-1]))
2189                 {
2190                         p2 = p - 1;
2191                         if(LookupDictList(tr, &p2, buf_digit_lookup, flags, FLAG_SUFX, wtab))  // lookup 2 digits
2192                         {
2193                                 n_digit_lookup = 2;
2194                         }
2195                 }
2196
2197 //              if((buf_digit_lookup[0] == 0) && (*p != '0') && (dot_ordinal==0))
2198                 if((buf_digit_lookup[0] == 0) && (*p != '0'))
2199                 {
2200                         // LANG=hu ?
2201                         // not found, lookup only the last digit (?? but not if dot-ordinal has been found)
2202                         if(LookupDictList(tr, &p, buf_digit_lookup, flags, FLAG_SUFX, wtab))  // don't match '0', or entries with $only
2203                         {
2204                                 n_digit_lookup = 1;
2205                         }
2206                 }
2207
2208                 if(prev_thousands == 0)
2209                 {
2210                         if((decimal_point == 0) && (ordinal == 0))
2211                         {
2212                                 // Look for special pronunciation for this number in isolation (LANG=kl)
2213                                 sprintf(string, "_%dn", value);
2214                                 if(Lookup(tr, string, ph_out))
2215                                 {
2216                                         return(1);
2217                                 }
2218                         }
2219
2220                         if(tr->langopts.numbers2 & NUM2_PERCENT_BEFORE)
2221                         {
2222                                 // LANG=si, say "percent" before the number
2223                                 p2 = word;
2224                                 while((*p2 != ' ') && (*p2 != 0))
2225                                 {
2226                                         p2++;
2227                                 }
2228                                 if(p2[1] == '%')
2229                                 {
2230                                         Lookup(tr, "%", ph_out);
2231                                         ph_out += strlen(ph_out);
2232                                         p2[1] = ' ';
2233                                 }
2234                         }
2235                 }
2236
2237         }
2238
2239         LookupNum3(tr, value, ph_buf, suppress_null, thousandplex, prev_thousands | ordinal | decimal_point);
2240         if((thousandplex > 0) && (tr->langopts.numbers2 & 0x200))
2241                 sprintf(ph_out,"%s%s%c%s%s",ph_zeros,ph_append,phonEND_WORD,ph_buf2,ph_buf);  // say "thousands" before its number
2242         else
2243                 sprintf(ph_out,"%s%s%s%c%s",ph_zeros,ph_buf2,ph_buf,phonEND_WORD,ph_append);
2244
2245
2246         while(decimal_point)
2247         {
2248                 n_digits++;
2249
2250                 decimal_count = 0;
2251                 while(IsDigit09(word[n_digits+decimal_count]))
2252                         decimal_count++;
2253
2254 //              if(decimal_count > 1)
2255                 {
2256                         max_decimal_count = 2;
2257                         switch(decimal_mode = (tr->langopts.numbers & 0xe000))
2258                         {
2259                         case NUM_DFRACTION_4:
2260                                 max_decimal_count = 5;
2261                         case NUM_DFRACTION_2:
2262                                 // French/Polish decimal fraction
2263                                 while(word[n_digits] == '0')
2264                                 {
2265                                         Lookup(tr, "_0", buf1);
2266                                         strcat(ph_out,buf1);
2267                                         decimal_count--;
2268                                         n_digits++;
2269                                 }
2270                                 if((decimal_count <= max_decimal_count) && IsDigit09(word[n_digits]))
2271                                 {
2272                                         LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
2273                                         strcat(ph_out,buf1);
2274                                         n_digits += decimal_count;
2275                                 }
2276                                 break;
2277
2278                         case NUM_DFRACTION_1:   // italian, say "hundredths" if leading zero
2279                         case NUM_DFRACTION_5:   // hungarian, always say "tenths" etc.
2280                         case NUM_DFRACTION_6:   // kazakh, always say "tenths" etc, before the decimal fraction
2281                                 LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0);
2282                                 if((word[n_digits]=='0') || (decimal_mode != NUM_DFRACTION_1))
2283                                 {
2284                                         // decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
2285                                         sprintf(string,"_0Z%d",decimal_count);
2286                                         if(Lookup(tr, string, buf1) == 0)
2287                                                 break;   // revert to speaking single digits
2288
2289                                         if(decimal_mode == NUM_DFRACTION_6)
2290                                                 strcat(ph_out, buf1);
2291                                         else
2292                                                 strcat(ph_buf, buf1);
2293                                 }
2294                                 strcat(ph_out,ph_buf);
2295                                 n_digits += decimal_count;
2296                                 break;
2297
2298                         case NUM_DFRACTION_3:
2299                                 // Romanian decimal fractions
2300                                 if((decimal_count <= 4) && (word[n_digits] != '0'))
2301                                 {
2302                                         LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
2303                                         strcat(ph_out,buf1);
2304                                         n_digits += decimal_count;
2305                                 }
2306                                 break;
2307
2308                         case NUM_DFRACTION_7:
2309                                 // alternative form of decimal fraction digits, except the final digit
2310                                 while(decimal_count-- > 1)
2311                                 {
2312                                         sprintf(string,"_%cd", word[n_digits]);
2313                                         if(Lookup(tr, string, buf1) == 0)
2314                                                 break;
2315                                         n_digits++;
2316                                         strcat(ph_out, buf1);
2317                                 }
2318                         }
2319                 }
2320
2321                 while(IsDigit09(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10)))
2322                 {
2323                         // speak any remaining decimal fraction digits individually
2324                         value = word[n_digits++] - '0';
2325                         LookupNum2(tr, value, 2, buf1);
2326                         len = strlen(ph_out);
2327                         sprintf(&ph_out[len],"%c%s", phonEND_WORD, buf1);
2328                 }
2329
2330                 // something after the decimal part ?
2331                 if(Lookup(tr, "_dpt2", buf1))
2332                         strcat(ph_out,buf1);
2333
2334                 if((c == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1]))
2335                 {
2336                         Lookup(tr, "_dpt", buf1);
2337                         strcat(ph_out,buf1);
2338                 }
2339                 else
2340                 {
2341                         decimal_point = 0;
2342                 }
2343         }
2344         if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
2345         {
2346                 int next_char;
2347                 char *p;
2348                 p = &word[n_digits+1];
2349
2350                 p += utf8_in(&next_char,p);
2351                 if((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
2352                         utf8_in(&next_char,p);
2353
2354                 if(!iswalpha2(next_char) && (thousands_exact==0))
2355 //              if(!iswalpha2(next_char) && !((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact != 0)))
2356                         strcat(ph_out,str_pause);  // don't add pause for 100s,  6th, etc.
2357         }
2358
2359         *flags |= FLAG_FOUND;
2360         speak_missing_thousands--;
2361
2362         if(skipwords)
2363                 dictionary_skipwords = skipwords;
2364         return(1);
2365 }  // end of TranslateNumber_1
2366
2367
2368
2369 int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
2370 {//=============================================================================================================
2371         if((option_sayas == SAYAS_DIGITS1) || (wtab[0].flags & FLAG_INDIVIDUAL_DIGITS))
2372                 return(0);  // speak digits individually
2373
2374         if(tr->langopts.numbers != 0)
2375         {
2376                 return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control));
2377         }
2378         return(0);
2379 }  // end of TranslateNumber
2380