1 /************************************************************************
\r
2 * $Id: minibidi.c 9169 2011-05-07 10:57:19Z simon $
\r
7 * This is an implementation of Unicode's Bidirectional Algorithm
\r
10 * http://www.unicode.org/reports/tr9/
\r
12 * Author: Ahmad Khalifa
\r
15 * Revision Details: (Updated by Revision Control System)
\r
17 * $Date: 2011-05-07 11:57:19 +0100 (Sat, 07 May 2011) $
\r
21 * (www.arabeyes.org - under MIT license)
\r
23 ************************************************************************/
\r
28 * - Explicit marks need to be handled (they are not 100% now)
\r
32 #include <stdlib.h> /* definition of wchar_t*/
\r
36 #define LMASK 0x3F /* Embedding Level mask */
\r
37 #define OMASK 0xC0 /* Override mask */
\r
38 #define OISL 0x80 /* Override is L */
\r
39 #define OISR 0x40 /* Override is R */
\r
41 /* For standalone compilation in a testing mode.
\r
42 * Still depends on the PuTTY headers for snewn and sfree, but can avoid
\r
43 * _linking_ with any other PuTTY code. */
\r
45 #define safemalloc malloc
\r
46 #define safefree free
\r
49 /* Shaping Helpers */
\r
50 #define STYPE(xh) ((((xh) >= SHAPE_FIRST) && ((xh) <= SHAPE_LAST)) ? \
\r
51 shapetypes[(xh)-SHAPE_FIRST].type : SU) /*))*/
\r
52 #define SISOLATED(xh) (shapetypes[(xh)-SHAPE_FIRST].form_b) /* isolated-form entry (form_b) for xh; unlike STYPE, no range check, so xh must be within SHAPE_FIRST..SHAPE_LAST */
\r
53 #define SFINAL(xh) ((xh)+1) /* final form: isolated-form code point + 1 (callers pass a SISOLATED() result) */
\r
54 #define SINITIAL(xh) ((xh)+2) /* initial form: isolated-form code point + 2 (callers pass a SISOLATED() result) */
\r
55 #define SMEDIAL(ch) ((ch)+3) /* medial form: isolated-form code point + 3 (callers pass a SISOLATED() result) */
\r
57 #define leastGreaterOdd(x) ( ((x)+1) | 1 ) /* smallest odd value strictly greater than x: even x -> x+1, odd x -> x+2 */
\r
58 #define leastGreaterEven(x) ( ((x)+2) &~ 1 ) /* smallest even value strictly greater than x: even x -> x+2, odd x -> x+1 */
\r
60 typedef struct bidi_char {
\r
62 unsigned short index;
\r
65 /* function declarations */
\r
66 void flipThisRun(bidi_char *from, unsigned char* level, int max, int count); /* flips the text buffer `from` (count entries) according to level `max` and all higher levels, using the resolved levels in `level` */
\r
67 int findIndexOfRun(unsigned char* level , int start, int count, int tlevel); /* scans level[] from index `start` up to `count` for an entry equal to tlevel, i.e. the start of a run at that level */
\r
68 unsigned char getType(int ch); /* returns the bidi character type of code point ch; ON when ch is in none of the lookup-table intervals */
\r
69 unsigned char setOverrideBits(unsigned char level, unsigned char override); /* returns `level` with its override bits (the two most significant bits) set according to `override` */
\r
70 int getPreviousLevel(unsigned char* level, int from); /* finds the most recent run of equal values in level[] before `from` and returns the value preceding that run; used for U+202C POP DIRECTIONAL FORMATTING */
\r
71 int do_shape(bidi_char *line, bidi_char *to, int count); /* main shaping entry point: shapes the `count` characters of `line` into the output buffer `to` */
\r
72 int do_bidi(bidi_char *line, int count); /* main bidi entry point: applies the Bidirectional algorithm to the `count`-entry buffer `line` */
\r
73 void doMirror(wchar_t* ch); /* NOTE(review): definition not visible in this part of the file; presumably replaces *ch with its mirrored counterpart -- confirm against the definition */
\r
75 /* character types */
\r
100 SL, /* Left-Joining, doesnt exist in U+0600 - U+06FF */
\r
101 SR, /* Right-Joining, ie has Isolated, Final */
\r
102 SD, /* Dual-Joining, ie has Isolated, Final, Initial, Medial */
\r
103 SU, /* Non-Joining */
\r
104 SC /* Join-Causing, like U+0640 (TATWEEL) */
\r
112 /* Kept near the actual table, for verification. */
\r
113 #define SHAPE_FIRST 0x621
\r
114 #define SHAPE_LAST (SHAPE_FIRST + lenof(shapetypes) - 1) /* last code point covered by shapetypes[]; lenof() is defined elsewhere (presumably the array element count -- confirm) */
\r
116 const shape_node shapetypes[] = {
\r
117 /* index, Typ, Iso, Ligature Index*/
\r
118 /* 621 */ {SU, 0xFE80},
\r
119 /* 622 */ {SR, 0xFE81},
\r
120 /* 623 */ {SR, 0xFE83},
\r
121 /* 624 */ {SR, 0xFE85},
\r
122 /* 625 */ {SR, 0xFE87},
\r
123 /* 626 */ {SD, 0xFE89},
\r
124 /* 627 */ {SR, 0xFE8D},
\r
125 /* 628 */ {SD, 0xFE8F},
\r
126 /* 629 */ {SR, 0xFE93},
\r
127 /* 62A */ {SD, 0xFE95},
\r
128 /* 62B */ {SD, 0xFE99},
\r
129 /* 62C */ {SD, 0xFE9D},
\r
130 /* 62D */ {SD, 0xFEA1},
\r
131 /* 62E */ {SD, 0xFEA5},
\r
132 /* 62F */ {SR, 0xFEA9},
\r
133 /* 630 */ {SR, 0xFEAB},
\r
134 /* 631 */ {SR, 0xFEAD},
\r
135 /* 632 */ {SR, 0xFEAF},
\r
136 /* 633 */ {SD, 0xFEB1},
\r
137 /* 634 */ {SD, 0xFEB5},
\r
138 /* 635 */ {SD, 0xFEB9},
\r
139 /* 636 */ {SD, 0xFEBD},
\r
140 /* 637 */ {SD, 0xFEC1},
\r
141 /* 638 */ {SD, 0xFEC5},
\r
142 /* 639 */ {SD, 0xFEC9},
\r
143 /* 63A */ {SD, 0xFECD},
\r
144 /* 63B */ {SU, 0x0},
\r
145 /* 63C */ {SU, 0x0},
\r
146 /* 63D */ {SU, 0x0},
\r
147 /* 63E */ {SU, 0x0},
\r
148 /* 63F */ {SU, 0x0},
\r
149 /* 640 */ {SC, 0x0},
\r
150 /* 641 */ {SD, 0xFED1},
\r
151 /* 642 */ {SD, 0xFED5},
\r
152 /* 643 */ {SD, 0xFED9},
\r
153 /* 644 */ {SD, 0xFEDD},
\r
154 /* 645 */ {SD, 0xFEE1},
\r
155 /* 646 */ {SD, 0xFEE5},
\r
156 /* 647 */ {SD, 0xFEE9},
\r
157 /* 648 */ {SR, 0xFEED},
\r
158 /* 649 */ {SR, 0xFEEF}, /* SD */
\r
159 /* 64A */ {SD, 0xFEF1},
\r
160 /* 64B */ {SU, 0x0},
\r
161 /* 64C */ {SU, 0x0},
\r
162 /* 64D */ {SU, 0x0},
\r
163 /* 64E */ {SU, 0x0},
\r
164 /* 64F */ {SU, 0x0},
\r
165 /* 650 */ {SU, 0x0},
\r
166 /* 651 */ {SU, 0x0},
\r
167 /* 652 */ {SU, 0x0},
\r
168 /* 653 */ {SU, 0x0},
\r
169 /* 654 */ {SU, 0x0},
\r
170 /* 655 */ {SU, 0x0},
\r
171 /* 656 */ {SU, 0x0},
\r
172 /* 657 */ {SU, 0x0},
\r
173 /* 658 */ {SU, 0x0},
\r
174 /* 659 */ {SU, 0x0},
\r
175 /* 65A */ {SU, 0x0},
\r
176 /* 65B */ {SU, 0x0},
\r
177 /* 65C */ {SU, 0x0},
\r
178 /* 65D */ {SU, 0x0},
\r
179 /* 65E */ {SU, 0x0},
\r
180 /* 65F */ {SU, 0x0},
\r
181 /* 660 */ {SU, 0x0},
\r
182 /* 661 */ {SU, 0x0},
\r
183 /* 662 */ {SU, 0x0},
\r
184 /* 663 */ {SU, 0x0},
\r
185 /* 664 */ {SU, 0x0},
\r
186 /* 665 */ {SU, 0x0},
\r
187 /* 666 */ {SU, 0x0},
\r
188 /* 667 */ {SU, 0x0},
\r
189 /* 668 */ {SU, 0x0},
\r
190 /* 669 */ {SU, 0x0},
\r
191 /* 66A */ {SU, 0x0},
\r
192 /* 66B */ {SU, 0x0},
\r
193 /* 66C */ {SU, 0x0},
\r
194 /* 66D */ {SU, 0x0},
\r
195 /* 66E */ {SU, 0x0},
\r
196 /* 66F */ {SU, 0x0},
\r
197 /* 670 */ {SU, 0x0},
\r
198 /* 671 */ {SR, 0xFB50},
\r
199 /* 672 */ {SU, 0x0},
\r
200 /* 673 */ {SU, 0x0},
\r
201 /* 674 */ {SU, 0x0},
\r
202 /* 675 */ {SU, 0x0},
\r
203 /* 676 */ {SU, 0x0},
\r
204 /* 677 */ {SU, 0x0},
\r
205 /* 678 */ {SU, 0x0},
\r
206 /* 679 */ {SD, 0xFB66},
\r
207 /* 67A */ {SD, 0xFB5E},
\r
208 /* 67B */ {SD, 0xFB52},
\r
209 /* 67C */ {SU, 0x0},
\r
210 /* 67D */ {SU, 0x0},
\r
211 /* 67E */ {SD, 0xFB56},
\r
212 /* 67F */ {SD, 0xFB62},
\r
213 /* 680 */ {SD, 0xFB5A},
\r
214 /* 681 */ {SU, 0x0},
\r
215 /* 682 */ {SU, 0x0},
\r
216 /* 683 */ {SD, 0xFB76},
\r
217 /* 684 */ {SD, 0xFB72},
\r
218 /* 685 */ {SU, 0x0},
\r
219 /* 686 */ {SD, 0xFB7A},
\r
220 /* 687 */ {SD, 0xFB7E},
\r
221 /* 688 */ {SR, 0xFB88},
\r
222 /* 689 */ {SU, 0x0},
\r
223 /* 68A */ {SU, 0x0},
\r
224 /* 68B */ {SU, 0x0},
\r
225 /* 68C */ {SR, 0xFB84},
\r
226 /* 68D */ {SR, 0xFB82},
\r
227 /* 68E */ {SR, 0xFB86},
\r
228 /* 68F */ {SU, 0x0},
\r
229 /* 690 */ {SU, 0x0},
\r
230 /* 691 */ {SR, 0xFB8C},
\r
231 /* 692 */ {SU, 0x0},
\r
232 /* 693 */ {SU, 0x0},
\r
233 /* 694 */ {SU, 0x0},
\r
234 /* 695 */ {SU, 0x0},
\r
235 /* 696 */ {SU, 0x0},
\r
236 /* 697 */ {SU, 0x0},
\r
237 /* 698 */ {SR, 0xFB8A},
\r
238 /* 699 */ {SU, 0x0},
\r
239 /* 69A */ {SU, 0x0},
\r
240 /* 69B */ {SU, 0x0},
\r
241 /* 69C */ {SU, 0x0},
\r
242 /* 69D */ {SU, 0x0},
\r
243 /* 69E */ {SU, 0x0},
\r
244 /* 69F */ {SU, 0x0},
\r
245 /* 6A0 */ {SU, 0x0},
\r
246 /* 6A1 */ {SU, 0x0},
\r
247 /* 6A2 */ {SU, 0x0},
\r
248 /* 6A3 */ {SU, 0x0},
\r
249 /* 6A4 */ {SD, 0xFB6A},
\r
250 /* 6A5 */ {SU, 0x0},
\r
251 /* 6A6 */ {SD, 0xFB6E},
\r
252 /* 6A7 */ {SU, 0x0},
\r
253 /* 6A8 */ {SU, 0x0},
\r
254 /* 6A9 */ {SD, 0xFB8E},
\r
255 /* 6AA */ {SU, 0x0},
\r
256 /* 6AB */ {SU, 0x0},
\r
257 /* 6AC */ {SU, 0x0},
\r
258 /* 6AD */ {SD, 0xFBD3},
\r
259 /* 6AE */ {SU, 0x0},
\r
260 /* 6AF */ {SD, 0xFB92},
\r
261 /* 6B0 */ {SU, 0x0},
\r
262 /* 6B1 */ {SD, 0xFB9A},
\r
263 /* 6B2 */ {SU, 0x0},
\r
264 /* 6B3 */ {SD, 0xFB96},
\r
265 /* 6B4 */ {SU, 0x0},
\r
266 /* 6B5 */ {SU, 0x0},
\r
267 /* 6B6 */ {SU, 0x0},
\r
268 /* 6B7 */ {SU, 0x0},
\r
269 /* 6B8 */ {SU, 0x0},
\r
270 /* 6B9 */ {SU, 0x0},
\r
271 /* 6BA */ {SR, 0xFB9E},
\r
272 /* 6BB */ {SD, 0xFBA0},
\r
273 /* 6BC */ {SU, 0x0},
\r
274 /* 6BD */ {SU, 0x0},
\r
275 /* 6BE */ {SD, 0xFBAA},
\r
276 /* 6BF */ {SU, 0x0},
\r
277 /* 6C0 */ {SR, 0xFBA4},
\r
278 /* 6C1 */ {SD, 0xFBA6},
\r
279 /* 6C2 */ {SU, 0x0},
\r
280 /* 6C3 */ {SU, 0x0},
\r
281 /* 6C4 */ {SU, 0x0},
\r
282 /* 6C5 */ {SR, 0xFBE0},
\r
283 /* 6C6 */ {SR, 0xFBD9},
\r
284 /* 6C7 */ {SR, 0xFBD7},
\r
285 /* 6C8 */ {SR, 0xFBDB},
\r
286 /* 6C9 */ {SR, 0xFBE2},
\r
287 /* 6CA */ {SU, 0x0},
\r
288 /* 6CB */ {SR, 0xFBDE},
\r
289 /* 6CC */ {SD, 0xFBFC},
\r
290 /* 6CD */ {SU, 0x0},
\r
291 /* 6CE */ {SU, 0x0},
\r
292 /* 6CF */ {SU, 0x0},
\r
293 /* 6D0 */ {SU, 0x0},
\r
294 /* 6D1 */ {SU, 0x0},
\r
295 /* 6D2 */ {SR, 0xFBAE},
\r
299 * Flips the text buffer, according to max level, and
\r
300 * all higher levels
\r
303 * from: text buffer, on which to apply flipping
\r
304 * level: resolved levels buffer
\r
305 * max: the maximum level found in this line (should be unsigned char)
\r
306 * count: line size in bidi_char
\r
308 void flipThisRun(bidi_char *from, unsigned char *level, int max, int count)
\r
310 int i, j, k, tlevel;
\r
314 while (i<count && j<count) {
\r
316 /* find the start of the run of level=max */
\r
318 i = j = findIndexOfRun(level, i, count, max);
\r
319 /* find the end of the run */
\r
320 while (i<count && tlevel <= level[i]) {
\r
323 for (k = i - 1; k > j; k--, j++) {
\r
332 * Finds the index of a run with level equals tlevel
\r
334 int findIndexOfRun(unsigned char* level , int start, int count, int tlevel)
\r
337 for (i=start; i<count; i++) {
\r
338 if (tlevel == level[i]) {
\r
346 * Returns the bidi character type of ch.
\r
348 * The data table in this function is constructed from the Unicode
\r
349 * Character Database, downloadable from unicode.org at the URL
\r
351 * http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
\r
353 * by the following fragment of Perl:
\r
355 perl -ne 'split ";"; $num = hex $_[0]; $type = $_[4];' \
\r
356 -e '$fl = ($_[1] =~ /First/ ? 1 : $_[1] =~ /Last/ ? 2 : 0);' \
\r
357 -e 'if ($type eq $runtype and ($runend == $num-1 or ' \
\r
358 -e ' ($fl==2 and $pfl==1))) {$runend = $num;} else { &reset; }' \
\r
359 -e '$pfl=$fl; END { &reset }; sub reset {' \
\r
360 -e 'printf" {0x%04x, 0x%04x, %s},\n",$runstart,$runend,$runtype' \
\r
361 -e ' if defined $runstart and $runtype ne "ON";' \
\r
362 -e '$runstart=$runend=$num; $runtype=$type;}' \
\r
366 unsigned char getType(int ch)
\r
368 static const struct {
\r
369 int first, last, type;
\r
371 {0x0000, 0x0008, BN},
\r
372 {0x0009, 0x0009, S},
\r
373 {0x000a, 0x000a, B},
\r
374 {0x000b, 0x000b, S},
\r
375 {0x000c, 0x000c, WS},
\r
376 {0x000d, 0x000d, B},
\r
377 {0x000e, 0x001b, BN},
\r
378 {0x001c, 0x001e, B},
\r
379 {0x001f, 0x001f, S},
\r
380 {0x0020, 0x0020, WS},
\r
381 {0x0023, 0x0025, ET},
\r
382 {0x002b, 0x002b, ES},
\r
383 {0x002c, 0x002c, CS},
\r
384 {0x002d, 0x002d, ES},
\r
385 {0x002e, 0x002f, CS},
\r
386 {0x0030, 0x0039, EN},
\r
387 {0x003a, 0x003a, CS},
\r
388 {0x0041, 0x005a, L},
\r
389 {0x0061, 0x007a, L},
\r
390 {0x007f, 0x0084, BN},
\r
391 {0x0085, 0x0085, B},
\r
392 {0x0086, 0x009f, BN},
\r
393 {0x00a0, 0x00a0, CS},
\r
394 {0x00a2, 0x00a5, ET},
\r
395 {0x00aa, 0x00aa, L},
\r
396 {0x00ad, 0x00ad, BN},
\r
397 {0x00b0, 0x00b1, ET},
\r
398 {0x00b2, 0x00b3, EN},
\r
399 {0x00b5, 0x00b5, L},
\r
400 {0x00b9, 0x00b9, EN},
\r
401 {0x00ba, 0x00ba, L},
\r
402 {0x00c0, 0x00d6, L},
\r
403 {0x00d8, 0x00f6, L},
\r
404 {0x00f8, 0x0236, L},
\r
405 {0x0250, 0x02b8, L},
\r
406 {0x02bb, 0x02c1, L},
\r
407 {0x02d0, 0x02d1, L},
\r
408 {0x02e0, 0x02e4, L},
\r
409 {0x02ee, 0x02ee, L},
\r
410 {0x0300, 0x0357, NSM},
\r
411 {0x035d, 0x036f, NSM},
\r
412 {0x037a, 0x037a, L},
\r
413 {0x0386, 0x0386, L},
\r
414 {0x0388, 0x038a, L},
\r
415 {0x038c, 0x038c, L},
\r
416 {0x038e, 0x03a1, L},
\r
417 {0x03a3, 0x03ce, L},
\r
418 {0x03d0, 0x03f5, L},
\r
419 {0x03f7, 0x03fb, L},
\r
420 {0x0400, 0x0482, L},
\r
421 {0x0483, 0x0486, NSM},
\r
422 {0x0488, 0x0489, NSM},
\r
423 {0x048a, 0x04ce, L},
\r
424 {0x04d0, 0x04f5, L},
\r
425 {0x04f8, 0x04f9, L},
\r
426 {0x0500, 0x050f, L},
\r
427 {0x0531, 0x0556, L},
\r
428 {0x0559, 0x055f, L},
\r
429 {0x0561, 0x0587, L},
\r
430 {0x0589, 0x0589, L},
\r
431 {0x0591, 0x05a1, NSM},
\r
432 {0x05a3, 0x05b9, NSM},
\r
433 {0x05bb, 0x05bd, NSM},
\r
434 {0x05be, 0x05be, R},
\r
435 {0x05bf, 0x05bf, NSM},
\r
436 {0x05c0, 0x05c0, R},
\r
437 {0x05c1, 0x05c2, NSM},
\r
438 {0x05c3, 0x05c3, R},
\r
439 {0x05c4, 0x05c4, NSM},
\r
440 {0x05d0, 0x05ea, R},
\r
441 {0x05f0, 0x05f4, R},
\r
442 {0x0600, 0x0603, AL},
\r
443 {0x060c, 0x060c, CS},
\r
444 {0x060d, 0x060d, AL},
\r
445 {0x0610, 0x0615, NSM},
\r
446 {0x061b, 0x061b, AL},
\r
447 {0x061f, 0x061f, AL},
\r
448 {0x0621, 0x063a, AL},
\r
449 {0x0640, 0x064a, AL},
\r
450 {0x064b, 0x0658, NSM},
\r
451 {0x0660, 0x0669, AN},
\r
452 {0x066a, 0x066a, ET},
\r
453 {0x066b, 0x066c, AN},
\r
454 {0x066d, 0x066f, AL},
\r
455 {0x0670, 0x0670, NSM},
\r
456 {0x0671, 0x06d5, AL},
\r
457 {0x06d6, 0x06dc, NSM},
\r
458 {0x06dd, 0x06dd, AL},
\r
459 {0x06de, 0x06e4, NSM},
\r
460 {0x06e5, 0x06e6, AL},
\r
461 {0x06e7, 0x06e8, NSM},
\r
462 {0x06ea, 0x06ed, NSM},
\r
463 {0x06ee, 0x06ef, AL},
\r
464 {0x06f0, 0x06f9, EN},
\r
465 {0x06fa, 0x070d, AL},
\r
466 {0x070f, 0x070f, BN},
\r
467 {0x0710, 0x0710, AL},
\r
468 {0x0711, 0x0711, NSM},
\r
469 {0x0712, 0x072f, AL},
\r
470 {0x0730, 0x074a, NSM},
\r
471 {0x074d, 0x074f, AL},
\r
472 {0x0780, 0x07a5, AL},
\r
473 {0x07a6, 0x07b0, NSM},
\r
474 {0x07b1, 0x07b1, AL},
\r
475 {0x0901, 0x0902, NSM},
\r
476 {0x0903, 0x0939, L},
\r
477 {0x093c, 0x093c, NSM},
\r
478 {0x093d, 0x0940, L},
\r
479 {0x0941, 0x0948, NSM},
\r
480 {0x0949, 0x094c, L},
\r
481 {0x094d, 0x094d, NSM},
\r
482 {0x0950, 0x0950, L},
\r
483 {0x0951, 0x0954, NSM},
\r
484 {0x0958, 0x0961, L},
\r
485 {0x0962, 0x0963, NSM},
\r
486 {0x0964, 0x0970, L},
\r
487 {0x0981, 0x0981, NSM},
\r
488 {0x0982, 0x0983, L},
\r
489 {0x0985, 0x098c, L},
\r
490 {0x098f, 0x0990, L},
\r
491 {0x0993, 0x09a8, L},
\r
492 {0x09aa, 0x09b0, L},
\r
493 {0x09b2, 0x09b2, L},
\r
494 {0x09b6, 0x09b9, L},
\r
495 {0x09bc, 0x09bc, NSM},
\r
496 {0x09bd, 0x09c0, L},
\r
497 {0x09c1, 0x09c4, NSM},
\r
498 {0x09c7, 0x09c8, L},
\r
499 {0x09cb, 0x09cc, L},
\r
500 {0x09cd, 0x09cd, NSM},
\r
501 {0x09d7, 0x09d7, L},
\r
502 {0x09dc, 0x09dd, L},
\r
503 {0x09df, 0x09e1, L},
\r
504 {0x09e2, 0x09e3, NSM},
\r
505 {0x09e6, 0x09f1, L},
\r
506 {0x09f2, 0x09f3, ET},
\r
507 {0x09f4, 0x09fa, L},
\r
508 {0x0a01, 0x0a02, NSM},
\r
509 {0x0a03, 0x0a03, L},
\r
510 {0x0a05, 0x0a0a, L},
\r
511 {0x0a0f, 0x0a10, L},
\r
512 {0x0a13, 0x0a28, L},
\r
513 {0x0a2a, 0x0a30, L},
\r
514 {0x0a32, 0x0a33, L},
\r
515 {0x0a35, 0x0a36, L},
\r
516 {0x0a38, 0x0a39, L},
\r
517 {0x0a3c, 0x0a3c, NSM},
\r
518 {0x0a3e, 0x0a40, L},
\r
519 {0x0a41, 0x0a42, NSM},
\r
520 {0x0a47, 0x0a48, NSM},
\r
521 {0x0a4b, 0x0a4d, NSM},
\r
522 {0x0a59, 0x0a5c, L},
\r
523 {0x0a5e, 0x0a5e, L},
\r
524 {0x0a66, 0x0a6f, L},
\r
525 {0x0a70, 0x0a71, NSM},
\r
526 {0x0a72, 0x0a74, L},
\r
527 {0x0a81, 0x0a82, NSM},
\r
528 {0x0a83, 0x0a83, L},
\r
529 {0x0a85, 0x0a8d, L},
\r
530 {0x0a8f, 0x0a91, L},
\r
531 {0x0a93, 0x0aa8, L},
\r
532 {0x0aaa, 0x0ab0, L},
\r
533 {0x0ab2, 0x0ab3, L},
\r
534 {0x0ab5, 0x0ab9, L},
\r
535 {0x0abc, 0x0abc, NSM},
\r
536 {0x0abd, 0x0ac0, L},
\r
537 {0x0ac1, 0x0ac5, NSM},
\r
538 {0x0ac7, 0x0ac8, NSM},
\r
539 {0x0ac9, 0x0ac9, L},
\r
540 {0x0acb, 0x0acc, L},
\r
541 {0x0acd, 0x0acd, NSM},
\r
542 {0x0ad0, 0x0ad0, L},
\r
543 {0x0ae0, 0x0ae1, L},
\r
544 {0x0ae2, 0x0ae3, NSM},
\r
545 {0x0ae6, 0x0aef, L},
\r
546 {0x0af1, 0x0af1, ET},
\r
547 {0x0b01, 0x0b01, NSM},
\r
548 {0x0b02, 0x0b03, L},
\r
549 {0x0b05, 0x0b0c, L},
\r
550 {0x0b0f, 0x0b10, L},
\r
551 {0x0b13, 0x0b28, L},
\r
552 {0x0b2a, 0x0b30, L},
\r
553 {0x0b32, 0x0b33, L},
\r
554 {0x0b35, 0x0b39, L},
\r
555 {0x0b3c, 0x0b3c, NSM},
\r
556 {0x0b3d, 0x0b3e, L},
\r
557 {0x0b3f, 0x0b3f, NSM},
\r
558 {0x0b40, 0x0b40, L},
\r
559 {0x0b41, 0x0b43, NSM},
\r
560 {0x0b47, 0x0b48, L},
\r
561 {0x0b4b, 0x0b4c, L},
\r
562 {0x0b4d, 0x0b4d, NSM},
\r
563 {0x0b56, 0x0b56, NSM},
\r
564 {0x0b57, 0x0b57, L},
\r
565 {0x0b5c, 0x0b5d, L},
\r
566 {0x0b5f, 0x0b61, L},
\r
567 {0x0b66, 0x0b71, L},
\r
568 {0x0b82, 0x0b82, NSM},
\r
569 {0x0b83, 0x0b83, L},
\r
570 {0x0b85, 0x0b8a, L},
\r
571 {0x0b8e, 0x0b90, L},
\r
572 {0x0b92, 0x0b95, L},
\r
573 {0x0b99, 0x0b9a, L},
\r
574 {0x0b9c, 0x0b9c, L},
\r
575 {0x0b9e, 0x0b9f, L},
\r
576 {0x0ba3, 0x0ba4, L},
\r
577 {0x0ba8, 0x0baa, L},
\r
578 {0x0bae, 0x0bb5, L},
\r
579 {0x0bb7, 0x0bb9, L},
\r
580 {0x0bbe, 0x0bbf, L},
\r
581 {0x0bc0, 0x0bc0, NSM},
\r
582 {0x0bc1, 0x0bc2, L},
\r
583 {0x0bc6, 0x0bc8, L},
\r
584 {0x0bca, 0x0bcc, L},
\r
585 {0x0bcd, 0x0bcd, NSM},
\r
586 {0x0bd7, 0x0bd7, L},
\r
587 {0x0be7, 0x0bf2, L},
\r
588 {0x0bf9, 0x0bf9, ET},
\r
589 {0x0c01, 0x0c03, L},
\r
590 {0x0c05, 0x0c0c, L},
\r
591 {0x0c0e, 0x0c10, L},
\r
592 {0x0c12, 0x0c28, L},
\r
593 {0x0c2a, 0x0c33, L},
\r
594 {0x0c35, 0x0c39, L},
\r
595 {0x0c3e, 0x0c40, NSM},
\r
596 {0x0c41, 0x0c44, L},
\r
597 {0x0c46, 0x0c48, NSM},
\r
598 {0x0c4a, 0x0c4d, NSM},
\r
599 {0x0c55, 0x0c56, NSM},
\r
600 {0x0c60, 0x0c61, L},
\r
601 {0x0c66, 0x0c6f, L},
\r
602 {0x0c82, 0x0c83, L},
\r
603 {0x0c85, 0x0c8c, L},
\r
604 {0x0c8e, 0x0c90, L},
\r
605 {0x0c92, 0x0ca8, L},
\r
606 {0x0caa, 0x0cb3, L},
\r
607 {0x0cb5, 0x0cb9, L},
\r
608 {0x0cbc, 0x0cbc, NSM},
\r
609 {0x0cbd, 0x0cc4, L},
\r
610 {0x0cc6, 0x0cc8, L},
\r
611 {0x0cca, 0x0ccb, L},
\r
612 {0x0ccc, 0x0ccd, NSM},
\r
613 {0x0cd5, 0x0cd6, L},
\r
614 {0x0cde, 0x0cde, L},
\r
615 {0x0ce0, 0x0ce1, L},
\r
616 {0x0ce6, 0x0cef, L},
\r
617 {0x0d02, 0x0d03, L},
\r
618 {0x0d05, 0x0d0c, L},
\r
619 {0x0d0e, 0x0d10, L},
\r
620 {0x0d12, 0x0d28, L},
\r
621 {0x0d2a, 0x0d39, L},
\r
622 {0x0d3e, 0x0d40, L},
\r
623 {0x0d41, 0x0d43, NSM},
\r
624 {0x0d46, 0x0d48, L},
\r
625 {0x0d4a, 0x0d4c, L},
\r
626 {0x0d4d, 0x0d4d, NSM},
\r
627 {0x0d57, 0x0d57, L},
\r
628 {0x0d60, 0x0d61, L},
\r
629 {0x0d66, 0x0d6f, L},
\r
630 {0x0d82, 0x0d83, L},
\r
631 {0x0d85, 0x0d96, L},
\r
632 {0x0d9a, 0x0db1, L},
\r
633 {0x0db3, 0x0dbb, L},
\r
634 {0x0dbd, 0x0dbd, L},
\r
635 {0x0dc0, 0x0dc6, L},
\r
636 {0x0dca, 0x0dca, NSM},
\r
637 {0x0dcf, 0x0dd1, L},
\r
638 {0x0dd2, 0x0dd4, NSM},
\r
639 {0x0dd6, 0x0dd6, NSM},
\r
640 {0x0dd8, 0x0ddf, L},
\r
641 {0x0df2, 0x0df4, L},
\r
642 {0x0e01, 0x0e30, L},
\r
643 {0x0e31, 0x0e31, NSM},
\r
644 {0x0e32, 0x0e33, L},
\r
645 {0x0e34, 0x0e3a, NSM},
\r
646 {0x0e3f, 0x0e3f, ET},
\r
647 {0x0e40, 0x0e46, L},
\r
648 {0x0e47, 0x0e4e, NSM},
\r
649 {0x0e4f, 0x0e5b, L},
\r
650 {0x0e81, 0x0e82, L},
\r
651 {0x0e84, 0x0e84, L},
\r
652 {0x0e87, 0x0e88, L},
\r
653 {0x0e8a, 0x0e8a, L},
\r
654 {0x0e8d, 0x0e8d, L},
\r
655 {0x0e94, 0x0e97, L},
\r
656 {0x0e99, 0x0e9f, L},
\r
657 {0x0ea1, 0x0ea3, L},
\r
658 {0x0ea5, 0x0ea5, L},
\r
659 {0x0ea7, 0x0ea7, L},
\r
660 {0x0eaa, 0x0eab, L},
\r
661 {0x0ead, 0x0eb0, L},
\r
662 {0x0eb1, 0x0eb1, NSM},
\r
663 {0x0eb2, 0x0eb3, L},
\r
664 {0x0eb4, 0x0eb9, NSM},
\r
665 {0x0ebb, 0x0ebc, NSM},
\r
666 {0x0ebd, 0x0ebd, L},
\r
667 {0x0ec0, 0x0ec4, L},
\r
668 {0x0ec6, 0x0ec6, L},
\r
669 {0x0ec8, 0x0ecd, NSM},
\r
670 {0x0ed0, 0x0ed9, L},
\r
671 {0x0edc, 0x0edd, L},
\r
672 {0x0f00, 0x0f17, L},
\r
673 {0x0f18, 0x0f19, NSM},
\r
674 {0x0f1a, 0x0f34, L},
\r
675 {0x0f35, 0x0f35, NSM},
\r
676 {0x0f36, 0x0f36, L},
\r
677 {0x0f37, 0x0f37, NSM},
\r
678 {0x0f38, 0x0f38, L},
\r
679 {0x0f39, 0x0f39, NSM},
\r
680 {0x0f3e, 0x0f47, L},
\r
681 {0x0f49, 0x0f6a, L},
\r
682 {0x0f71, 0x0f7e, NSM},
\r
683 {0x0f7f, 0x0f7f, L},
\r
684 {0x0f80, 0x0f84, NSM},
\r
685 {0x0f85, 0x0f85, L},
\r
686 {0x0f86, 0x0f87, NSM},
\r
687 {0x0f88, 0x0f8b, L},
\r
688 {0x0f90, 0x0f97, NSM},
\r
689 {0x0f99, 0x0fbc, NSM},
\r
690 {0x0fbe, 0x0fc5, L},
\r
691 {0x0fc6, 0x0fc6, NSM},
\r
692 {0x0fc7, 0x0fcc, L},
\r
693 {0x0fcf, 0x0fcf, L},
\r
694 {0x1000, 0x1021, L},
\r
695 {0x1023, 0x1027, L},
\r
696 {0x1029, 0x102a, L},
\r
697 {0x102c, 0x102c, L},
\r
698 {0x102d, 0x1030, NSM},
\r
699 {0x1031, 0x1031, L},
\r
700 {0x1032, 0x1032, NSM},
\r
701 {0x1036, 0x1037, NSM},
\r
702 {0x1038, 0x1038, L},
\r
703 {0x1039, 0x1039, NSM},
\r
704 {0x1040, 0x1057, L},
\r
705 {0x1058, 0x1059, NSM},
\r
706 {0x10a0, 0x10c5, L},
\r
707 {0x10d0, 0x10f8, L},
\r
708 {0x10fb, 0x10fb, L},
\r
709 {0x1100, 0x1159, L},
\r
710 {0x115f, 0x11a2, L},
\r
711 {0x11a8, 0x11f9, L},
\r
712 {0x1200, 0x1206, L},
\r
713 {0x1208, 0x1246, L},
\r
714 {0x1248, 0x1248, L},
\r
715 {0x124a, 0x124d, L},
\r
716 {0x1250, 0x1256, L},
\r
717 {0x1258, 0x1258, L},
\r
718 {0x125a, 0x125d, L},
\r
719 {0x1260, 0x1286, L},
\r
720 {0x1288, 0x1288, L},
\r
721 {0x128a, 0x128d, L},
\r
722 {0x1290, 0x12ae, L},
\r
723 {0x12b0, 0x12b0, L},
\r
724 {0x12b2, 0x12b5, L},
\r
725 {0x12b8, 0x12be, L},
\r
726 {0x12c0, 0x12c0, L},
\r
727 {0x12c2, 0x12c5, L},
\r
728 {0x12c8, 0x12ce, L},
\r
729 {0x12d0, 0x12d6, L},
\r
730 {0x12d8, 0x12ee, L},
\r
731 {0x12f0, 0x130e, L},
\r
732 {0x1310, 0x1310, L},
\r
733 {0x1312, 0x1315, L},
\r
734 {0x1318, 0x131e, L},
\r
735 {0x1320, 0x1346, L},
\r
736 {0x1348, 0x135a, L},
\r
737 {0x1361, 0x137c, L},
\r
738 {0x13a0, 0x13f4, L},
\r
739 {0x1401, 0x1676, L},
\r
740 {0x1680, 0x1680, WS},
\r
741 {0x1681, 0x169a, L},
\r
742 {0x16a0, 0x16f0, L},
\r
743 {0x1700, 0x170c, L},
\r
744 {0x170e, 0x1711, L},
\r
745 {0x1712, 0x1714, NSM},
\r
746 {0x1720, 0x1731, L},
\r
747 {0x1732, 0x1734, NSM},
\r
748 {0x1735, 0x1736, L},
\r
749 {0x1740, 0x1751, L},
\r
750 {0x1752, 0x1753, NSM},
\r
751 {0x1760, 0x176c, L},
\r
752 {0x176e, 0x1770, L},
\r
753 {0x1772, 0x1773, NSM},
\r
754 {0x1780, 0x17b6, L},
\r
755 {0x17b7, 0x17bd, NSM},
\r
756 {0x17be, 0x17c5, L},
\r
757 {0x17c6, 0x17c6, NSM},
\r
758 {0x17c7, 0x17c8, L},
\r
759 {0x17c9, 0x17d3, NSM},
\r
760 {0x17d4, 0x17da, L},
\r
761 {0x17db, 0x17db, ET},
\r
762 {0x17dc, 0x17dc, L},
\r
763 {0x17dd, 0x17dd, NSM},
\r
764 {0x17e0, 0x17e9, L},
\r
765 {0x180b, 0x180d, NSM},
\r
766 {0x180e, 0x180e, WS},
\r
767 {0x1810, 0x1819, L},
\r
768 {0x1820, 0x1877, L},
\r
769 {0x1880, 0x18a8, L},
\r
770 {0x18a9, 0x18a9, NSM},
\r
771 {0x1900, 0x191c, L},
\r
772 {0x1920, 0x1922, NSM},
\r
773 {0x1923, 0x1926, L},
\r
774 {0x1927, 0x192b, NSM},
\r
775 {0x1930, 0x1931, L},
\r
776 {0x1932, 0x1932, NSM},
\r
777 {0x1933, 0x1938, L},
\r
778 {0x1939, 0x193b, NSM},
\r
779 {0x1946, 0x196d, L},
\r
780 {0x1970, 0x1974, L},
\r
781 {0x1d00, 0x1d6b, L},
\r
782 {0x1e00, 0x1e9b, L},
\r
783 {0x1ea0, 0x1ef9, L},
\r
784 {0x1f00, 0x1f15, L},
\r
785 {0x1f18, 0x1f1d, L},
\r
786 {0x1f20, 0x1f45, L},
\r
787 {0x1f48, 0x1f4d, L},
\r
788 {0x1f50, 0x1f57, L},
\r
789 {0x1f59, 0x1f59, L},
\r
790 {0x1f5b, 0x1f5b, L},
\r
791 {0x1f5d, 0x1f5d, L},
\r
792 {0x1f5f, 0x1f7d, L},
\r
793 {0x1f80, 0x1fb4, L},
\r
794 {0x1fb6, 0x1fbc, L},
\r
795 {0x1fbe, 0x1fbe, L},
\r
796 {0x1fc2, 0x1fc4, L},
\r
797 {0x1fc6, 0x1fcc, L},
\r
798 {0x1fd0, 0x1fd3, L},
\r
799 {0x1fd6, 0x1fdb, L},
\r
800 {0x1fe0, 0x1fec, L},
\r
801 {0x1ff2, 0x1ff4, L},
\r
802 {0x1ff6, 0x1ffc, L},
\r
803 {0x2000, 0x200a, WS},
\r
804 {0x200b, 0x200d, BN},
\r
805 {0x200e, 0x200e, L},
\r
806 {0x200f, 0x200f, R},
\r
807 {0x2028, 0x2028, WS},
\r
808 {0x2029, 0x2029, B},
\r
809 {0x202a, 0x202a, LRE},
\r
810 {0x202b, 0x202b, RLE},
\r
811 {0x202c, 0x202c, PDF},
\r
812 {0x202d, 0x202d, LRO},
\r
813 {0x202e, 0x202e, RLO},
\r
814 {0x202f, 0x202f, WS},
\r
815 {0x2030, 0x2034, ET},
\r
816 {0x2044, 0x2044, CS},
\r
817 {0x205f, 0x205f, WS},
\r
818 {0x2060, 0x2063, BN},
\r
819 {0x206a, 0x206f, BN},
\r
820 {0x2070, 0x2070, EN},
\r
821 {0x2071, 0x2071, L},
\r
822 {0x2074, 0x2079, EN},
\r
823 {0x207a, 0x207b, ET},
\r
824 {0x207f, 0x207f, L},
\r
825 {0x2080, 0x2089, EN},
\r
826 {0x208a, 0x208b, ET},
\r
827 {0x20a0, 0x20b1, ET},
\r
828 {0x20d0, 0x20ea, NSM},
\r
829 {0x2102, 0x2102, L},
\r
830 {0x2107, 0x2107, L},
\r
831 {0x210a, 0x2113, L},
\r
832 {0x2115, 0x2115, L},
\r
833 {0x2119, 0x211d, L},
\r
834 {0x2124, 0x2124, L},
\r
835 {0x2126, 0x2126, L},
\r
836 {0x2128, 0x2128, L},
\r
837 {0x212a, 0x212d, L},
\r
838 {0x212e, 0x212e, ET},
\r
839 {0x212f, 0x2131, L},
\r
840 {0x2133, 0x2139, L},
\r
841 {0x213d, 0x213f, L},
\r
842 {0x2145, 0x2149, L},
\r
843 {0x2160, 0x2183, L},
\r
844 {0x2212, 0x2213, ET},
\r
845 {0x2336, 0x237a, L},
\r
846 {0x2395, 0x2395, L},
\r
847 {0x2488, 0x249b, EN},
\r
848 {0x249c, 0x24e9, L},
\r
849 {0x2800, 0x28ff, L},
\r
850 {0x3000, 0x3000, WS},
\r
851 {0x3005, 0x3007, L},
\r
852 {0x3021, 0x3029, L},
\r
853 {0x302a, 0x302f, NSM},
\r
854 {0x3031, 0x3035, L},
\r
855 {0x3038, 0x303c, L},
\r
856 {0x3041, 0x3096, L},
\r
857 {0x3099, 0x309a, NSM},
\r
858 {0x309d, 0x309f, L},
\r
859 {0x30a1, 0x30fa, L},
\r
860 {0x30fc, 0x30ff, L},
\r
861 {0x3105, 0x312c, L},
\r
862 {0x3131, 0x318e, L},
\r
863 {0x3190, 0x31b7, L},
\r
864 {0x31f0, 0x321c, L},
\r
865 {0x3220, 0x3243, L},
\r
866 {0x3260, 0x327b, L},
\r
867 {0x327f, 0x32b0, L},
\r
868 {0x32c0, 0x32cb, L},
\r
869 {0x32d0, 0x32fe, L},
\r
870 {0x3300, 0x3376, L},
\r
871 {0x337b, 0x33dd, L},
\r
872 {0x33e0, 0x33fe, L},
\r
873 {0x3400, 0x4db5, L},
\r
874 {0x4e00, 0x9fa5, L},
\r
875 {0xa000, 0xa48c, L},
\r
876 {0xac00, 0xd7a3, L},
\r
877 {0xd800, 0xfa2d, L},
\r
878 {0xfa30, 0xfa6a, L},
\r
879 {0xfb00, 0xfb06, L},
\r
880 {0xfb13, 0xfb17, L},
\r
881 {0xfb1d, 0xfb1d, R},
\r
882 {0xfb1e, 0xfb1e, NSM},
\r
883 {0xfb1f, 0xfb28, R},
\r
884 {0xfb29, 0xfb29, ET},
\r
885 {0xfb2a, 0xfb36, R},
\r
886 {0xfb38, 0xfb3c, R},
\r
887 {0xfb3e, 0xfb3e, R},
\r
888 {0xfb40, 0xfb41, R},
\r
889 {0xfb43, 0xfb44, R},
\r
890 {0xfb46, 0xfb4f, R},
\r
891 {0xfb50, 0xfbb1, AL},
\r
892 {0xfbd3, 0xfd3d, AL},
\r
893 {0xfd50, 0xfd8f, AL},
\r
894 {0xfd92, 0xfdc7, AL},
\r
895 {0xfdf0, 0xfdfc, AL},
\r
896 {0xfe00, 0xfe0f, NSM},
\r
897 {0xfe20, 0xfe23, NSM},
\r
898 {0xfe50, 0xfe50, CS},
\r
899 {0xfe52, 0xfe52, CS},
\r
900 {0xfe55, 0xfe55, CS},
\r
901 {0xfe5f, 0xfe5f, ET},
\r
902 {0xfe62, 0xfe63, ET},
\r
903 {0xfe69, 0xfe6a, ET},
\r
904 {0xfe70, 0xfe74, AL},
\r
905 {0xfe76, 0xfefc, AL},
\r
906 {0xfeff, 0xfeff, BN},
\r
907 {0xff03, 0xff05, ET},
\r
908 {0xff0b, 0xff0b, ET},
\r
909 {0xff0c, 0xff0c, CS},
\r
910 {0xff0d, 0xff0d, ET},
\r
911 {0xff0e, 0xff0e, CS},
\r
912 {0xff0f, 0xff0f, ES},
\r
913 {0xff10, 0xff19, EN},
\r
914 {0xff1a, 0xff1a, CS},
\r
915 {0xff21, 0xff3a, L},
\r
916 {0xff41, 0xff5a, L},
\r
917 {0xff66, 0xffbe, L},
\r
918 {0xffc2, 0xffc7, L},
\r
919 {0xffca, 0xffcf, L},
\r
920 {0xffd2, 0xffd7, L},
\r
921 {0xffda, 0xffdc, L},
\r
922 {0xffe0, 0xffe1, ET},
\r
923 {0xffe5, 0xffe6, ET},
\r
924 {0x10000, 0x1000b, L},
\r
925 {0x1000d, 0x10026, L},
\r
926 {0x10028, 0x1003a, L},
\r
927 {0x1003c, 0x1003d, L},
\r
928 {0x1003f, 0x1004d, L},
\r
929 {0x10050, 0x1005d, L},
\r
930 {0x10080, 0x100fa, L},
\r
931 {0x10100, 0x10100, L},
\r
932 {0x10102, 0x10102, L},
\r
933 {0x10107, 0x10133, L},
\r
934 {0x10137, 0x1013f, L},
\r
935 {0x10300, 0x1031e, L},
\r
936 {0x10320, 0x10323, L},
\r
937 {0x10330, 0x1034a, L},
\r
938 {0x10380, 0x1039d, L},
\r
939 {0x1039f, 0x1039f, L},
\r
940 {0x10400, 0x1049d, L},
\r
941 {0x104a0, 0x104a9, L},
\r
942 {0x10800, 0x10805, R},
\r
943 {0x10808, 0x10808, R},
\r
944 {0x1080a, 0x10835, R},
\r
945 {0x10837, 0x10838, R},
\r
946 {0x1083c, 0x1083c, R},
\r
947 {0x1083f, 0x1083f, R},
\r
948 {0x1d000, 0x1d0f5, L},
\r
949 {0x1d100, 0x1d126, L},
\r
950 {0x1d12a, 0x1d166, L},
\r
951 {0x1d167, 0x1d169, NSM},
\r
952 {0x1d16a, 0x1d172, L},
\r
953 {0x1d173, 0x1d17a, BN},
\r
954 {0x1d17b, 0x1d182, NSM},
\r
955 {0x1d183, 0x1d184, L},
\r
956 {0x1d185, 0x1d18b, NSM},
\r
957 {0x1d18c, 0x1d1a9, L},
\r
958 {0x1d1aa, 0x1d1ad, NSM},
\r
959 {0x1d1ae, 0x1d1dd, L},
\r
960 {0x1d400, 0x1d454, L},
\r
961 {0x1d456, 0x1d49c, L},
\r
962 {0x1d49e, 0x1d49f, L},
\r
963 {0x1d4a2, 0x1d4a2, L},
\r
964 {0x1d4a5, 0x1d4a6, L},
\r
965 {0x1d4a9, 0x1d4ac, L},
\r
966 {0x1d4ae, 0x1d4b9, L},
\r
967 {0x1d4bb, 0x1d4bb, L},
\r
968 {0x1d4bd, 0x1d4c3, L},
\r
969 {0x1d4c5, 0x1d505, L},
\r
970 {0x1d507, 0x1d50a, L},
\r
971 {0x1d50d, 0x1d514, L},
\r
972 {0x1d516, 0x1d51c, L},
\r
973 {0x1d51e, 0x1d539, L},
\r
974 {0x1d53b, 0x1d53e, L},
\r
975 {0x1d540, 0x1d544, L},
\r
976 {0x1d546, 0x1d546, L},
\r
977 {0x1d54a, 0x1d550, L},
\r
978 {0x1d552, 0x1d6a3, L},
\r
979 {0x1d6a8, 0x1d7c9, L},
\r
980 {0x1d7ce, 0x1d7ff, EN},
\r
981 {0x20000, 0x2a6d6, L},
\r
982 {0x2f800, 0x2fa1d, L},
\r
983 {0xe0001, 0xe0001, BN},
\r
984 {0xe0020, 0xe007f, BN},
\r
985 {0xe0100, 0xe01ef, NSM},
\r
986 {0xf0000, 0xffffd, L},
\r
987 {0x100000, 0x10fffd, L}
\r
995 while (j - i > 1) {
\r
997 if (ch < lookup[k].first)
\r
999 else if (ch > lookup[k].last)
\r
1002 return lookup[k].type;
\r
1006 * If we reach here, the character was not in any of the
\r
1007 * intervals listed in the lookup table. This means we return
\r
1008 * ON (`Other Neutrals'). This is the appropriate code for any
\r
1009 * character genuinely not listed in the Unicode table, and
\r
1010 * also the table above has deliberately left out any
\r
1011 * characters _explicitly_ listed as ON (to save space!).
\r
1017 * Function exported to front ends to allow them to identify
\r
1018 * bidi-active characters (in case, for example, the platform's
\r
1019 * text display function can't conveniently be prevented from doing
\r
1020 * its own bidi and so special treatment is required for characters
\r
1021 * that would cause the bidi algorithm to activate).
\r
1023 * This function is passed a single Unicode code point, and returns
\r
1024 * nonzero if the presence of this code point can possibly cause
\r
1025 * the bidi algorithm to do any reordering. Thus, any string
\r
1026 * composed entirely of characters for which is_rtl() returns zero
\r
1027 * should be safe to pass to a bidi-active platform display
\r
1028 * function without fear.
\r
1030 * (is_rtl() must therefore also return true for any character
\r
1031 * which would be affected by Arabic shaping, but this isn't
\r
1032 * important because all such characters are right-to-left so it
\r
1033 * would have flagged them anyway.)
\r
1038 * After careful reading of the Unicode bidi algorithm (URL as
\r
1039 * given at the top of this file) I believe that the only
\r
1040 * character classes which can possibly cause trouble are R,
\r
1041 * AL, RLE and RLO. I think that any string containing no
\r
1042 * character in any of those classes will be displayed
\r
1043 * uniformly left-to-right by the Unicode bidi algorithm.
\r
1045 const int mask = (1<<R) | (1<<AL) | (1<<RLE) | (1<<RLO);
\r
1047 return mask & (1 << (getType(c)));
\r
1051 * The most significant 2 bits of each level are used to store
\r
1052 * Override status of each character
\r
1053 * This function sets the override bits of level according
\r
1054 * to the value in override, and returns the new byte.
\r
1056 unsigned char setOverrideBits(unsigned char level, unsigned char override)
\r
1058 if (override == ON)
\r
1060 else if (override == R)
\r
1061 return level | OISR;
\r
1062 else if (override == L)
\r
1063 return level | OISL;
\r
1068 * Find the most recent run of the same value in `level', and
\r
1069 * return the value _before_ it. Used to process U+202C POP
\r
1070 * DIRECTIONAL FORMATTING.
\r
1072 int getPreviousLevel(unsigned char* level, int from)
\r
1075 unsigned char current = level[--from];
\r
1077 while (from >= 0 && level[from] == current)
\r
1081 return level[from];
\r
1088 /* The Main shaping function, and the only one to be used
\r
1089 * by the outside world.
\r
1091 * line: buffer to apply shaping to. this must be passed through do_bidi() first
\r
1092 * to: output buffer for the shaped data
\r
1093 * count: number of characters in line
\r
1095 int do_shape(bidi_char *line, bidi_char *to, int count)
\r
1097 int i, tempShape, ligFlag;
\r
1099 for (ligFlag=i=0; i<count; i++) {
\r
1101 tempShape = STYPE(line[i].wc);
\r
1102 switch (tempShape) {
\r
1110 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
\r
1111 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
\r
1112 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
\r
1114 to[i].wc = SISOLATED(line[i].wc);
\r
1119 /* Make Ligatures */
\r
1120 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
\r
1121 if (line[i].wc == 0x644) {
\r
1122 if (i > 0) switch (line[i-1].wc) {
\r
1125 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
\r
1126 to[i].wc = 0xFEF6;
\r
1128 to[i].wc = 0xFEF5;
\r
1132 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
\r
1133 to[i].wc = 0xFEF8;
\r
1135 to[i].wc = 0xFEF7;
\r
1139 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
\r
1140 to[i].wc = 0xFEFA;
\r
1142 to[i].wc = 0xFEF9;
\r
1146 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
\r
1147 to[i].wc = 0xFEFC;
\r
1149 to[i].wc = 0xFEFB;
\r
1153 to[i-1].wc = 0x20;
\r
1159 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) {
\r
1160 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
\r
1161 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
\r
1162 to[i].wc = SMEDIAL((SISOLATED(line[i].wc)));
\r
1164 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
\r
1168 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
\r
1169 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
\r
1170 to[i].wc = SINITIAL((SISOLATED(line[i].wc)));
\r
1172 to[i].wc = SISOLATED(line[i].wc);
\r
1182 * The Main Bidi Function, and the only function that should
\r
1183 * be used by the outside world.
\r
1185 * line: a buffer of size count containing text to apply
\r
1186 * the Bidirectional algorithm to.
\r
1189 int do_bidi(bidi_char *line, int count)
\r
1191 unsigned char* types;
\r
1192 unsigned char* levels;
\r
1193 unsigned char paragraphLevel;
\r
1194 unsigned char currentEmbedding;
\r
1195 unsigned char currentOverride;
\r
1196 unsigned char tempType;
\r
1197 int i, j, yes, bover;
\r
1199 /* Check the presence of R or AL types as optimization */
\r
1201 for (i=0; i<count; i++) {
\r
1202 int type = getType(line[i].wc);
\r
1203 if (type == R || type == AL) {
\r
1211 /* Initialize types, levels */
\r
1212 types = snewn(count, unsigned char);
\r
1213 levels = snewn(count, unsigned char);
\r
1215 /* Rule (P1) NOT IMPLEMENTED
\r
1216 * P1. Split the text into separate paragraphs. A paragraph separator is
\r
1217 * kept with the previous paragraph. Within each paragraph, apply all the
\r
1218 * other rules of this algorithm.
\r
1221 /* Rule (P2), (P3)
\r
1222 * P2. In each paragraph, find the first character of type L, AL, or R.
\r
1223 * P3. If a character is found in P2 and it is of type AL or R, then set
\r
1224 * the paragraph embedding level to one; otherwise, set it to zero.
\r
1226 paragraphLevel = 0;
\r
1227 for (i=0; i<count ; i++) {
\r
1228 int type = getType(line[i].wc);
\r
1229 if (type == R || type == AL) {
\r
1230 paragraphLevel = 1;
\r
1232 } else if (type == L)
\r
1237 * X1. Begin by setting the current embedding level to the paragraph
\r
1238 * embedding level. Set the directional override status to neutral.
\r
1240 currentEmbedding = paragraphLevel;
\r
1241 currentOverride = ON;
\r
1243 /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8)
\r
1244 * X2. With each RLE, compute the least greater odd embedding level.
\r
1245 * X3. With each LRE, compute the least greater even embedding level.
\r
1246 * X4. With each RLO, compute the least greater odd embedding level.
\r
1247 * X5. With each LRO, compute the least greater even embedding level.
\r
1248 * X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
\r
1249 * a. Set the level of the current character to the current
\r
1250 * embedding level.
\r
1251 * b. Whenever the directional override status is not neutral,
\r
1252 * reset the current character type to the directional
\r
1253 * override status.
\r
1254 * X7. With each PDF, determine the matching embedding or override code.
\r
1255 * If there was a valid matching code, restore (pop) the last
\r
1256 * remembered (pushed) embedding level and directional override.
\r
1257 * X8. All explicit directional embeddings and overrides are completely
\r
1258 * terminated at the end of each paragraph. Paragraph separators are not
\r
1259 * included in the embedding. (Useless here) NOT IMPLEMENTED
\r
1262 for (i=0; i<count; i++) {
\r
1263 tempType = getType(line[i].wc);
\r
1264 switch (tempType) {
\r
1266 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
\r
1267 levels[i] = setOverrideBits(levels[i], currentOverride);
\r
1268 currentOverride = ON;
\r
1272 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
\r
1273 levels[i] = setOverrideBits(levels[i], currentOverride);
\r
1274 currentOverride = ON;
\r
1278 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
\r
1279 tempType = currentOverride = R;
\r
1284 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
\r
1285 tempType = currentOverride = L;
\r
1291 int prevlevel = getPreviousLevel(levels, i);
\r
1293 if (prevlevel == -1) {
\r
1294 currentEmbedding = paragraphLevel;
\r
1295 currentOverride = ON;
\r
1297 currentOverride = currentEmbedding & OMASK;
\r
1298 currentEmbedding = currentEmbedding & ~OMASK;
\r
1301 levels[i] = currentEmbedding;
\r
1304 /* Whitespace is treated as neutral for now */
\r
1307 levels[i] = currentEmbedding;
\r
1309 if (currentOverride != ON)
\r
1310 tempType = currentOverride;
\r
1314 levels[i] = currentEmbedding;
\r
1315 if (currentOverride != ON)
\r
1316 tempType = currentOverride;
\r
1320 types[i] = tempType;
\r
1322 /* this clears out all overrides, so we can use levels safely... */
\r
1323 /* checks bover first */
\r
1325 for (i=0; i<count; i++)
\r
1326 levels[i] = levels[i] & LMASK;
\r
1329 * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
\r
1330 * Here, they're converted to BN.
\r
1332 for (i=0; i<count; i++) {
\r
1333 switch (types[i]) {
\r
1345 * W1. Examine each non-spacing mark (NSM) in the level run, and change
\r
1346 * the type of the NSM to the type of the previous character. If the NSM
\r
1347 * is at the start of the level run, it will get the type of sor.
\r
1349 if (types[0] == NSM)
\r
1350 types[0] = paragraphLevel;
\r
1352 for (i=1; i<count; i++) {
\r
1353 if (types[i] == NSM)
\r
1354 types[i] = types[i-1];
\r
1355 /* Is this a safe assumption?
\r
1356 * I assumed the previous, IS a character.
\r
1361 * W2. Search backwards from each instance of a European number until the
\r
1362 * first strong type (R, L, AL, or sor) is found. If an AL is found,
\r
1363 * change the type of the European number to Arabic number.
\r
1365 for (i=0; i<count; i++) {
\r
1366 if (types[i] == EN) {
\r
1369 if (types[j] == AL) {
\r
1372 } else if (types[j] == R || types[j] == L) {
\r
1381 * W3. Change all ALs to R.
\r
1383 * Optimization: on Rule Xn, we might set a flag on AL type
\r
1384 * to prevent this loop in L R lines only...
\r
1386 for (i=0; i<count; i++) {
\r
1387 if (types[i] == AL)
\r
1392 * W4. A single European separator between two European numbers changes
\r
1393 * to a European number. A single common separator between two numbers
\r
1394 * of the same type changes to that type.
\r
1396 for (i=1; i<(count-1); i++) {
\r
1397 if (types[i] == ES) {
\r
1398 if (types[i-1] == EN && types[i+1] == EN)
\r
1400 } else if (types[i] == CS) {
\r
1401 if (types[i-1] == EN && types[i+1] == EN)
\r
1403 else if (types[i-1] == AN && types[i+1] == AN)
\r
1409 * W5. A sequence of European terminators adjacent to European numbers
\r
1410 * changes to all European numbers.
\r
1412 * Optimization: lots here... else ifs need rearrangement
\r
1414 for (i=0; i<count; i++) {
\r
1415 if (types[i] == ET) {
\r
1416 if (i > 0 && types[i-1] == EN) {
\r
1419 } else if (i < count-1 && types[i+1] == EN) {
\r
1422 } else if (i < count-1 && types[i+1] == ET) {
\r
1424 while (j <count && types[j] == ET) {
\r
1427 if (types[j] == EN)
\r
1434 * W6. Otherwise, separators and terminators change to Other Neutral:
\r
1436 for (i=0; i<count; i++) {
\r
1437 switch (types[i]) {
\r
1447 * W7. Search backwards from each instance of a European number until
\r
1448 * the first strong type (R, L, or sor) is found. If an L is found,
\r
1449 * then change the type of the European number to L.
\r
1451 for (i=0; i<count; i++) {
\r
1452 if (types[i] == EN) {
\r
1455 if (types[j] == L) {
\r
1458 } else if (types[j] == R || types[j] == AL) {
\r
1467 * N1. A sequence of neutrals takes the direction of the surrounding
\r
1468 * strong text if the text on both sides has the same direction. European
\r
1469 * and Arabic numbers are treated as though they were R.
\r
1471 if (count >= 2 && types[0] == ON) {
\r
1472 if ((types[1] == R) || (types[1] == EN) || (types[1] == AN))
\r
1474 else if (types[1] == L)
\r
1477 for (i=1; i<(count-1); i++) {
\r
1478 if (types[i] == ON) {
\r
1479 if (types[i-1] == L) {
\r
1481 while (j<(count-1) && types[j] == ON) {
\r
1484 if (types[j] == L) {
\r
1491 } else if ((types[i-1] == R) ||
\r
1492 (types[i-1] == EN) ||
\r
1493 (types[i-1] == AN)) {
\r
1495 while (j<(count-1) && types[j] == ON) {
\r
1498 if ((types[j] == R) ||
\r
1499 (types[j] == EN) ||
\r
1500 (types[j] == AN)) {
\r
1509 if (count >= 2 && types[count-1] == ON) {
\r
1510 if (types[count-2] == R || types[count-2] == EN || types[count-2] == AN)
\r
1511 types[count-1] = R;
\r
1512 else if (types[count-2] == L)
\r
1513 types[count-1] = L;
\r
1517 * N2. Any remaining neutrals take the embedding direction.
\r
1519 for (i=0; i<count; i++) {
\r
1520 if (types[i] == ON) {
\r
1521 if ((levels[i] % 2) == 0)
\r
1529 * I1. For all characters with an even (left-to-right) embedding
\r
1530 * direction, those of type R go up one level and those of type AN or
\r
1531 * EN go up two levels.
\r
1533 for (i=0; i<count; i++) {
\r
1534 if ((levels[i] % 2) == 0) {
\r
1535 if (types[i] == R)
\r
1537 else if (types[i] == AN || types[i] == EN)
\r
1543 * I2. For all characters with an odd (right-to-left) embedding direction,
\r
1544 * those of type L, EN or AN go up one level.
\r
1546 for (i=0; i<count; i++) {
\r
1547 if ((levels[i] % 2) == 1) {
\r
1548 if (types[i] == L || types[i] == EN || types[i] == AN)
\r
1554 * L1. On each line, reset the embedding level of the following characters
\r
1555 * to the paragraph embedding level:
\r
1556 * (1)segment separators, (2)paragraph separators,
\r
1557 * (3)any sequence of whitespace characters preceding
\r
1558 * a segment separator or paragraph separator,
\r
1559 * (4)and any sequence of white space characters
\r
1560 * at the end of the line.
\r
1561 * The types of characters used here are the original types, not those
\r
1562 * modified by the previous phase.
\r
1565 while (j>0 && (getType(line[j].wc) == WS)) {
\r
1568 if (j < (count-1)) {
\r
1569 for (j++; j<count; j++)
\r
1570 levels[j] = paragraphLevel;
\r
1572 for (i=0; i<count; i++) {
\r
1573 tempType = getType(line[i].wc);
\r
1574 if (tempType == WS) {
\r
1576 while (j<count && (getType(line[j].wc) == WS)) {
\r
1579 if (j==count || getType(line[j].wc) == B ||
\r
1580 getType(line[j].wc) == S) {
\r
1581 for (j--; j>=i ; j--) {
\r
1582 levels[j] = paragraphLevel;
\r
1585 } else if (tempType == B || tempType == S) {
\r
1586 levels[i] = paragraphLevel;
\r
1590 /* Rule (L4) NOT IMPLEMENTED
\r
1591 * L4. A character that possesses the mirrored property as specified by
\r
1592 * Section 4.7, Mirrored, must be depicted by a mirrored glyph if the
\r
1593 * resolved directionality of that character is R.
\r
1595 /* Note: this is implemented before L2 for efficiency */
\r
1596 for (i=0; i<count; i++)
\r
1597 if ((levels[i] % 2) == 1)
\r
1598 doMirror(&line[i].wc);
\r
1601 * L2. From the highest level found in the text to the lowest odd level on
\r
1602 * each line, including intermediate levels not actually present in the
\r
1603 * text, reverse any contiguous sequence of characters that are at that
\r
1606 /* we flip the character string and leave the level array */
\r
1608 tempType = levels[0];
\r
1609 while (i < count) {
\r
1610 if (levels[i] > tempType)
\r
1611 tempType = levels[i];
\r
1614 /* maximum level in tempType. */
\r
1615 while (tempType > 0) { /* loop from highest level to the least odd, */
\r
1616 /* which I assume is 1 */
\r
1617 flipThisRun(line, levels, tempType, count);
\r
1621 /* Rule (L3) NOT IMPLEMENTED
\r
1622 * L3. Combining marks applied to a right-to-left base character will at
\r
1623 * this point precede their base character. If the rendering engine
\r
1624 * expects them to follow the base characters in the final display
\r
1625 * process, then the ordering of the marks and the base character must
\r
/*
 * Replace *ch with its mirrored counterpart, if it has one.
 *
 * ch: pointer to the character to examine; it is overwritten in place
 *     when a mirror mapping exists and left untouched otherwise.
 *
 * do_bidi() calls this for every character whose resolved level is odd.
 *
 * The lookup is split by the high byte of the code point so that only one
 * small switch is consulted per call.  Every entry has a matching reverse
 * entry, so mirroring a character twice yields the original character.
 */
void doMirror(wchar_t* ch)
{
    if ((*ch & 0xFF00) == 0) {
        switch (*ch) {
            case 0x0028: *ch = 0x0029; break;
            case 0x0029: *ch = 0x0028; break;
            case 0x003C: *ch = 0x003E; break;
            case 0x003E: *ch = 0x003C; break;
            case 0x005B: *ch = 0x005D; break;
            case 0x005D: *ch = 0x005B; break;
            case 0x007B: *ch = 0x007D; break;
            case 0x007D: *ch = 0x007B; break;
            case 0x00AB: *ch = 0x00BB; break;
            case 0x00BB: *ch = 0x00AB; break;
            default: break;
        }
    } else if ((*ch & 0xFF00) == 0x2000) {
        switch (*ch) {
            case 0x2039: *ch = 0x203A; break;
            case 0x203A: *ch = 0x2039; break;
            case 0x2045: *ch = 0x2046; break;
            case 0x2046: *ch = 0x2045; break;
            case 0x207D: *ch = 0x207E; break;
            case 0x207E: *ch = 0x207D; break;
            case 0x208D: *ch = 0x208E; break;
            case 0x208E: *ch = 0x208D; break;
            default: break;
        }
    } else if ((*ch & 0xFF00) == 0x2200) {
        switch (*ch) {
            case 0x2208: *ch = 0x220B; break;
            case 0x2209: *ch = 0x220C; break;
            case 0x220A: *ch = 0x220D; break;
            case 0x220B: *ch = 0x2208; break;
            case 0x220C: *ch = 0x2209; break;
            case 0x220D: *ch = 0x220A; break;
            case 0x2215: *ch = 0x29F5; break;
            case 0x223C: *ch = 0x223D; break;
            case 0x223D: *ch = 0x223C; break;
            case 0x2243: *ch = 0x22CD; break;
            case 0x2252: *ch = 0x2253; break;
            case 0x2253: *ch = 0x2252; break;
            case 0x2254: *ch = 0x2255; break;
            case 0x2255: *ch = 0x2254; break;
            case 0x2264: *ch = 0x2265; break;
            case 0x2265: *ch = 0x2264; break;
            case 0x2266: *ch = 0x2267; break;
            case 0x2267: *ch = 0x2266; break;
            case 0x2268: *ch = 0x2269; break;
            case 0x2269: *ch = 0x2268; break;
            case 0x226A: *ch = 0x226B; break;
            case 0x226B: *ch = 0x226A; break;
            case 0x226E: *ch = 0x226F; break;
            case 0x226F: *ch = 0x226E; break;
            case 0x2270: *ch = 0x2271; break;
            case 0x2271: *ch = 0x2270; break;
            case 0x2272: *ch = 0x2273; break;
            case 0x2273: *ch = 0x2272; break;
            case 0x2274: *ch = 0x2275; break;
            case 0x2275: *ch = 0x2274; break;
            case 0x2276: *ch = 0x2277; break;
            case 0x2277: *ch = 0x2276; break;
            case 0x2278: *ch = 0x2279; break;
            case 0x2279: *ch = 0x2278; break;
            case 0x227A: *ch = 0x227B; break;
            case 0x227B: *ch = 0x227A; break;
            case 0x227C: *ch = 0x227D; break;
            case 0x227D: *ch = 0x227C; break;
            case 0x227E: *ch = 0x227F; break;
            case 0x227F: *ch = 0x227E; break;
            case 0x2280: *ch = 0x2281; break;
            case 0x2281: *ch = 0x2280; break;
            case 0x2282: *ch = 0x2283; break;
            case 0x2283: *ch = 0x2282; break;
            case 0x2284: *ch = 0x2285; break;
            case 0x2285: *ch = 0x2284; break;
            case 0x2286: *ch = 0x2287; break;
            case 0x2287: *ch = 0x2286; break;
            case 0x2288: *ch = 0x2289; break;
            case 0x2289: *ch = 0x2288; break;
            case 0x228A: *ch = 0x228B; break;
            case 0x228B: *ch = 0x228A; break;
            case 0x228F: *ch = 0x2290; break;
            case 0x2290: *ch = 0x228F; break;
            case 0x2291: *ch = 0x2292; break;
            case 0x2292: *ch = 0x2291; break;
            case 0x2298: *ch = 0x29B8; break;
            case 0x22A2: *ch = 0x22A3; break;
            case 0x22A3: *ch = 0x22A2; break;
            case 0x22A6: *ch = 0x2ADE; break;
            case 0x22A8: *ch = 0x2AE4; break;
            case 0x22A9: *ch = 0x2AE3; break;
            case 0x22AB: *ch = 0x2AE5; break;
            case 0x22B0: *ch = 0x22B1; break;
            case 0x22B1: *ch = 0x22B0; break;
            case 0x22B2: *ch = 0x22B3; break;
            case 0x22B3: *ch = 0x22B2; break;
            case 0x22B4: *ch = 0x22B5; break;
            case 0x22B5: *ch = 0x22B4; break;
            case 0x22B6: *ch = 0x22B7; break;
            case 0x22B7: *ch = 0x22B6; break;
            case 0x22C9: *ch = 0x22CA; break;
            case 0x22CA: *ch = 0x22C9; break;
            case 0x22CB: *ch = 0x22CC; break;
            case 0x22CC: *ch = 0x22CB; break;
            case 0x22CD: *ch = 0x2243; break;
            case 0x22D0: *ch = 0x22D1; break;
            case 0x22D1: *ch = 0x22D0; break;
            case 0x22D6: *ch = 0x22D7; break;
            case 0x22D7: *ch = 0x22D6; break;
            case 0x22D8: *ch = 0x22D9; break;
            case 0x22D9: *ch = 0x22D8; break;
            case 0x22DA: *ch = 0x22DB; break;
            case 0x22DB: *ch = 0x22DA; break;
            case 0x22DC: *ch = 0x22DD; break;
            case 0x22DD: *ch = 0x22DC; break;
            case 0x22DE: *ch = 0x22DF; break;
            case 0x22DF: *ch = 0x22DE; break;
            case 0x22E0: *ch = 0x22E1; break;
            case 0x22E1: *ch = 0x22E0; break;
            case 0x22E2: *ch = 0x22E3; break;
            case 0x22E3: *ch = 0x22E2; break;
            case 0x22E4: *ch = 0x22E5; break;
            case 0x22E5: *ch = 0x22E4; break;
            case 0x22E6: *ch = 0x22E7; break;
            case 0x22E7: *ch = 0x22E6; break;
            case 0x22E8: *ch = 0x22E9; break;
            case 0x22E9: *ch = 0x22E8; break;
            case 0x22EA: *ch = 0x22EB; break;
            case 0x22EB: *ch = 0x22EA; break;
            case 0x22EC: *ch = 0x22ED; break;
            case 0x22ED: *ch = 0x22EC; break;
            case 0x22F0: *ch = 0x22F1; break;
            case 0x22F1: *ch = 0x22F0; break;
            case 0x22F2: *ch = 0x22FA; break;
            case 0x22F3: *ch = 0x22FB; break;
            case 0x22F4: *ch = 0x22FC; break;
            case 0x22F6: *ch = 0x22FD; break;
            case 0x22F7: *ch = 0x22FE; break;
            case 0x22FA: *ch = 0x22F2; break;
            case 0x22FB: *ch = 0x22F3; break;
            case 0x22FC: *ch = 0x22F4; break;
            case 0x22FD: *ch = 0x22F6; break;
            case 0x22FE: *ch = 0x22F7; break;
            default: break;
        }
    } else if ((*ch & 0xFF00) == 0x2300) {
        switch (*ch) {
            case 0x2308: *ch = 0x2309; break;
            case 0x2309: *ch = 0x2308; break;
            case 0x230A: *ch = 0x230B; break;
            case 0x230B: *ch = 0x230A; break;
            case 0x2329: *ch = 0x232A; break;
            case 0x232A: *ch = 0x2329; break;
            default: break;
        }
    } else if ((*ch & 0xFF00) == 0x2700) {
        switch (*ch) {
            case 0x2768: *ch = 0x2769; break;
            case 0x2769: *ch = 0x2768; break;
            case 0x276A: *ch = 0x276B; break;
            case 0x276B: *ch = 0x276A; break;
            case 0x276C: *ch = 0x276D; break;
            case 0x276D: *ch = 0x276C; break;
            case 0x276E: *ch = 0x276F; break;
            case 0x276F: *ch = 0x276E; break;
            case 0x2770: *ch = 0x2771; break;
            case 0x2771: *ch = 0x2770; break;
            case 0x2772: *ch = 0x2773; break;
            case 0x2773: *ch = 0x2772; break;
            case 0x2774: *ch = 0x2775; break;
            case 0x2775: *ch = 0x2774; break;
            case 0x27D5: *ch = 0x27D6; break;
            case 0x27D6: *ch = 0x27D5; break;
            case 0x27DD: *ch = 0x27DE; break;
            case 0x27DE: *ch = 0x27DD; break;
            case 0x27E2: *ch = 0x27E3; break;
            case 0x27E3: *ch = 0x27E2; break;
            case 0x27E4: *ch = 0x27E5; break;
            case 0x27E5: *ch = 0x27E4; break;
            case 0x27E6: *ch = 0x27E7; break;
            case 0x27E7: *ch = 0x27E6; break;
            case 0x27E8: *ch = 0x27E9; break;
            case 0x27E9: *ch = 0x27E8; break;
            case 0x27EA: *ch = 0x27EB; break;
            case 0x27EB: *ch = 0x27EA; break;
            default: break;
        }
    } else if ((*ch & 0xFF00) == 0x2900) {
        switch (*ch) {
            case 0x2983: *ch = 0x2984; break;
            case 0x2984: *ch = 0x2983; break;
            case 0x2985: *ch = 0x2986; break;
            case 0x2986: *ch = 0x2985; break;
            case 0x2987: *ch = 0x2988; break;
            case 0x2988: *ch = 0x2987; break;
            case 0x2989: *ch = 0x298A; break;
            case 0x298A: *ch = 0x2989; break;
            case 0x298B: *ch = 0x298C; break;
            case 0x298C: *ch = 0x298B; break;
            case 0x298D: *ch = 0x2990; break;
            case 0x298E: *ch = 0x298F; break;
            case 0x298F: *ch = 0x298E; break;
            case 0x2990: *ch = 0x298D; break;
            case 0x2991: *ch = 0x2992; break;
            case 0x2992: *ch = 0x2991; break;
            case 0x2993: *ch = 0x2994; break;
            case 0x2994: *ch = 0x2993; break;
            case 0x2995: *ch = 0x2996; break;
            case 0x2996: *ch = 0x2995; break;
            case 0x2997: *ch = 0x2998; break;
            case 0x2998: *ch = 0x2997; break;
            case 0x29B8: *ch = 0x2298; break;
            case 0x29C0: *ch = 0x29C1; break;
            case 0x29C1: *ch = 0x29C0; break;
            case 0x29C4: *ch = 0x29C5; break;
            case 0x29C5: *ch = 0x29C4; break;
            case 0x29CF: *ch = 0x29D0; break;
            case 0x29D0: *ch = 0x29CF; break;
            case 0x29D1: *ch = 0x29D2; break;
            case 0x29D2: *ch = 0x29D1; break;
            case 0x29D4: *ch = 0x29D5; break;
            case 0x29D5: *ch = 0x29D4; break;
            case 0x29D8: *ch = 0x29D9; break;
            case 0x29D9: *ch = 0x29D8; break;
            case 0x29DA: *ch = 0x29DB; break;
            case 0x29DB: *ch = 0x29DA; break;
            case 0x29F5: *ch = 0x2215; break;
            case 0x29F8: *ch = 0x29F9; break;
            case 0x29F9: *ch = 0x29F8; break;
            case 0x29FC: *ch = 0x29FD; break;
            case 0x29FD: *ch = 0x29FC; break;
            default: break;
        }
    } else if ((*ch & 0xFF00) == 0x2A00) {
        switch (*ch) {
            case 0x2A2B: *ch = 0x2A2C; break;
            case 0x2A2C: *ch = 0x2A2B; break;
            /* 0x2A2D pairs with 0x2A2E (not 0x2A2C, which pairs with 0x2A2B). */
            case 0x2A2D: *ch = 0x2A2E; break;
            case 0x2A2E: *ch = 0x2A2D; break;
            case 0x2A34: *ch = 0x2A35; break;
            case 0x2A35: *ch = 0x2A34; break;
            case 0x2A3C: *ch = 0x2A3D; break;
            case 0x2A3D: *ch = 0x2A3C; break;
            case 0x2A64: *ch = 0x2A65; break;
            case 0x2A65: *ch = 0x2A64; break;
            case 0x2A79: *ch = 0x2A7A; break;
            case 0x2A7A: *ch = 0x2A79; break;
            case 0x2A7D: *ch = 0x2A7E; break;
            case 0x2A7E: *ch = 0x2A7D; break;
            case 0x2A7F: *ch = 0x2A80; break;
            case 0x2A80: *ch = 0x2A7F; break;
            case 0x2A81: *ch = 0x2A82; break;
            case 0x2A82: *ch = 0x2A81; break;
            case 0x2A83: *ch = 0x2A84; break;
            case 0x2A84: *ch = 0x2A83; break;
            case 0x2A8B: *ch = 0x2A8C; break;
            case 0x2A8C: *ch = 0x2A8B; break;
            case 0x2A91: *ch = 0x2A92; break;
            case 0x2A92: *ch = 0x2A91; break;
            case 0x2A93: *ch = 0x2A94; break;
            case 0x2A94: *ch = 0x2A93; break;
            case 0x2A95: *ch = 0x2A96; break;
            case 0x2A96: *ch = 0x2A95; break;
            case 0x2A97: *ch = 0x2A98; break;
            case 0x2A98: *ch = 0x2A97; break;
            case 0x2A99: *ch = 0x2A9A; break;
            case 0x2A9A: *ch = 0x2A99; break;
            case 0x2A9B: *ch = 0x2A9C; break;
            case 0x2A9C: *ch = 0x2A9B; break;
            case 0x2AA1: *ch = 0x2AA2; break;
            case 0x2AA2: *ch = 0x2AA1; break;
            case 0x2AA6: *ch = 0x2AA7; break;
            case 0x2AA7: *ch = 0x2AA6; break;
            case 0x2AA8: *ch = 0x2AA9; break;
            case 0x2AA9: *ch = 0x2AA8; break;
            case 0x2AAA: *ch = 0x2AAB; break;
            case 0x2AAB: *ch = 0x2AAA; break;
            case 0x2AAC: *ch = 0x2AAD; break;
            case 0x2AAD: *ch = 0x2AAC; break;
            case 0x2AAF: *ch = 0x2AB0; break;
            case 0x2AB0: *ch = 0x2AAF; break;
            case 0x2AB3: *ch = 0x2AB4; break;
            case 0x2AB4: *ch = 0x2AB3; break;
            case 0x2ABB: *ch = 0x2ABC; break;
            case 0x2ABC: *ch = 0x2ABB; break;
            case 0x2ABD: *ch = 0x2ABE; break;
            case 0x2ABE: *ch = 0x2ABD; break;
            case 0x2ABF: *ch = 0x2AC0; break;
            case 0x2AC0: *ch = 0x2ABF; break;
            case 0x2AC1: *ch = 0x2AC2; break;
            case 0x2AC2: *ch = 0x2AC1; break;
            case 0x2AC3: *ch = 0x2AC4; break;
            case 0x2AC4: *ch = 0x2AC3; break;
            case 0x2AC5: *ch = 0x2AC6; break;
            case 0x2AC6: *ch = 0x2AC5; break;
            case 0x2ACD: *ch = 0x2ACE; break;
            case 0x2ACE: *ch = 0x2ACD; break;
            case 0x2ACF: *ch = 0x2AD0; break;
            case 0x2AD0: *ch = 0x2ACF; break;
            case 0x2AD1: *ch = 0x2AD2; break;
            case 0x2AD2: *ch = 0x2AD1; break;
            case 0x2AD3: *ch = 0x2AD4; break;
            case 0x2AD4: *ch = 0x2AD3; break;
            case 0x2AD5: *ch = 0x2AD6; break;
            case 0x2AD6: *ch = 0x2AD5; break;
            case 0x2ADE: *ch = 0x22A6; break;
            case 0x2AE3: *ch = 0x22A9; break;
            case 0x2AE4: *ch = 0x22A8; break;
            case 0x2AE5: *ch = 0x22AB; break;
            case 0x2AEC: *ch = 0x2AED; break;
            case 0x2AED: *ch = 0x2AEC; break;
            case 0x2AF7: *ch = 0x2AF8; break;
            case 0x2AF8: *ch = 0x2AF7; break;
            case 0x2AF9: *ch = 0x2AFA; break;
            case 0x2AFA: *ch = 0x2AF9; break;
            default: break;
        }
    } else if ((*ch & 0xFF00) == 0x3000) {
        switch (*ch) {
            case 0x3008: *ch = 0x3009; break;
            case 0x3009: *ch = 0x3008; break;
            case 0x300A: *ch = 0x300B; break;
            case 0x300B: *ch = 0x300A; break;
            case 0x300C: *ch = 0x300D; break;
            case 0x300D: *ch = 0x300C; break;
            case 0x300E: *ch = 0x300F; break;
            case 0x300F: *ch = 0x300E; break;
            case 0x3010: *ch = 0x3011; break;
            case 0x3011: *ch = 0x3010; break;
            case 0x3014: *ch = 0x3015; break;
            case 0x3015: *ch = 0x3014; break;
            case 0x3016: *ch = 0x3017; break;
            case 0x3017: *ch = 0x3016; break;
            case 0x3018: *ch = 0x3019; break;
            case 0x3019: *ch = 0x3018; break;
            case 0x301A: *ch = 0x301B; break;
            case 0x301B: *ch = 0x301A; break;
            default: break;
        }
    } else if ((*ch & 0xFF00) == 0xFF00) {
        switch (*ch) {
            case 0xFF08: *ch = 0xFF09; break;
            case 0xFF09: *ch = 0xFF08; break;
            case 0xFF1C: *ch = 0xFF1E; break;
            case 0xFF1E: *ch = 0xFF1C; break;
            case 0xFF3B: *ch = 0xFF3D; break;
            case 0xFF3D: *ch = 0xFF3B; break;
            case 0xFF5B: *ch = 0xFF5D; break;
            case 0xFF5D: *ch = 0xFF5B; break;
            case 0xFF5F: *ch = 0xFF60; break;
            case 0xFF60: *ch = 0xFF5F; break;
            case 0xFF62: *ch = 0xFF63; break;
            case 0xFF63: *ch = 0xFF62; break;
            default: break;
        }
    }
}
\r
1989 #ifdef TEST_GETTYPE
\r
1991 #include <stdio.h>
\r
1992 #include <assert.h>
\r
1994 int main(int argc, char **argv)
\r
1996 static const struct { int type; char *name; } typetoname[] = {
\r
1997 #define TYPETONAME(X) { X , #X }
\r
2021 for (i = 1; i < argc; i++) {
\r
2022 unsigned long chr = strtoul(argv[i], NULL, 0);
\r
2023 int type = getType(chr);
\r
2024 assert(typetoname[type].type == type);
\r
2025 printf("U+%04x: %s\n", chr, typetoname[type].name);
\r