OSDN Git Service

Update Go library to r60.
[pf3gnuchains/gcc-fork.git] / libgo / go / unicode / letter_test.go
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package unicode_test
6
7 import (
8         "testing"
9         . "unicode"
10 )
11
// upperTest lists code points that TestIsUpper expects IsUpper to accept.
// TestIsLetter feeds the same values to IsLetter, so every entry must also
// be a letter.
var upperTest = []int{
	// ASCII and Latin-1.
	0x41, 0xc0, 0xd8,
	// Remainder of the Basic Multilingual Plane.
	0x100, 0x139, 0x14a, 0x178, 0x181,
	0x376, 0x3cf,
	0x1f2a,
	0x2102,
	0x2c00, 0x2c10, 0x2c20,
	0xa650, 0xa722,
	0xff3a,
	// Supplementary planes.
	0x10400,
	0x1d400, 0x1d7ca,
}
35
// notupperTest lists code points that TestIsUpper expects IsUpper to reject.
var notupperTest = []int{
	// ASCII: 0x40 and 0x5b sit immediately outside 'A'..'Z'; 0x61 is 'a'.
	0x40, 0x5b, 0x61,
	// Remainder of the Basic Multilingual Plane.
	0x185, 0x1b0,
	0x377, 0x387,
	0x2150,
	0xffff,
	// First code point past the Basic Multilingual Plane.
	0x10000,
}
48
// letterTest lists code points that TestIsLetter expects IsLetter to accept.
// TestIsSpace reuses the list as a set of values IsSpace must reject.
var letterTest = []int{
	// ASCII and Latin-1.
	0x41, 0x61,
	0xaa, 0xba, 0xc8, 0xdb, 0xf9,
	// Remainder of the Basic Multilingual Plane.
	0x2ec,
	0x535,
	0x620, 0x6e6,
	0x93d,
	0xa15,
	0xb99,
	0xdc0,
	0xedd,
	0x1000, 0x1200, 0x1312, 0x1401, 0x1885,
	0x2c00,
	0xa800,
	0xf900, 0xfa30,
	0xffda, 0xffdc,
	// Supplementary planes.
	0x10000, 0x10300, 0x10400,
	0x20000,
	0x2f800, 0x2fa1d,
}
84
// notletterTest lists code points that TestIsLetter expects IsLetter to
// reject. TestIsUpper also requires IsUpper to reject each of them.
var notletterTest = []int{
	// ASCII: space and the digit '5'.
	0x20, 0x35,
	// Remainder of the Basic Multilingual Plane.
	0x375,
	0x619,
	0x700,
	0xfffe,
	// Supplementary planes, ending at the largest code point.
	0x1ffff,
	0x10ffff,
}
95
// spaceTest lists code points that TestIsSpace expects IsSpace to accept.
// It covers every special-cased Latin-1 character and adds two values from
// outside Latin-1.
var spaceTest = []int{
	// ASCII control characters and space.
	0x09, 0x0a, 0x0b, 0x0c, 0x0d,
	0x20,
	// Upper half of Latin-1.
	0x85, 0xA0,
	// Outside Latin-1.
	0x2000,
	0x3000,
}
109
// caseT is a single case-mapping expectation: converting code point in with
// the mapping selected by cas must produce code point out.
type caseT struct {
	cas int // mapping selector passed to To (UpperCase, LowerCase, TitleCase, or an invalid value)
	in  int // code point to convert
	out int // expected result
}
113
114 var caseTest = []caseT{
115         // errors
116         {-1, '\n', 0xFFFD},
117         {UpperCase, -1, -1},
118         {UpperCase, 1 << 30, 1 << 30},
119
120         // ASCII (special-cased so test carefully)
121         {UpperCase, '\n', '\n'},
122         {UpperCase, 'a', 'A'},
123         {UpperCase, 'A', 'A'},
124         {UpperCase, '7', '7'},
125         {LowerCase, '\n', '\n'},
126         {LowerCase, 'a', 'a'},
127         {LowerCase, 'A', 'a'},
128         {LowerCase, '7', '7'},
129         {TitleCase, '\n', '\n'},
130         {TitleCase, 'a', 'A'},
131         {TitleCase, 'A', 'A'},
132         {TitleCase, '7', '7'},
133
134         // Latin-1: easy to read the tests!
135         {UpperCase, 0x80, 0x80},
136         {UpperCase, 'Å', 'Å'},
137         {UpperCase, 'å', 'Å'},
138         {LowerCase, 0x80, 0x80},
139         {LowerCase, 'Å', 'å'},
140         {LowerCase, 'å', 'å'},
141         {TitleCase, 0x80, 0x80},
142         {TitleCase, 'Å', 'Å'},
143         {TitleCase, 'å', 'Å'},
144
145         // 0131;LATIN SMALL LETTER DOTLESS I;Ll;0;L;;;;;N;;;0049;;0049
146         {UpperCase, 0x0131, 'I'},
147         {LowerCase, 0x0131, 0x0131},
148         {TitleCase, 0x0131, 'I'},
149
150         // 0133;LATIN SMALL LIGATURE IJ;Ll;0;L;<compat> 0069 006A;;;;N;LATIN SMALL LETTER I J;;0132;;0132
151         {UpperCase, 0x0133, 0x0132},
152         {LowerCase, 0x0133, 0x0133},
153         {TitleCase, 0x0133, 0x0132},
154
155         // 212A;KELVIN SIGN;Lu;0;L;004B;;;;N;DEGREES KELVIN;;;006B;
156         {UpperCase, 0x212A, 0x212A},
157         {LowerCase, 0x212A, 'k'},
158         {TitleCase, 0x212A, 0x212A},
159
160         // From an UpperLower sequence
161         // A640;CYRILLIC CAPITAL LETTER ZEMLYA;Lu;0;L;;;;;N;;;;A641;
162         {UpperCase, 0xA640, 0xA640},
163         {LowerCase, 0xA640, 0xA641},
164         {TitleCase, 0xA640, 0xA640},
165         // A641;CYRILLIC SMALL LETTER ZEMLYA;Ll;0;L;;;;;N;;;A640;;A640
166         {UpperCase, 0xA641, 0xA640},
167         {LowerCase, 0xA641, 0xA641},
168         {TitleCase, 0xA641, 0xA640},
169         // A64E;CYRILLIC CAPITAL LETTER NEUTRAL YER;Lu;0;L;;;;;N;;;;A64F;
170         {UpperCase, 0xA64E, 0xA64E},
171         {LowerCase, 0xA64E, 0xA64F},
172         {TitleCase, 0xA64E, 0xA64E},
173         // A65F;CYRILLIC SMALL LETTER YN;Ll;0;L;;;;;N;;;A65E;;A65E
174         {UpperCase, 0xA65F, 0xA65E},
175         {LowerCase, 0xA65F, 0xA65F},
176         {TitleCase, 0xA65F, 0xA65E},
177
178         // From another UpperLower sequence
179         // 0139;LATIN CAPITAL LETTER L WITH ACUTE;Lu;0;L;004C 0301;;;;N;LATIN CAPITAL LETTER L ACUTE;;;013A;
180         {UpperCase, 0x0139, 0x0139},
181         {LowerCase, 0x0139, 0x013A},
182         {TitleCase, 0x0139, 0x0139},
183         // 013F;LATIN CAPITAL LETTER L WITH MIDDLE DOT;Lu;0;L;<compat> 004C 00B7;;;;N;;;;0140;
184         {UpperCase, 0x013f, 0x013f},
185         {LowerCase, 0x013f, 0x0140},
186         {TitleCase, 0x013f, 0x013f},
187         // 0148;LATIN SMALL LETTER N WITH CARON;Ll;0;L;006E 030C;;;;N;LATIN SMALL LETTER N HACEK;;0147;;0147
188         {UpperCase, 0x0148, 0x0147},
189         {LowerCase, 0x0148, 0x0148},
190         {TitleCase, 0x0148, 0x0147},
191
192         // Last block in the 5.1.0 table
193         // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
194         {UpperCase, 0x10400, 0x10400},
195         {LowerCase, 0x10400, 0x10428},
196         {TitleCase, 0x10400, 0x10400},
197         // 10427;DESERET CAPITAL LETTER EW;Lu;0;L;;;;;N;;;;1044F;
198         {UpperCase, 0x10427, 0x10427},
199         {LowerCase, 0x10427, 0x1044F},
200         {TitleCase, 0x10427, 0x10427},
201         // 10428;DESERET SMALL LETTER LONG I;Ll;0;L;;;;;N;;;10400;;10400
202         {UpperCase, 0x10428, 0x10400},
203         {LowerCase, 0x10428, 0x10428},
204         {TitleCase, 0x10428, 0x10400},
205         // 1044F;DESERET SMALL LETTER EW;Ll;0;L;;;;;N;;;10427;;10427
206         {UpperCase, 0x1044F, 0x10427},
207         {LowerCase, 0x1044F, 0x1044F},
208         {TitleCase, 0x1044F, 0x10427},
209
210         // First one not in the 5.1.0 table
211         // 10450;SHAVIAN LETTER PEEP;Lo;0;L;;;;;N;;;;;
212         {UpperCase, 0x10450, 0x10450},
213         {LowerCase, 0x10450, 0x10450},
214         {TitleCase, 0x10450, 0x10450},
215
216         // Non-letters with case.
217         {LowerCase, 0x2161, 0x2171},
218         {UpperCase, 0x0345, 0x0399},
219 }
220
221 func TestIsLetter(t *testing.T) {
222         for _, r := range upperTest {
223                 if !IsLetter(r) {
224                         t.Errorf("IsLetter(U+%04X) = false, want true", r)
225                 }
226         }
227         for _, r := range letterTest {
228                 if !IsLetter(r) {
229                         t.Errorf("IsLetter(U+%04X) = false, want true", r)
230                 }
231         }
232         for _, r := range notletterTest {
233                 if IsLetter(r) {
234                         t.Errorf("IsLetter(U+%04X) = true, want false", r)
235                 }
236         }
237 }
238
239 func TestIsUpper(t *testing.T) {
240         for _, r := range upperTest {
241                 if !IsUpper(r) {
242                         t.Errorf("IsUpper(U+%04X) = false, want true", r)
243                 }
244         }
245         for _, r := range notupperTest {
246                 if IsUpper(r) {
247                         t.Errorf("IsUpper(U+%04X) = true, want false", r)
248                 }
249         }
250         for _, r := range notletterTest {
251                 if IsUpper(r) {
252                         t.Errorf("IsUpper(U+%04X) = true, want false", r)
253                 }
254         }
255 }
256
// caseString returns the name of the mapping selector c for use in failure
// messages. Any value other than UpperCase, LowerCase or TitleCase is
// reported as "ErrorCase".
func caseString(c int) string {
	names := map[int]string{
		UpperCase: "UpperCase",
		LowerCase: "LowerCase",
		TitleCase: "TitleCase",
	}
	if name, known := names[c]; known {
		return name
	}
	return "ErrorCase"
}
268
269 func TestTo(t *testing.T) {
270         for _, c := range caseTest {
271                 r := To(c.cas, c.in)
272                 if c.out != r {
273                         t.Errorf("To(U+%04X, %s) = U+%04X want U+%04X", c.in, caseString(c.cas), r, c.out)
274                 }
275         }
276 }
277
278 func TestToUpperCase(t *testing.T) {
279         for _, c := range caseTest {
280                 if c.cas != UpperCase {
281                         continue
282                 }
283                 r := ToUpper(c.in)
284                 if c.out != r {
285                         t.Errorf("ToUpper(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
286                 }
287         }
288 }
289
290 func TestToLowerCase(t *testing.T) {
291         for _, c := range caseTest {
292                 if c.cas != LowerCase {
293                         continue
294                 }
295                 r := ToLower(c.in)
296                 if c.out != r {
297                         t.Errorf("ToLower(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
298                 }
299         }
300 }
301
302 func TestToTitleCase(t *testing.T) {
303         for _, c := range caseTest {
304                 if c.cas != TitleCase {
305                         continue
306                 }
307                 r := ToTitle(c.in)
308                 if c.out != r {
309                         t.Errorf("ToTitle(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
310                 }
311         }
312 }
313
314 func TestIsSpace(t *testing.T) {
315         for _, c := range spaceTest {
316                 if !IsSpace(c) {
317                         t.Errorf("IsSpace(U+%04X) = false; want true", c)
318                 }
319         }
320         for _, c := range letterTest {
321                 if IsSpace(c) {
322                         t.Errorf("IsSpace(U+%04X) = true; want false", c)
323                 }
324         }
325 }
326
327 // Check that the optimizations for IsLetter etc. agree with the tables.
328 // We only need to check the Latin-1 range.
329 func TestLetterOptimizations(t *testing.T) {
330         for i := 0; i <= MaxLatin1; i++ {
331                 if Is(Letter, i) != IsLetter(i) {
332                         t.Errorf("IsLetter(U+%04X) disagrees with Is(Letter)", i)
333                 }
334                 if Is(Upper, i) != IsUpper(i) {
335                         t.Errorf("IsUpper(U+%04X) disagrees with Is(Upper)", i)
336                 }
337                 if Is(Lower, i) != IsLower(i) {
338                         t.Errorf("IsLower(U+%04X) disagrees with Is(Lower)", i)
339                 }
340                 if Is(Title, i) != IsTitle(i) {
341                         t.Errorf("IsTitle(U+%04X) disagrees with Is(Title)", i)
342                 }
343                 if Is(White_Space, i) != IsSpace(i) {
344                         t.Errorf("IsSpace(U+%04X) disagrees with Is(White_Space)", i)
345                 }
346                 if To(UpperCase, i) != ToUpper(i) {
347                         t.Errorf("ToUpper(U+%04X) disagrees with To(Upper)", i)
348                 }
349                 if To(LowerCase, i) != ToLower(i) {
350                         t.Errorf("ToLower(U+%04X) disagrees with To(Lower)", i)
351                 }
352                 if To(TitleCase, i) != ToTitle(i) {
353                         t.Errorf("ToTitle(U+%04X) disagrees with To(Title)", i)
354                 }
355         }
356 }
357
358 func TestTurkishCase(t *testing.T) {
359         lower := []int("abcçdefgğhıijklmnoöprsştuüvyz")
360         upper := []int("ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ")
361         for i, l := range lower {
362                 u := upper[i]
363                 if TurkishCase.ToLower(l) != l {
364                         t.Errorf("lower(U+%04X) is U+%04X not U+%04X", l, TurkishCase.ToLower(l), l)
365                 }
366                 if TurkishCase.ToUpper(u) != u {
367                         t.Errorf("upper(U+%04X) is U+%04X not U+%04X", u, TurkishCase.ToUpper(u), u)
368                 }
369                 if TurkishCase.ToUpper(l) != u {
370                         t.Errorf("upper(U+%04X) is U+%04X not U+%04X", l, TurkishCase.ToUpper(l), u)
371                 }
372                 if TurkishCase.ToLower(u) != l {
373                         t.Errorf("lower(U+%04X) is U+%04X not U+%04X", u, TurkishCase.ToLower(l), l)
374                 }
375                 if TurkishCase.ToTitle(u) != u {
376                         t.Errorf("title(U+%04X) is U+%04X not U+%04X", u, TurkishCase.ToTitle(u), u)
377                 }
378                 if TurkishCase.ToTitle(l) != u {
379                         t.Errorf("title(U+%04X) is U+%04X not U+%04X", l, TurkishCase.ToTitle(l), u)
380                 }
381         }
382 }
383
// simpleFoldTests lists case-folding orbits for TestSimpleFold. Each string
// is read as a cycle: SimpleFold applied to the last rune must give the
// first rune, and SimpleFold applied to any rune must give the one after it.
//
// Code points that look identical to another rune of the same orbit, or that
// Unicode normalization would rewrite, are spelled with \u escapes so the
// data stays reviewable and survives text processing.
var simpleFoldTests = []string{
	// SimpleFold returns a single rune: the next larger member of the
	// orbit, wrapping from the largest back to the smallest. Every string
	// below is a rotation of its orbit written in that order.

	// Easy cases.
	"Aa",
	"aA",
	"δΔ",
	"Δδ",

	// ASCII special cases.
	// K orbit: U+004B 'K', U+006B 'k', U+212A KELVIN SIGN.
	"Kk\u212A",
	"k\u212AK",
	"\u212AKk",
	// S orbit: U+0053 'S', U+0073 's', U+017F LATIN SMALL LETTER LONG S.
	"Ssſ",
	"sſS",
	"ſSs",

	// Non-ASCII special cases.
	// Rho orbit: U+03A1 'Ρ', U+03C1 'ρ', U+03F1 'ϱ'.
	"ρϱΡ",
	"ϱΡρ",
	"Ρρϱ",
	// Iota orbit: U+0345 COMBINING GREEK YPOGEGRAMMENI, U+0399 GREEK
	// CAPITAL LETTER IOTA, U+03B9 GREEK SMALL LETTER IOTA, U+1FBE GREEK
	// PROSGEGRAMMENI.
	"\u0345\u0399\u03B9\u1FBE",
	"\u0399\u03B9\u1FBE\u0345",
	"\u03B9\u1FBE\u0345\u0399",
	"\u1FBE\u0345\u0399\u03B9",

	// Extra special cases: has lower/upper but no case fold.
	// U+0130 and U+0131 each fold only to themselves.
	"İ",
	"ı",
}
416
417 func TestSimpleFold(t *testing.T) {
418         for _, tt := range simpleFoldTests {
419                 cycle := []int(tt)
420                 rune := cycle[len(cycle)-1]
421                 for _, out := range cycle {
422                         if r := SimpleFold(rune); r != out {
423                                 t.Errorf("SimpleFold(%#U) = %#U, want %#U", rune, r, out)
424                         }
425                         rune = out
426                 }
427         }
428 }