1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
29 CharacterByCharacterTests()
37 const file = "NormalizationTest.txt"
39 var url = flag.String("url",
40 "http://www.unicode.org/Public/6.0.0/ucd/"+file,
41 "URL of Unicode database directory")
42 var localFiles = flag.Bool("local",
44 "data files have been copied to the current directory; for debugging only")
46 var logger = log.New(os.Stderr, "", log.Lshortfile)
48 // This regression test runs the test set in NormalizationTest.txt
49 // (taken from http://www.unicode.org/Public/6.0.0/ucd/).
51 // NormalizationTest.txt has form:
52 // @Part0 # Specific cases
54 // 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
55 // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
57 // Each test has 5 columns (c1, c2, c3, c4, c5), where
58 // (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
61 // 1. The following invariants must be true for all conformant implementations
64 // c2 == NFC(c1) == NFC(c2) == NFC(c3)
65 // c4 == NFC(c4) == NFC(c5)
68 // c3 == NFD(c1) == NFD(c2) == NFD(c3)
69 // c5 == NFD(c4) == NFD(c5)
72 // c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
75 // c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
77 // 2. For every code point X assigned in this version of Unicode that is not
78 // specifically listed in Part 1, the following invariants must be true
79 // for all conformant implementations:
81 // X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
94 // Holds data from NormalizationTest.txt
107 r rune // used for character by character test
108 cols [cMaxColumns]string // Each has 5 entries, see below.
111 func (t Test) Name() string {
113 return part[t.partnr].name
115 return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number)
118 var partRe = regexp.MustCompile(`@Part(\d) # (.*)\n$`)
119 var testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)\n?$`)
123 // Load the data from NormalizationTest.txt
124 func loadTestData() {
127 *url = "file://" + path.Join(pwd, file)
129 t := &http.Transport{}
130 t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
131 c := &http.Client{Transport: t}
132 resp, err := c.Get(*url)
136 if resp.StatusCode != 200 {
137 logger.Fatal("bad GET status for "+file, resp.Status)
141 input := bufio.NewReader(f)
143 line, err := input.ReadString('\n')
150 if len(line) == 0 || line[0] == '#' {
153 m := partRe.FindStringSubmatch(line)
156 logger.Fatal("Failed to parse Part: ", line)
158 i, err := strconv.Atoi(m[1])
163 part = append(part, Part{name: name[:len(name)-1], number: i})
166 m = testRe.FindStringSubmatch(line)
167 if m == nil || len(m) < 7 {
168 logger.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
170 test := Test{name: m[6], partnr: len(part) - 1, number: counter}
172 for j := 1; j < len(m)-1; j++ {
173 for _, split := range strings.Split(m[j], " ") {
174 r, err := strconv.ParseUint(split, 16, 64)
179 // save for CharacterByCharacterTests
182 var buf [utf8.UTFMax]byte
183 sz := utf8.EncodeRune(buf[:], rune(r))
184 test.cols[j-1] += string(buf[:sz])
187 part := &part[len(part)-1]
188 part.tests = append(part.tests, test)
192 var fstr = []string{"NFC", "NFD", "NFKC", "NFKD"}
196 func cmpResult(t *Test, name string, f norm.Form, gold, test, result string) {
202 st, sr, sg := []rune(test), []rune(result), []rune(gold)
203 logger.Printf("%s:%s: %s(%X)=%X; want:%X: %s",
204 t.Name(), name, fstr[f], st, sr, sg, t.name)
208 func cmpIsNormal(t *Test, name string, f norm.Form, test string, result, want bool) {
214 logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []rune(test), result, want)
218 func doTest(t *Test, f norm.Form, gold, test string) {
219 result := f.Bytes([]byte(test))
220 cmpResult(t, "Bytes", f, gold, test, string(result))
221 for i := range test {
222 out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...)
223 cmpResult(t, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out))
225 cmpIsNormal(t, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold)
228 func doConformanceTests(t *Test, partn int) {
229 for i := 0; i <= 2; i++ {
230 doTest(t, norm.NFC, t.cols[1], t.cols[i])
231 doTest(t, norm.NFD, t.cols[2], t.cols[i])
232 doTest(t, norm.NFKC, t.cols[3], t.cols[i])
233 doTest(t, norm.NFKD, t.cols[4], t.cols[i])
235 for i := 3; i <= 4; i++ {
236 doTest(t, norm.NFC, t.cols[3], t.cols[i])
237 doTest(t, norm.NFD, t.cols[4], t.cols[i])
238 doTest(t, norm.NFKC, t.cols[3], t.cols[i])
239 doTest(t, norm.NFKD, t.cols[4], t.cols[i])
243 func CharacterByCharacterTests() {
244 tests := part[1].tests
246 for i := 0; i <= len(tests); i++ { // last one is special case
249 r = 0x2FA1E // Don't have to go to 0x10FFFF
253 for last++; last < r; last++ {
254 // Check all characters that were not explicitly listed in the test.
255 t := &Test{partnr: 1, number: -1}
257 doTest(t, norm.NFC, char, char)
258 doTest(t, norm.NFD, char, char)
259 doTest(t, norm.NFKC, char, char)
260 doTest(t, norm.NFKD, char, char)
263 doConformanceTests(&tests[i], 1)
268 func StandardTests() {
269 for _, j := range []int{0, 2, 3} {
270 for _, test := range part[j].tests {
271 doConformanceTests(&test, j)
276 // PerformanceTest verifies that normalization is O(n). If any of the
277 // code does not properly check for maxCombiningChars, normalization
278 // may exhibit O(n**2) behavior.
279 func PerformanceTest() {
280 runtime.GOMAXPROCS(2)
281 success := make(chan bool, 1)
283 buf := bytes.Repeat([]byte("\u035D"), 1024*1024)
284 buf = append(buf, "\u035B"...)
285 norm.NFC.Append(nil, buf...)
288 timeout := time.After(1e9)
291 // test completed before the timeout
294 logger.Printf(`unexpectedly long time to complete PerformanceTest`)