1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// utf8map is the shared table of test cases: each entry pairs a code point
// (read as m.rune by the tests) with its expected UTF-8 byte string (m.str).
// The entries visible here cover the first and last three-byte encodings,
// the first and last four-byte encodings, and U+FFFD.
// NOTE(review): the listing skips from embedded line 18 to 36, so earlier
// entries of the table are not visible here.
18 var utf8map = []Utf8Map{
36 {0x0800, "\xe0\xa0\x80"},
37 {0x0801, "\xe0\xa0\x81"},
38 {0xfffe, "\xef\xbf\xbe"},
39 {0xffff, "\xef\xbf\xbf"},
40 {0x10000, "\xf0\x90\x80\x80"},
41 {0x10001, "\xf0\x90\x80\x81"},
42 {0x10fffe, "\xf4\x8f\xbf\xbe"},
43 {0x10ffff, "\xf4\x8f\xbf\xbf"},
44 {0xFFFD, "\xef\xbf\xbd"},
// testStrings holds the sample strings iterated by TestSequencing and
// TestIntConversion. The visible entries mix one-, two- and three-byte
// UTF-8 characters.
// NOTE(review): embedded lines 48-50 are not visible, so the slice may
// contain further entries.
47 var testStrings = []string{
51 "日a本b語ç日ð本Ê語þ日¥本¼語i日©",
52 "日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©",
// TestFullRune walks utf8map and reports an error when FullRune or
// FullRuneInString returns false for b/s, or true for b1/s1.
// NOTE(review): the construction of b, s, b1 and s1 is on lines not visible
// in this listing; b1/s1 are presumably truncated encodings — confirm.
56 func TestFullRune(t *testing.T) {
57 for i := 0; i < len(utf8map); i++ {
61 t.Errorf("FullRune(%q) (%U) = false, want true", b, m.rune)
64 if !FullRuneInString(s) {
65 t.Errorf("FullRuneInString(%q) (%U) = false, want true", s, m.rune)
69 t.Errorf("FullRune(%q) = true, want false", b1)
72 if FullRuneInString(s1) {
// Name the function that was actually called so a failure points at the
// string variant rather than at FullRune.
73 t.Errorf("FullRuneInString(%q) = true, want false", s1)
// TestEncodeRune walks utf8map, encodes m.rune into buf with EncodeRune and
// reports an error when the byte slices b and b1 differ.
// NOTE(review): the assignments of buf, b and b1 are not visible in this
// listing; from the error message b1 is the EncodeRune output and b the
// expected bytes — confirm.
78 func TestEncodeRune(t *testing.T) {
79 for i := 0; i < len(utf8map); i++ {
83 n := EncodeRune(buf[0:], m.rune)
85 if !bytes.Equal(b, b1) {
86 t.Errorf("EncodeRune(%#04x) = %q want %q", m.rune, b1, b)
// TestDecodeRune walks utf8map and checks DecodeRune and DecodeRuneInString
// in four situations: the exact encoding, the encoding followed by extra
// data, the encoding with its last byte removed, and a corrupted encoding.
// Well-formed input must decode to m.rune with size len(b); the failing
// cases must yield RuneError with the size stated in each check.
// NOTE(review): several assignments (b, s, wantsize, the corruption of b)
// sit on lines not visible in this listing.
91 func TestDecodeRune(t *testing.T) {
92 for i := 0; i < len(utf8map); i++ {
95 rune, size := DecodeRune(b)
96 if rune != m.rune || size != len(b) {
97 t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, rune, size, m.rune, len(b))
100 rune, size = DecodeRuneInString(s)
101 if rune != m.rune || size != len(b) {
// The call above is DecodeRuneInString, so the message names it.
102 t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, rune, size, m.rune, len(b))
105 // there's an extra byte that bytes left behind - make sure trailing byte works
106 rune, size = DecodeRune(b[0:cap(b)])
107 if rune != m.rune || size != len(b) {
108 t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, rune, size, m.rune, len(b))
111 rune, size = DecodeRuneInString(s)
112 if rune != m.rune || size != len(b) {
113 t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, rune, size, m.rune, len(b))
116 // make sure missing bytes fail
118 if wantsize >= len(b) {
121 rune, size = DecodeRune(b[0 : len(b)-1])
122 if rune != RuneError || size != wantsize {
123 t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b[0:len(b)-1], rune, size, RuneError, wantsize)
125 s = m.str[0 : len(m.str)-1]
126 rune, size = DecodeRuneInString(s)
127 if rune != RuneError || size != wantsize {
128 t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, rune, size, RuneError, wantsize)
131 // make sure bad sequences fail
137 rune, size = DecodeRune(b)
138 if rune != RuneError || size != 1 {
139 t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, rune, size, RuneError, 1)
// Exercise the string variant here: the original repeated DecodeRune(b), so
// DecodeRuneInString was never checked against a bad sequence even though
// the failure message reports it.
// NOTE(review): assumes s holds the string form of the corrupted b, set on
// the preceding line that is not visible in this listing — confirm.
142 rune, size = DecodeRuneInString(s)
143 if rune != RuneError || size != 1 {
144 t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, rune, size, RuneError, 1)
150 // Check that DecodeRune and DecodeLastRune correspond to
151 // the equivalent range loop.
// Every string in testStrings is combined with every utf8map entry in three
// ways: the entry's string appended, prepended, and placed in the middle.
// NOTE(review): the loop body is not visible in this listing; it presumably
// passes each combined string to testSequence — confirm.
152 func TestSequencing(t *testing.T) {
153 for _, ts := range testStrings {
154 for _, m := range utf8map {
155 for _, s := range []string{ts + m.str, m.str + ts, ts + m.str + ts} {
162 // Check that a range loop and a []int conversion visit the same runes.
163 // Not really a test of this package, but the assumption is used here and
164 // it's good to verify
// For each entry of testStrings the test first compares RuneCountInString(ts)
// with len(runes), then ranges over ts and reports any rune that differs
// from runes[i].
// NOTE(review): the assignment of runes and the bookkeeping of i are on
// lines not visible in this listing; runes is presumably the []int
// conversion of ts mentioned above — confirm.
165 func TestIntConversion(t *testing.T) {
166 for _, ts := range testStrings {
168 if RuneCountInString(ts) != len(runes) {
169 t.Errorf("%q: expected %d runes; got %d", ts, len(runes), RuneCountInString(ts))
173 for _, r := range ts {
175 t.Errorf("%q[%d]: expected %c (%U); got %c (%U)", ts, i, runes[i], runes[i], r, r)
// testSequence cross-checks the decoders against a range loop over s.
//
// Forward pass: ranging over s yields (i, r) pairs, each recorded in index
// as info{i, r}. At every offset i, DecodeRune(b[i:]) and
// DecodeRuneInString(s[i:]) are compared with r and with each other's size.
//
// Backward pass: starting at si = len(s), DecodeLastRune(b[0:si]) and
// DecodeLastRuneInString(s[0:si]) are compared with each other's size and
// with the recorded index[j].rune and index[j].index. The final error fires
// if the backward walk does not finish at 0.
//
// NOTE(review): the info type, the assignment of b, the updates of si and j,
// and most of the if conditions are on lines not visible in this listing;
// b is presumably []byte(s) — confirm.
182 func testSequence(t *testing.T, s string) {
// One slot per byte of s is an upper bound on the number of runes.
187 index := make([]info, len(s))
191 for i, r := range s {
193 t.Errorf("Sequence(%q) mismatched index %d, want %d", s, si, i)
196 index[j] = info{i, r}
198 rune1, size1 := DecodeRune(b[i:])
200 t.Errorf("DecodeRune(%q) = %#04x, want %#04x", s[i:], rune1, r)
203 rune2, size2 := DecodeRuneInString(s[i:])
205 t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", s[i:], rune2, r)
209 t.Errorf("DecodeRune/DecodeRuneInString(%q) size mismatch %d/%d", s[i:], size1, size2)
// Walk backwards from the end of s; the loop has no post statement, so si is
// updated inside the body (not visible here).
215 for si = len(s); si > 0; {
216 rune1, size1 := DecodeLastRune(b[0:si])
217 rune2, size2 := DecodeLastRuneInString(s[0:si])
219 t.Errorf("DecodeLastRune/DecodeLastRuneInString(%q, %d) size mismatch %d/%d", s, si, size1, size2)
222 if rune1 != index[j].rune {
223 t.Errorf("DecodeLastRune(%q, %d) = %#04x, want %#04x", s, si, rune1, index[j].rune)
226 if rune2 != index[j].rune {
227 t.Errorf("DecodeLastRuneInString(%q, %d) = %#04x, want %#04x", s, si, rune2, index[j].rune)
231 if si != index[j].index {
232 t.Errorf("DecodeLastRune(%q) index mismatch at %d, want %d", s, si, index[j].index)
238 t.Errorf("DecodeLastRune(%q) finished at %d, not 0", s, si)
242 // Check that negative runes encode as U+FFFD.
// The expected bytes are produced by encoding RuneError itself, and the
// bytes under test by encoding -1; the two slices must be equal.
243 func TestNegativeRune(t *testing.T) {
244 errorbuf := make([]byte, UTFMax)
// EncodeRune returns the number of bytes written; trim the buffer to it.
245 errorbuf = errorbuf[0:EncodeRune(errorbuf, RuneError)]
246 buf := make([]byte, UTFMax)
247 buf = buf[0:EncodeRune(buf, -1)]
248 if !bytes.Equal(buf, errorbuf) {
249 t.Errorf("incorrect encoding [% x] for -1; expected [% x]", buf, errorbuf)
// RuneCountTest is one case for TestRuneCount, which reads its in field as
// the input string and its out field as the expected rune count.
// NOTE(review): the field declarations are not visible in this listing.
253 type RuneCountTest struct {
// runecounttests is the table of cases iterated by TestRuneCount.
// NOTE(review): the entries are not visible in this listing.
258 var runecounttests = []RuneCountTest{
// TestRuneCount runs every runecounttests case through both
// RuneCountInString (on tt.in) and RuneCount (on []byte(tt.in)) and reports
// an error when either result differs from tt.out.
265 func TestRuneCount(t *testing.T) {
266 for i := 0; i < len(runecounttests); i++ {
267 tt := runecounttests[i]
268 if out := RuneCountInString(tt.in); out != tt.out {
269 t.Errorf("RuneCountInString(%q) = %d, want %d", tt.in, out, tt.out)
271 if out := RuneCount([]byte(tt.in)); out != tt.out {
272 t.Errorf("RuneCount(%q) = %d, want %d", tt.in, out, tt.out)
// BenchmarkRuneCountTenASCIIChars calls RuneCountInString b.N times on the
// ten-character ASCII string "0123456789"; the result is discarded.
277 func BenchmarkRuneCountTenASCIIChars(b *testing.B) {
278 for i := 0; i < b.N; i++ {
279 RuneCountInString("0123456789")
// BenchmarkRuneCountTenJapaneseChars calls RuneCountInString b.N times on a
// string of ten Japanese characters; the result is discarded.
283 func BenchmarkRuneCountTenJapaneseChars(b *testing.B) {
284 for i := 0; i < b.N; i++ {
285 RuneCountInString("日本語日本語日本語日")
// BenchmarkEncodeASCIIRune allocates one UTFMax-byte buffer before the timed
// loop and then iterates b.N times.
// NOTE(review): the loop body is not visible in this listing; it presumably
// encodes an ASCII rune into buf — confirm.
289 func BenchmarkEncodeASCIIRune(b *testing.B) {
290 buf := make([]byte, UTFMax)
291 for i := 0; i < b.N; i++ {
// BenchmarkEncodeJapaneseRune allocates one UTFMax-byte buffer before the
// timed loop and then iterates b.N times.
// NOTE(review): the loop body is not visible in this listing; it presumably
// encodes a Japanese rune into buf — confirm.
296 func BenchmarkEncodeJapaneseRune(b *testing.B) {
297 buf := make([]byte, UTFMax)
298 for i := 0; i < b.N; i++ {
// BenchmarkDecodeASCIIRune iterates b.N times.
// NOTE(review): the setup on embedded line 304 and the loop body are not
// visible in this listing; the name suggests an ASCII rune is decoded on
// each iteration — confirm.
303 func BenchmarkDecodeASCIIRune(b *testing.B) {
305 for i := 0; i < b.N; i++ {
// BenchmarkDecodeJapaneseRune iterates b.N times.
// NOTE(review): the setup on embedded line 311 and the loop body are not
// visible in this listing; the name suggests a Japanese rune is decoded on
// each iteration — confirm.
310 func BenchmarkDecodeJapaneseRune(b *testing.B) {
312 for i := 0; i < b.N; i++ {