1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
17 <?xml version="1.0" encoding="UTF-8"?>
18 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
19 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
20 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
22 <hello lang="en">World <>'" 白鵬翔</hello>
24 <outer foo:attr="value" xmlns:tag="ns4">
28 <![CDATA[Some text here.]]>
30 </body><!-- missing final newline -->`
32 var rawTokens = []Token{
33 CharData([]byte("\n")),
34 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
35 CharData([]byte("\n")),
36 Directive([]byte(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
37 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
39 CharData([]byte("\n")),
40 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
41 CharData([]byte("\n ")),
42 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
43 CharData([]byte("World <>'\" 白鵬翔")),
44 EndElement{Name{"", "hello"}},
45 CharData([]byte("\n ")),
46 StartElement{Name{"", "goodbye"}, nil},
47 EndElement{Name{"", "goodbye"}},
48 CharData([]byte("\n ")),
49 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
50 CharData([]byte("\n ")),
51 StartElement{Name{"", "inner"}, nil},
52 EndElement{Name{"", "inner"}},
53 CharData([]byte("\n ")),
54 EndElement{Name{"", "outer"}},
55 CharData([]byte("\n ")),
56 StartElement{Name{"tag", "name"}, nil},
57 CharData([]byte("\n ")),
58 CharData([]byte("Some text here.")),
59 CharData([]byte("\n ")),
60 EndElement{Name{"tag", "name"}},
61 CharData([]byte("\n")),
62 EndElement{Name{"", "body"}},
63 Comment([]byte(" missing final newline ")),
66 var cookedTokens = []Token{
67 CharData([]byte("\n")),
68 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
69 CharData([]byte("\n")),
70 Directive([]byte(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
71 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
73 CharData([]byte("\n")),
74 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
75 CharData([]byte("\n ")),
76 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
77 CharData([]byte("World <>'\" 白鵬翔")),
78 EndElement{Name{"ns2", "hello"}},
79 CharData([]byte("\n ")),
80 StartElement{Name{"ns2", "goodbye"}, nil},
81 EndElement{Name{"ns2", "goodbye"}},
82 CharData([]byte("\n ")),
83 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
84 CharData([]byte("\n ")),
85 StartElement{Name{"ns2", "inner"}, nil},
86 EndElement{Name{"ns2", "inner"}},
87 CharData([]byte("\n ")),
88 EndElement{Name{"ns2", "outer"}},
89 CharData([]byte("\n ")),
90 StartElement{Name{"ns3", "name"}, nil},
91 CharData([]byte("\n ")),
92 CharData([]byte("Some text here.")),
93 CharData([]byte("\n ")),
94 EndElement{Name{"ns3", "name"}},
95 CharData([]byte("\n")),
96 EndElement{Name{"ns2", "body"}},
97 Comment([]byte(" missing final newline ")),
100 const testInputAltEncoding = `
101 <?xml version="1.0" encoding="x-testing-uppercase"?>
104 var rawTokensAltEncoding = []Token{
105 CharData([]byte("\n")),
106 ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
107 CharData([]byte("\n")),
108 StartElement{Name{"", "tag"}, nil},
109 CharData([]byte("value")),
110 EndElement{Name{"", "tag"}},
113 var xmlInput = []string{
114 // unexpected EOF cases
139 // other Syntax errors
144 // "<!0 >", // let the Token() caller handle
153 // "<![CDATA[d]]>", // let the Token() caller handle
160 type stringReader struct {
165 func (r *stringReader) Read(b []byte) (n int, err error) {
166 if r.off >= len(r.s) {
169 for r.off < len(r.s) && n < len(b) {
177 func (r *stringReader) ReadByte() (b byte, err error) {
178 if r.off >= len(r.s) {
186 func StringReader(s string) io.Reader { return &stringReader{s, 0} }
188 func TestRawToken(t *testing.T) {
189 p := NewParser(StringReader(testInput))
190 testRawToken(t, p, rawTokens)
193 type downCaser struct {
198 func (d *downCaser) ReadByte() (c byte, err error) {
199 c, err = d.r.ReadByte()
200 if c >= 'A' && c <= 'Z' {
206 func (d *downCaser) Read(p []byte) (int, error) {
207 d.t.Fatalf("unexpected Read call on downCaser reader")
211 func TestRawTokenAltEncoding(t *testing.T) {
213 p := NewParser(StringReader(testInputAltEncoding))
214 p.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
215 sawEncoding = charset
216 if charset != "x-testing-uppercase" {
217 t.Fatalf("unexpected charset %q", charset)
219 return &downCaser{t, input.(io.ByteReader)}, nil
221 testRawToken(t, p, rawTokensAltEncoding)
224 func TestRawTokenAltEncodingNoConverter(t *testing.T) {
225 p := NewParser(StringReader(testInputAltEncoding))
226 token, err := p.RawToken()
228 t.Fatalf("expected a token on first RawToken call")
233 token, err = p.RawToken()
235 t.Errorf("expected a nil token; got %#v", token)
238 t.Fatalf("expected an error on second RawToken call")
240 const encoding = "x-testing-uppercase"
241 if !strings.Contains(err.Error(), encoding) {
242 t.Errorf("expected error to contain %q; got error: %v",
247 func testRawToken(t *testing.T, p *Parser, rawTokens []Token) {
248 for i, want := range rawTokens {
249 have, err := p.RawToken()
251 t.Fatalf("token %d: unexpected error: %s", i, err)
253 if !reflect.DeepEqual(have, want) {
254 t.Errorf("token %d = %#v want %#v", i, have, want)
259 // Ensure that directives (specifically !DOCTYPE) include the complete
260 // text of any nested directives, noting that < and > do not change
261 // nesting depth if they are in single or double quotes.
263 var nestedDirectivesInput = `
264 <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
265 <!DOCTYPE [<!ENTITY xlt ">">]>
266 <!DOCTYPE [<!ENTITY xlt "<">]>
267 <!DOCTYPE [<!ENTITY xlt '>'>]>
268 <!DOCTYPE [<!ENTITY xlt '<'>]>
269 <!DOCTYPE [<!ENTITY xlt '">'>]>
270 <!DOCTYPE [<!ENTITY xlt "'<">]>
273 var nestedDirectivesTokens = []Token{
274 CharData([]byte("\n")),
275 Directive([]byte(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`)),
276 CharData([]byte("\n")),
277 Directive([]byte(`DOCTYPE [<!ENTITY xlt ">">]`)),
278 CharData([]byte("\n")),
279 Directive([]byte(`DOCTYPE [<!ENTITY xlt "<">]`)),
280 CharData([]byte("\n")),
281 Directive([]byte(`DOCTYPE [<!ENTITY xlt '>'>]`)),
282 CharData([]byte("\n")),
283 Directive([]byte(`DOCTYPE [<!ENTITY xlt '<'>]`)),
284 CharData([]byte("\n")),
285 Directive([]byte(`DOCTYPE [<!ENTITY xlt '">'>]`)),
286 CharData([]byte("\n")),
287 Directive([]byte(`DOCTYPE [<!ENTITY xlt "'<">]`)),
288 CharData([]byte("\n")),
291 func TestNestedDirectives(t *testing.T) {
292 p := NewParser(StringReader(nestedDirectivesInput))
294 for i, want := range nestedDirectivesTokens {
295 have, err := p.Token()
297 t.Fatalf("token %d: unexpected error: %s", i, err)
299 if !reflect.DeepEqual(have, want) {
300 t.Errorf("token %d = %#v want %#v", i, have, want)
305 func TestToken(t *testing.T) {
306 p := NewParser(StringReader(testInput))
308 for i, want := range cookedTokens {
309 have, err := p.Token()
311 t.Fatalf("token %d: unexpected error: %s", i, err)
313 if !reflect.DeepEqual(have, want) {
314 t.Errorf("token %d = %#v want %#v", i, have, want)
319 func TestSyntax(t *testing.T) {
320 for i := range xmlInput {
321 p := NewParser(StringReader(xmlInput[i]))
323 for _, err = p.Token(); err == nil; _, err = p.Token() {
325 if _, ok := err.(*SyntaxError); !ok {
326 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
331 type allScalars struct {
353 var all = allScalars{
377 const testScalarsInput = `<allscalars>
380 <false1>false</false1>
392 <uintptr>11</uintptr>
394 <float32>13.0</float32>
395 <float64>14.0</float64>
397 <ptrstring>16</ptrstring>
400 func TestAllScalars(t *testing.T) {
402 buf := bytes.NewBufferString(testScalarsInput)
403 err := Unmarshal(buf, &a)
408 if !reflect.DeepEqual(a, all) {
409 t.Errorf("have %+v want %+v", a, all)
417 func TestIssue569(t *testing.T) {
418 data := `<item><field_a>abcd</field_a></item>`
420 buf := bytes.NewBufferString(data)
421 err := Unmarshal(buf, &i)
423 if err != nil || i.Field_a != "abcd" {
424 t.Fatal("Expecting abcd")
428 func TestUnquotedAttrs(t *testing.T) {
429 data := "<tag attr=azAZ09:-_\t>"
430 p := NewParser(StringReader(data))
432 token, err := p.Token()
433 if _, ok := err.(*SyntaxError); ok {
434 t.Errorf("Unexpected error: %v", err)
436 if token.(StartElement).Name.Local != "tag" {
437 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
439 attr := token.(StartElement).Attr[0]
440 if attr.Value != "azAZ09:-_" {
441 t.Errorf("Unexpected attribute value: %v", attr.Value)
443 if attr.Name.Local != "attr" {
444 t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
448 func TestValuelessAttrs(t *testing.T) {
449 tests := [][3]string{
450 {"<p nowrap>", "p", "nowrap"},
451 {"<p nowrap >", "p", "nowrap"},
452 {"<input checked/>", "input", "checked"},
453 {"<input checked />", "input", "checked"},
455 for _, test := range tests {
456 p := NewParser(StringReader(test[0]))
458 token, err := p.Token()
459 if _, ok := err.(*SyntaxError); ok {
460 t.Errorf("Unexpected error: %v", err)
462 if token.(StartElement).Name.Local != test[1] {
463 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
465 attr := token.(StartElement).Attr[0]
466 if attr.Value != test[2] {
467 t.Errorf("Unexpected attribute value: %v", attr.Value)
469 if attr.Name.Local != test[2] {
470 t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
475 func TestCopyTokenCharData(t *testing.T) {
476 data := []byte("same data")
477 var tok1 Token = CharData(data)
478 tok2 := CopyToken(tok1)
479 if !reflect.DeepEqual(tok1, tok2) {
480 t.Error("CopyToken(CharData) != CharData")
483 if reflect.DeepEqual(tok1, tok2) {
484 t.Error("CopyToken(CharData) uses same buffer.")
488 func TestCopyTokenStartElement(t *testing.T) {
489 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
491 tok2 := CopyToken(tok1)
492 if !reflect.DeepEqual(tok1, tok2) {
493 t.Error("CopyToken(StartElement) != StartElement")
495 elt.Attr[0] = Attr{Name{"", "lang"}, "de"}
496 if reflect.DeepEqual(tok1, tok2) {
497 t.Error("CopyToken(CharData) uses same buffer.")
501 func TestSyntaxErrorLineNum(t *testing.T) {
502 testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
503 p := NewParser(StringReader(testInput))
505 for _, err = p.Token(); err == nil; _, err = p.Token() {
507 synerr, ok := err.(*SyntaxError)
509 t.Error("Expected SyntaxError.")
511 if synerr.Line != 3 {
512 t.Error("SyntaxError didn't have correct line number.")
516 func TestTrailingRawToken(t *testing.T) {
517 input := `<FOO></FOO> `
518 p := NewParser(StringReader(input))
520 for _, err = p.RawToken(); err == nil; _, err = p.RawToken() {
523 t.Fatalf("p.RawToken() = _, %v, want _, os.EOF", err)
527 func TestTrailingToken(t *testing.T) {
528 input := `<FOO></FOO> `
529 p := NewParser(StringReader(input))
531 for _, err = p.Token(); err == nil; _, err = p.Token() {
534 t.Fatalf("p.Token() = _, %v, want _, os.EOF", err)
538 func TestEntityInsideCDATA(t *testing.T) {
539 input := `<test><![CDATA[ &val=foo ]]></test>`
540 p := NewParser(StringReader(input))
542 for _, err = p.Token(); err == nil; _, err = p.Token() {
545 t.Fatalf("p.Token() = _, %v, want _, os.EOF", err)
549 // The last three tests (one for characters in attribute names and
550 // two for character entities) pass not because of the code changed
551 // for issue 1259, but because other parts of xml.Parser report the
552 // given messages. They are included to document the current behavior
553 // in situations where one might expect character range checking to
554 // detect the error, although in fact it does not.
556 var characterTests = []struct {
560 {"\x12<doc/>", "illegal character code U+0012"},
561 {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
562 {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
563 {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
564 {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
565 {"<doc>&\x01;</doc>", "invalid character entity &;"},
566 {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &;"},
569 func TestDisallowedCharacters(t *testing.T) {
571 for i, tt := range characterTests {
572 p := NewParser(StringReader(tt.in))
578 synerr, ok := err.(*SyntaxError)
580 t.Fatalf("input %d p.Token() = _, %v, want _, *SyntaxError", i, err)
582 if synerr.Msg != tt.err {
583 t.Fatalf("input %d synerr.Msg wrong: want '%s', got '%s'", i, tt.err, synerr.Msg)
588 type procInstEncodingTest struct {
592 var procInstTests = []struct {
595 {`version="1.0" encoding="utf-8"`, "utf-8"},
596 {`version="1.0" encoding='utf-8'`, "utf-8"},
597 {`version="1.0" encoding='utf-8' `, "utf-8"},
598 {`version="1.0" encoding=utf-8`, ""},
599 {`encoding="FOO" `, "FOO"},
602 func TestProcInstEncoding(t *testing.T) {
603 for _, test := range procInstTests {
604 got := procInstEncoding(test.input)
605 if got != test.expect {
606 t.Errorf("procInstEncoding(%q) = %q; want %q", test.input, got, test.expect)