1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
12 // A parser implements the HTML5 parsing algorithm:
13 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#tree-construction
15 // tokenizer provides the tokens for the parser.
17 // tok is the most recently read token.
19 // Self-closing tags like <hr/> are re-interpreted as a two-token sequence:
20 // <hr> followed by </hr>. hasSelfClosingToken is true if we have just read
21 // the synthetic start tag and the next one due is the matching end tag.
22 hasSelfClosingToken bool
23 // doc is the document root element.
25 // The stack of open elements (section 12.2.3.2) and active formatting
26 // elements (section 12.2.3.3).
28 // Element pointers (section 12.2.3.4).
30 // Other parsing state flags (section 12.2.3.5).
31 scripting, framesetOK bool
32 // im is the current insertion mode.
34 // originalIM is the insertion mode to go back to after completing a text
35 // or inTableText insertion mode.
36 originalIM insertionMode
37 // fosterParenting is whether new elements should be inserted according to
38 // the foster parenting rules (section 12.2.5.3).
40 // quirks is whether the parser is operating in "quirks mode."
42 // context is the context element when parsing an HTML fragment
47 func (p *parser) top() *Node {
48 if n := p.oe.top(); n != nil {
54 // stopTags for use in popUntil. These come from section 12.2.3.2.
56 defaultScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}
57 listItemScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object", "ol", "ul"}
58 buttonScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object", "button"}
59 tableScopeStopTags = []string{"html", "table"}
62 // stopTags for use in clearStackToContext.
64 tableRowContextStopTags = []string{"tr", "html"}
67 // popUntil pops the stack of open elements at the highest element whose tag
68 // is in matchTags, provided there is no higher element in stopTags. It returns
69 // whether or not there was such an element. If there was not, popUntil leaves
70 // the stack unchanged.
72 // For example, if the stack was:
73 // ["html", "body", "font", "table", "b", "i", "u"]
74 // then popUntil([]string{"html", "table"}, "font") would return false, but
75 // popUntil([]string{"html", "table"}, "i") would return true and the resultant
77 // ["html", "body", "font", "table", "b"]
79 // If an element's tag is in both stopTags and matchTags, then the stack will
80 // be popped and the function returns true (provided, of course, there was no
81 // higher element in the stack that was also in stopTags). For example,
82 // popUntil([]string{"html", "table"}, "table") would return true and leave:
83 // ["html", "body", "font"]
84 func (p *parser) popUntil(stopTags []string, matchTags ...string) bool {
85 if i := p.indexOfElementInScope(stopTags, matchTags...); i != -1 {
92 // indexOfElementInScope returns the index in p.oe of the highest element
93 // whose tag is in matchTags that is in scope according to stopTags.
94 // If no matching element is in scope, it returns -1.
95 func (p *parser) indexOfElementInScope(stopTags []string, matchTags ...string) int {
96 for i := len(p.oe) - 1; i >= 0; i-- {
98 for _, t := range matchTags {
103 for _, t := range stopTags {
112 // elementInScope is like popUntil, except that it doesn't modify the stack of
114 func (p *parser) elementInScope(stopTags []string, matchTags ...string) bool {
115 return p.indexOfElementInScope(stopTags, matchTags...) != -1
118 // addChild adds a child node n to the top element, and pushes n onto the stack
119 // of open elements if it is an element node.
120 func (p *parser) addChild(n *Node) {
121 if p.fosterParenting {
127 if n.Type == ElementNode {
128 p.oe = append(p.oe, n)
132 // fosterParent adds a child node according to the foster parenting rules.
133 // Section 12.2.5.3, "foster parenting".
134 func (p *parser) fosterParent(n *Node) {
135 p.fosterParenting = false
136 var table, parent *Node
138 for i = len(p.oe) - 1; i >= 0; i-- {
139 if p.oe[i].Data == "table" {
146 // The foster parent is the html element.
149 parent = table.Parent
156 for i, child = range parent.Child {
162 if i > 0 && parent.Child[i-1].Type == TextNode && n.Type == TextNode {
163 parent.Child[i-1].Data += n.Data
167 if i == len(parent.Child) {
170 // Insert n into parent.Child at index i.
171 parent.Child = append(parent.Child[:i+1], parent.Child[i:]...)
177 // addText adds text to the preceding node if it is a text node, or else it
178 // calls addChild with a new text node.
179 func (p *parser) addText(text string) {
180 // TODO: distinguish whitespace text from others.
182 if i := len(t.Child); i > 0 && t.Child[i-1].Type == TextNode {
183 t.Child[i-1].Data += text
192 // addElement calls addChild with an element node.
193 func (p *parser) addElement(tag string, attr []Attribute) {
197 Namespace: p.top().Namespace,
203 func (p *parser) addFormattingElement(tag string, attr []Attribute) {
204 p.addElement(tag, attr)
205 p.afe = append(p.afe, p.top())
210 func (p *parser) clearActiveFormattingElements() {
213 if len(p.afe) == 0 || n.Type == scopeMarkerNode {
220 func (p *parser) reconstructActiveFormattingElements() {
225 if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
229 for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
239 clone := p.afe[i].clone()
242 if i == len(p.afe)-1 {
248 // read reads the next token. This is usually from the tokenizer, but it may
249 // be the synthesized end tag implied by a self-closing tag.
250 func (p *parser) read() error {
251 if p.hasSelfClosingToken {
252 p.hasSelfClosingToken = false
253 p.tok.Type = EndTagToken
258 p.tok = p.tokenizer.Token()
261 return p.tokenizer.Err()
262 case SelfClosingTagToken:
263 p.hasSelfClosingToken = true
264 p.tok.Type = StartTagToken
270 func (p *parser) acknowledgeSelfClosingTag() {
271 p.hasSelfClosingToken = false
274 // An insertion mode (section 12.2.3.1) is the state transition function from
275 // a particular state in the HTML5 parser's state machine. It updates the
276 // parser's fields depending on parser.tok (where ErrorToken means EOF).
277 // It returns whether the token was consumed.
278 type insertionMode func(*parser) bool
280 // setOriginalIM sets the insertion mode to return to after completing a text or
281 // inTableText insertion mode.
282 // Section 12.2.3.1, "using the rules for".
283 func (p *parser) setOriginalIM() {
284 if p.originalIM != nil {
285 panic("html: bad parser state: originalIM was set twice")
290 // Section 12.2.3.1, "reset the insertion mode".
291 func (p *parser) resetInsertionMode() {
292 for i := len(p.oe) - 1; i >= 0; i-- {
294 if i == 0 && p.context != nil {
305 case "tbody", "thead", "tfoot":
310 p.im = inColumnGroupIM
322 if p.top().Namespace == "" {
325 p.im = inForeignContentIM
332 const whitespace = " \t\r\n\f" // space, tab, CR, LF and FF; used as the strings.TrimLeft cutset when stripping leading whitespace from text token data
334 // Section 12.2.5.4.1.
335 func initialIM(p *parser) bool {
338 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
339 if len(p.tok.Data) == 0 {
340 // It was all whitespace, so ignore it.
350 n, quirks := parseDoctype(p.tok.Data)
361 // Section 12.2.5.4.2.
362 func beforeHTMLIM(p *parser) bool {
365 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
366 if len(p.tok.Data) == 0 {
367 // It was all whitespace, so ignore it.
371 if p.tok.Data == "html" {
372 p.addElement(p.tok.Data, p.tok.Attr)
378 case "head", "body", "html", "br":
379 // Drop down to creating an implied <html> tag.
391 // Create an implied <html> tag.
392 p.addElement("html", nil)
397 // Section 12.2.5.4.3.
398 func beforeHeadIM(p *parser) bool {
408 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
409 if len(p.tok.Data) == 0 {
410 // It was all whitespace, so ignore it.
426 case "head", "body", "html", "br":
439 p.addElement("head", attr)
446 // Section 12.2.5.4.4.
447 func inHeadIM(p *parser) bool {
456 s := strings.TrimLeft(p.tok.Data, whitespace)
457 if len(s) < len(p.tok.Data) {
458 // Add the initial whitespace to the current node.
459 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
470 case "base", "basefont", "bgsound", "command", "link", "meta":
471 p.addElement(p.tok.Data, p.tok.Attr)
473 p.acknowledgeSelfClosingTag()
474 case "script", "title", "noscript", "noframes", "style":
475 p.addElement(p.tok.Data, p.tok.Attr)
489 case "body", "html", "br":
504 if n.Data != "head" {
505 panic("html: bad parser state: <head> element not found, in the in-head insertion mode")
513 // Section 12.2.5.4.6.
514 func afterHeadIM(p *parser) bool {
526 s := strings.TrimLeft(p.tok.Data, whitespace)
527 if len(s) < len(p.tok.Data) {
528 // Add the initial whitespace to the current node.
529 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
546 p.addElement(p.tok.Data, p.tok.Attr)
549 case "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title":
550 p.oe = append(p.oe, p.head)
562 case "body", "html", "br":
577 p.addElement("body", attr)
578 p.framesetOK = framesetOK
584 // copyAttributes copies attributes of src not found on dst to dst.
585 func copyAttributes(dst *Node, src Token) {
586 if len(src.Attr) == 0 {
589 attr := map[string]string{}
590 for _, a := range dst.Attr {
593 for _, a := range src.Attr {
594 if _, ok := attr[a.Key]; !ok {
595 dst.Attr = append(dst.Attr, a)
601 // Section 12.2.5.4.7.
602 func inBodyIM(p *parser) bool {
605 switch n := p.oe.top(); n.Data {
606 case "pre", "listing", "textarea":
607 if len(n.Child) == 0 {
608 // Ignore a newline at the start of a <pre>, <listing> or <textarea> block.
610 if d != "" && d[0] == '\r' {
613 if d != "" && d[0] == '\n' {
622 p.reconstructActiveFormattingElements()
623 p.addText(p.tok.Data)
628 copyAttributes(p.oe[0], p.tok)
629 case "address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol", "p", "section", "summary", "ul":
630 p.popUntil(buttonScopeStopTags, "p")
631 p.addElement(p.tok.Data, p.tok.Attr)
632 case "h1", "h2", "h3", "h4", "h5", "h6":
633 p.popUntil(buttonScopeStopTags, "p")
634 switch n := p.top(); n.Data {
635 case "h1", "h2", "h3", "h4", "h5", "h6":
638 p.addElement(p.tok.Data, p.tok.Attr)
640 for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
641 if n := p.afe[i]; n.Type == ElementNode && n.Data == "a" {
642 p.inBodyEndTagFormatting("a")
648 p.reconstructActiveFormattingElements()
649 p.addFormattingElement(p.tok.Data, p.tok.Attr)
650 case "b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u":
651 p.reconstructActiveFormattingElements()
652 p.addFormattingElement(p.tok.Data, p.tok.Attr)
654 p.reconstructActiveFormattingElements()
655 if p.elementInScope(defaultScopeStopTags, "nobr") {
656 p.inBodyEndTagFormatting("nobr")
657 p.reconstructActiveFormattingElements()
659 p.addFormattingElement(p.tok.Data, p.tok.Attr)
660 case "applet", "marquee", "object":
661 p.reconstructActiveFormattingElements()
662 p.addElement(p.tok.Data, p.tok.Attr)
663 p.afe = append(p.afe, &scopeMarker)
665 case "area", "br", "embed", "img", "input", "keygen", "wbr":
666 p.reconstructActiveFormattingElements()
667 p.addElement(p.tok.Data, p.tok.Attr)
669 p.acknowledgeSelfClosingTag()
673 p.popUntil(buttonScopeStopTags, "p")
675 p.addElement(p.tok.Data, p.tok.Attr)
680 p.popUntil(buttonScopeStopTags, "p")
681 p.addElement(p.tok.Data, p.tok.Attr)
683 p.acknowledgeSelfClosingTag()
686 p.reconstructActiveFormattingElements()
687 p.addElement(p.tok.Data, p.tok.Attr)
689 // TODO: detect <select> inside a table.
694 p.popUntil(buttonScopeStopTags, "p")
695 p.addElement(p.tok.Data, p.tok.Attr)
700 for i := len(p.oe) - 1; i >= 0; i-- {
704 p.popUntil(listItemScopeStopTags, "li")
705 case "address", "div", "p":
708 if !isSpecialElement[node.Data] {
714 p.popUntil(buttonScopeStopTags, "p")
715 p.addElement(p.tok.Data, p.tok.Attr)
718 for i := len(p.oe) - 1; i >= 0; i-- {
723 case "address", "div", "p":
726 if !isSpecialElement[node.Data] {
732 p.popUntil(buttonScopeStopTags, "p")
733 p.addElement(p.tok.Data, p.tok.Attr)
735 p.popUntil(buttonScopeStopTags, "p")
736 p.addElement(p.tok.Data, p.tok.Attr)
738 p.popUntil(defaultScopeStopTags, "button")
739 p.reconstructActiveFormattingElements()
740 p.addElement(p.tok.Data, p.tok.Attr)
742 case "optgroup", "option":
743 if p.top().Data == "option" {
746 p.reconstructActiveFormattingElements()
747 p.addElement(p.tok.Data, p.tok.Attr)
751 if body.Type == ElementNode && body.Data == "body" {
753 copyAttributes(body, p.tok)
756 case "base", "basefont", "bgsound", "command", "link", "meta", "noframes", "script", "style", "title":
767 prompt := "This is a searchable index. Enter search keywords: "
768 attr := []Attribute{{Key: "name", Val: "isindex"}}
769 for _, a := range p.tok.Attr {
774 // Ignore the attribute.
778 attr = append(attr, a)
781 p.acknowledgeSelfClosingTag()
782 p.popUntil(buttonScopeStopTags, "p")
783 p.addElement("form", nil)
786 p.form.Attr = []Attribute{{Key: "action", Val: action}}
788 p.addElement("hr", nil)
790 p.addElement("label", nil)
792 p.addElement("input", attr)
795 p.addElement("hr", nil)
800 p.popUntil(buttonScopeStopTags, "p")
801 p.reconstructActiveFormattingElements()
803 p.addElement(p.tok.Data, p.tok.Attr)
805 p.reconstructActiveFormattingElements()
807 if p.tok.Data == "math" {
808 // TODO: adjust MathML attributes.
811 // TODO: adjust SVG attributes.
814 // TODO: adjust foreign attributes.
815 p.addElement(p.tok.Data, p.tok.Attr)
816 p.top().Namespace = namespace
817 p.im = inForeignContentIM
819 case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
823 p.addElement(p.tok.Data, p.tok.Attr)
828 // TODO: autoclose the stack of open elements.
832 if !p.elementInScope(buttonScopeStopTags, "p") {
833 p.addElement("p", nil)
835 p.popUntil(buttonScopeStopTags, "p")
836 case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u":
837 p.inBodyEndTagFormatting(p.tok.Data)
838 case "address", "article", "aside", "blockquote", "button", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre", "section", "summary", "ul":
839 p.popUntil(defaultScopeStopTags, p.tok.Data)
840 case "applet", "marquee", "object":
841 if p.popUntil(defaultScopeStopTags, p.tok.Data) {
842 p.clearActiveFormattingElements()
845 p.tok.Type = StartTagToken
848 p.inBodyEndTagOther(p.tok.Data)
860 func (p *parser) inBodyEndTagFormatting(tag string) {
861 // This is the "adoption agency" algorithm, described at
862 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#adoptionAgency
864 // TODO: this is a fairly literal line-by-line translation of that algorithm.
865 // Once the code successfully parses the comprehensive test suite, we should
866 // refactor this code to be more idiomatic.
868 // Steps 1-3. The outer loop.
869 for i := 0; i < 8; i++ {
870 // Step 4. Find the formatting element.
871 var formattingElement *Node
872 for j := len(p.afe) - 1; j >= 0; j-- {
873 if p.afe[j].Type == scopeMarkerNode {
876 if p.afe[j].Data == tag {
877 formattingElement = p.afe[j]
881 if formattingElement == nil {
882 p.inBodyEndTagOther(tag)
885 feIndex := p.oe.index(formattingElement)
887 p.afe.remove(formattingElement)
890 if !p.elementInScope(defaultScopeStopTags, tag) {
895 // Steps 5-6. Find the furthest block.
896 var furthestBlock *Node
897 for _, e := range p.oe[feIndex:] {
898 if isSpecialElement[e.Data] {
903 if furthestBlock == nil {
905 for e != formattingElement {
912 // Steps 7-8. Find the common ancestor and bookmark node.
913 commonAncestor := p.oe[feIndex-1]
914 bookmark := p.afe.index(formattingElement)
916 // Step 9. The inner loop. Find the lastNode to reparent.
917 lastNode := furthestBlock
918 node := furthestBlock
919 x := p.oe.index(node)
921 for j := 0; j < 3; j++ {
926 if p.afe.index(node) == -1 {
931 if node == formattingElement {
935 clone := node.clone()
936 p.afe[p.afe.index(node)] = clone
937 p.oe[p.oe.index(node)] = clone
940 if lastNode == furthestBlock {
941 bookmark = p.afe.index(node) + 1
944 if lastNode.Parent != nil {
945 lastNode.Parent.Remove(lastNode)
952 // Step 10. Reparent lastNode to the common ancestor,
953 // or for misnested table nodes, to the foster parent.
954 if lastNode.Parent != nil {
955 lastNode.Parent.Remove(lastNode)
957 switch commonAncestor.Data {
958 case "table", "tbody", "tfoot", "thead", "tr":
959 p.fosterParent(lastNode)
961 commonAncestor.Add(lastNode)
964 // Steps 11-13. Reparent nodes from the furthest block's children
965 // to a clone of the formatting element.
966 clone := formattingElement.clone()
967 reparentChildren(clone, furthestBlock)
968 furthestBlock.Add(clone)
970 // Step 14. Fix up the list of active formatting elements.
971 if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
972 // Move the bookmark with the rest of the list.
975 p.afe.remove(formattingElement)
976 p.afe.insert(bookmark, clone)
978 // Step 15. Fix up the stack of open elements.
979 p.oe.remove(formattingElement)
980 p.oe.insert(p.oe.index(furthestBlock)+1, clone)
984 // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
985 func (p *parser) inBodyEndTagOther(tag string) {
986 for i := len(p.oe) - 1; i >= 0; i-- {
987 if p.oe[i].Data == tag {
991 if isSpecialElement[p.oe[i].Data] {
997 // Section 12.2.5.4.8.
998 func textIM(p *parser) bool {
1003 p.addText(p.tok.Data)
1010 return p.tok.Type == EndTagToken
1013 // Section 12.2.5.4.9.
1014 func inTableIM(p *parser) bool {
1024 p.clearStackToContext(tableScopeStopTags)
1025 p.afe = append(p.afe, &scopeMarker)
1026 p.addElement(p.tok.Data, p.tok.Attr)
1029 case "tbody", "tfoot", "thead":
1030 p.clearStackToContext(tableScopeStopTags)
1031 p.addElement(p.tok.Data, p.tok.Attr)
1032 p.im = inTableBodyIM
1034 case "td", "th", "tr":
1035 p.clearStackToContext(tableScopeStopTags)
1036 p.addElement("tbody", nil)
1037 p.im = inTableBodyIM
1040 if p.popUntil(tableScopeStopTags, "table") {
1041 p.resetInsertionMode()
1044 // Ignore the token.
1047 p.clearStackToContext(tableScopeStopTags)
1048 p.addElement(p.tok.Data, p.tok.Attr)
1049 p.im = inColumnGroupIM
1052 p.clearStackToContext(tableScopeStopTags)
1053 p.addElement("colgroup", p.tok.Attr)
1054 p.im = inColumnGroupIM
1062 if p.popUntil(tableScopeStopTags, "table") {
1063 p.resetInsertionMode()
1066 // Ignore the token.
1068 case "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr":
1069 // Ignore the token.
1080 switch p.top().Data {
1081 case "table", "tbody", "tfoot", "thead", "tr":
1082 p.fosterParenting = true
1083 defer func() { p.fosterParenting = false }()
1089 // clearStackToContext pops elements off the stack of open elements
1090 // until an element listed in stopTags is found.
1091 func (p *parser) clearStackToContext(stopTags []string) {
1092 for i := len(p.oe) - 1; i >= 0; i-- {
1093 for _, tag := range stopTags {
1094 if p.oe[i].Data == tag {
1102 // Section 12.2.5.4.11.
1103 func inCaptionIM(p *parser) bool {
1107 case "caption", "col", "colgroup", "tbody", "td", "tfoot", "thead", "tr":
1108 if p.popUntil(tableScopeStopTags, "caption") {
1109 p.clearActiveFormattingElements()
1113 // Ignore the token.
1120 if p.popUntil(tableScopeStopTags, "caption") {
1121 p.clearActiveFormattingElements()
1126 if p.popUntil(tableScopeStopTags, "caption") {
1127 p.clearActiveFormattingElements()
1131 // Ignore the token.
1134 case "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr":
1135 // Ignore the token.
1142 // Section 12.2.5.4.12.
1143 func inColumnGroupIM(p *parser) bool {
1152 // Ignore the token.
1159 p.addElement(p.tok.Data, p.tok.Attr)
1161 p.acknowledgeSelfClosingTag()
1167 if p.oe.top().Data != "html" {
1173 // Ignore the token.
1177 if p.oe.top().Data != "html" {
1185 // Section 12.2.5.4.13.
1186 func inTableBodyIM(p *parser) bool {
1215 if p.popUntil(tableScopeStopTags, "tbody", "thead", "tfoot") {
1219 // Ignore the token.
1221 case "body", "caption", "col", "colgroup", "html", "td", "th", "tr":
1222 // Ignore the token.
1233 // TODO: clear the stack back to a table body context.
1234 p.addElement(data, attr)
1241 // Section 12.2.5.4.14.
1242 func inRowIM(p *parser) bool {
1251 p.clearStackToContext(tableRowContextStopTags)
1252 p.addElement(p.tok.Data, p.tok.Attr)
1253 p.afe = append(p.afe, &scopeMarker)
1256 case "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr":
1257 if p.popUntil(tableScopeStopTags, "tr") {
1258 p.im = inTableBodyIM
1261 // Ignore the token.
1269 if p.popUntil(tableScopeStopTags, "tr") {
1270 p.im = inTableBodyIM
1273 // Ignore the token.
1276 if p.popUntil(tableScopeStopTags, "tr") {
1277 p.im = inTableBodyIM
1280 // Ignore the token.
1282 case "tbody", "tfoot", "thead":
1284 case "body", "caption", "col", "colgroup", "html", "td", "th":
1285 // Ignore the token.
1300 // Section 12.2.5.4.15.
1301 func inCellIM(p *parser) bool {
1303 closeTheCellAndReprocess bool
1308 case "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr":
1309 // TODO: check for "td" or "th" in table scope.
1310 closeTheCellAndReprocess = true
1315 if !p.popUntil(tableScopeStopTags, p.tok.Data) {
1316 // Ignore the token.
1319 p.clearActiveFormattingElements()
1322 case "body", "caption", "col", "colgroup", "html":
1324 case "table", "tbody", "tfoot", "thead", "tr":
1325 // TODO: check for matching element in table scope.
1326 closeTheCellAndReprocess = true
1335 if closeTheCellAndReprocess {
1336 if p.popUntil(tableScopeStopTags, "td") || p.popUntil(tableScopeStopTags, "th") {
1337 p.clearActiveFormattingElements()
1345 // Section 12.2.5.4.16.
1346 func inSelectIM(p *parser) bool {
1352 p.addText(p.tok.Data)
1358 if p.top().Data == "option" {
1361 p.addElement(p.tok.Data, p.tok.Attr)
1363 if p.top().Data == "option" {
1366 if p.top().Data == "optgroup" {
1369 p.addElement(p.tok.Data, p.tok.Attr)
1372 case "input", "keygen", "textarea":
1377 // Ignore the token.
1382 if p.top().Data == "option" {
1387 if p.oe[i].Data == "option" {
1390 if p.oe[i].Data == "optgroup" {
1396 // Ignore the token.
1405 for i := len(p.oe) - 1; i >= 0; i-- {
1406 switch p.oe[i].Data {
1409 p.resetInsertionMode()
1411 case "option", "optgroup":
1414 // Ignore the token.
1422 // Section 12.2.5.4.18.
1423 func afterBodyIM(p *parser) bool {
1429 if p.tok.Data == "html" {
1433 if p.tok.Data == "html" {
1434 p.im = afterAfterBodyIM
1438 // The comment is attached to the <html> element.
1439 if len(p.oe) < 1 || p.oe[0].Data != "html" {
1440 panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1452 // Section 12.2.5.4.19.
1453 func inFramesetIM(p *parser) bool {
1461 // Ignore all text but whitespace.
1462 s := strings.Map(func(c rune) rune {
1464 case ' ', '\t', '\n', '\f', '\r':
1477 p.addElement(p.tok.Data, p.tok.Attr)
1479 p.addElement(p.tok.Data, p.tok.Attr)
1481 p.acknowledgeSelfClosingTag()
1488 if p.oe.top().Data != "html" {
1490 if p.oe.top().Data != "frameset" {
1491 p.im = afterFramesetIM
1497 // Ignore the token.
1502 // Section 12.2.5.4.20.
1503 func afterFramesetIM(p *parser) bool {
1511 // Ignore all text but whitespace.
1512 s := strings.Map(func(c rune) rune {
1514 case ' ', '\t', '\n', '\f', '\r':
1532 p.im = afterAfterFramesetIM
1536 // Ignore the token.
1541 // Section 12.2.5.4.21.
1542 func afterAfterBodyIM(p *parser) bool {
1550 if p.tok.Data == "html" {
1564 // Section 12.2.5.4.22.
1565 func afterAfterFramesetIM(p *parser) bool {
1580 // Ignore the token.
1585 // Section 12.2.5.5.
1586 func inForeignContentIM(p *parser) bool {
1594 if breakout[p.tok.Data] {
1597 switch p.top().Namespace {
1599 // TODO: adjust MathML attributes.
1601 // TODO: adjust SVG tag names.
1602 // TODO: adjust SVG attributes.
1604 panic("html: bad parser state: unexpected namespace")
1606 // TODO: adjust foreign attributes.
1607 p.addElement(p.tok.Data, p.tok.Attr)
1611 // Ignore the token.
1616 func (p *parser) parse() error {
1617 // Iterate until EOF. Any other error will cause an early return.
1621 if err := p.read(); err != nil {
1630 // Loop until the final token (the ErrorToken signifying EOF) is consumed.
1632 if consumed = p.im(p); consumed {
1639 // Parse returns the parse tree for the HTML from the given Reader.
1640 // The input is assumed to be UTF-8 encoded.
1641 func Parse(r io.Reader) (*Node, error) {
1643 tokenizer: NewTokenizer(r),
1658 // ParseFragment parses a fragment of HTML and returns the nodes that were
1659 // found. If the fragment is the InnerHTML for an existing element, pass that
1660 // element in context.
1661 func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
1663 tokenizer: NewTokenizer(r),
1672 switch context.Data {
1673 case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "title", "textarea", "xmp":
1674 p.tokenizer.rawTag = context.Data
1683 p.oe = nodeStack{root}
1684 p.resetInsertionMode()
1686 for n := context; n != nil; n = n.Parent {
1687 if n.Type == ElementNode && n.Data == "form" {
1703 result := parent.Child
1705 for _, n := range result {