hasSelfClosingToken bool
// doc is the document root element.
doc *Node
- // The stack of open elements (section 11.2.3.2) and active formatting
- // elements (section 11.2.3.3).
+ // The stack of open elements (section 12.2.3.2) and active formatting
+ // elements (section 12.2.3.3).
oe, afe nodeStack
- // Element pointers (section 11.2.3.4).
+ // Element pointers (section 12.2.3.4).
head, form *Node
- // Other parsing state flags (section 11.2.3.5).
+ // Other parsing state flags (section 12.2.3.5).
scripting, framesetOK bool
// im is the current insertion mode.
im insertionMode
// or inTableText insertion mode.
originalIM insertionMode
// fosterParenting is whether new elements should be inserted according to
- // the foster parenting rules (section 11.2.5.3).
+ // the foster parenting rules (section 12.2.5.3).
fosterParenting bool
// quirks is whether the parser is operating in "quirks mode."
quirks bool
// context is the context element when parsing an HTML fragment
- // (section 11.4).
+ // (section 12.4).
context *Node
}
return p.doc
}
-// stopTags for use in popUntil. These come from section 11.2.3.2.
+// stopTags for use in popUntil. These come from section 12.2.3.2.
var (
defaultScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}
listItemScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object", "ol", "ul"}
}
// fosterParent adds a child node according to the foster parenting rules.
-// Section 11.2.5.3, "foster parenting".
+// Section 12.2.5.3, "foster parenting".
func (p *parser) fosterParent(n *Node) {
p.fosterParenting = false
var table, parent *Node
// addElement builds an ElementNode whose Data is tag and whose Attr is attr,
// and hands it to addChild. On the added (+) side of this change the node
// also copies its Namespace from p.top(); that call is evaluated while the
// literal is built, i.e. before addChild runs.
// NOTE(review): top is not defined in this view — presumably it returns the
// current node of the open-elements stack; confirm it cannot return nil here.
func (p *parser) addElement(tag string, attr []Attribute) {
p.addChild(&Node{
- Type: ElementNode,
- Data: tag,
- Attr: attr,
+ Type: ElementNode,
+ Data: tag,
+ Namespace: p.top().Namespace,
+ Attr: attr,
})
}
-// Section 11.2.3.3.
+// Section 12.2.3.3.
// addFormattingElement adds an element for tag and attr through addElement,
// then appends whatever p.top() returns afterwards to p.afe, the list of
// active formatting elements.
func (p *parser) addFormattingElement(tag string, attr []Attribute) {
	p.addElement(tag, attr)
	// Read the top node only after addElement has run, so the entry pushed
	// onto p.afe reflects the parser state that call left behind.
	elem := p.top()
	p.afe = append(p.afe, elem)
	// TODO.
}
-// Section 11.2.3.3.
+// Section 12.2.3.3.
func (p *parser) clearActiveFormattingElements() {
for {
n := p.afe.pop()
}
}
-// Section 11.2.3.3.
+// Section 12.2.3.3.
func (p *parser) reconstructActiveFormattingElements() {
n := p.afe.top()
if n == nil {
return nil
}
-// Section 11.2.4.
+// Section 12.2.4.
// acknowledgeSelfClosingTag clears the parser's hasSelfClosingToken flag.
// NOTE(review): where the flag is set and checked is outside this view —
// presumably a flag still set after a token is processed marks an
// unacknowledged self-closing tag; confirm against the caller.
func (p *parser) acknowledgeSelfClosingTag() {
p.hasSelfClosingToken = false
}
-// An insertion mode (section 11.2.3.1) is the state transition function from
+// An insertion mode (section 12.2.3.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
// It returns whether the token was consumed.
// setOriginalIM sets the insertion mode to return to after completing a text or
// inTableText insertion mode.
-// Section 11.2.3.1, "using the rules for".
+// Section 12.2.3.1, "using the rules for".
func (p *parser) setOriginalIM() {
if p.originalIM != nil {
panic("html: bad parser state: originalIM was set twice")
p.originalIM = p.im
}
-// Section 11.2.3.1, "reset the insertion mode".
+// Section 12.2.3.1, "reset the insertion mode".
func (p *parser) resetInsertionMode() {
for i := len(p.oe) - 1; i >= 0; i-- {
n := p.oe[i]
case "html":
p.im = beforeHeadIM
default:
- continue
+ if p.top().Namespace == "" {
+ continue
+ }
+ p.im = inForeignContentIM
}
return
}
const whitespace = " \t\r\n\f"
-// Section 11.2.5.4.1.
+// Section 12.2.5.4.1.
func initialIM(p *parser) bool {
switch p.tok.Type {
case TextToken:
return false
}
-// Section 11.2.5.4.2.
+// Section 12.2.5.4.2.
func beforeHTMLIM(p *parser) bool {
switch p.tok.Type {
case TextToken:
return false
}
-// Section 11.2.5.4.3.
+// Section 12.2.5.4.3.
func beforeHeadIM(p *parser) bool {
var (
add bool
return !implied
}
-// Section 11.2.5.4.4.
+// Section 12.2.5.4.4.
func inHeadIM(p *parser) bool {
var (
pop bool
return true
}
-// Section 11.2.5.4.6.
+// Section 12.2.5.4.6.
func afterHeadIM(p *parser) bool {
var (
add bool
}
}
-// Section 11.2.5.4.7.
+// Section 12.2.5.4.7.
func inBodyIM(p *parser) bool {
switch p.tok.Type {
case TextToken:
case "plaintext":
p.popUntil(buttonScopeStopTags, "p")
p.addElement(p.tok.Data, p.tok.Attr)
+ case "button":
+ p.popUntil(defaultScopeStopTags, "button")
+ p.reconstructActiveFormattingElements()
+ p.addElement(p.tok.Data, p.tok.Attr)
+ p.framesetOK = false
case "optgroup", "option":
if p.top().Data == "option" {
p.oe.pop()
p.reconstructActiveFormattingElements()
p.framesetOK = false
p.addElement(p.tok.Data, p.tok.Attr)
+ case "math", "svg":
+ p.reconstructActiveFormattingElements()
+ namespace := ""
+ if p.tok.Data == "math" {
+ // TODO: adjust MathML attributes.
+ namespace = "mathml"
+ } else {
+ // TODO: adjust SVG attributes.
+ namespace = "svg"
+ }
+ // TODO: adjust foreign attributes.
+ p.addElement(p.tok.Data, p.tok.Attr)
+ p.top().Namespace = namespace
+ p.im = inForeignContentIM
+ return true
case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
// Ignore the token.
default:
}
}
-// Section 11.2.5.4.8.
+// Section 12.2.5.4.8.
func textIM(p *parser) bool {
switch p.tok.Type {
case ErrorToken:
return p.tok.Type == EndTagToken
}
-// Section 11.2.5.4.9.
+// Section 12.2.5.4.9.
func inTableIM(p *parser) bool {
switch p.tok.Type {
case ErrorToken:
}
}
-// Section 11.2.5.4.11.
+// Section 12.2.5.4.11.
func inCaptionIM(p *parser) bool {
switch p.tok.Type {
case StartTagToken:
return inBodyIM(p)
}
-// Section 11.2.5.4.12.
+// Section 12.2.5.4.12.
func inColumnGroupIM(p *parser) bool {
switch p.tok.Type {
case CommentToken:
case "colgroup":
if p.oe.top().Data != "html" {
p.oe.pop()
+ p.im = inTableIM
}
- p.im = inTableIM
return true
case "col":
// Ignore the token.
}
if p.oe.top().Data != "html" {
p.oe.pop()
+ p.im = inTableIM
+ return false
}
- p.im = inTableIM
- return false
+ return true
}
-// Section 11.2.5.4.13.
+// Section 12.2.5.4.13.
func inTableBodyIM(p *parser) bool {
var (
add bool
return inTableIM(p)
}
-// Section 11.2.5.4.14.
+// Section 12.2.5.4.14.
func inRowIM(p *parser) bool {
switch p.tok.Type {
case ErrorToken:
return inTableIM(p)
}
-// Section 11.2.5.4.15.
+// Section 12.2.5.4.15.
func inCellIM(p *parser) bool {
var (
closeTheCellAndReprocess bool
return inBodyIM(p)
}
-// Section 11.2.5.4.16.
+// Section 12.2.5.4.16.
func inSelectIM(p *parser) bool {
endSelect := false
switch p.tok.Type {
return true
}
-// Section 11.2.5.4.18.
+// Section 12.2.5.4.18.
func afterBodyIM(p *parser) bool {
switch p.tok.Type {
case ErrorToken:
return false
}
-// Section 11.2.5.4.19.
+// Section 12.2.5.4.19.
func inFramesetIM(p *parser) bool {
switch p.tok.Type {
case CommentToken:
Type: CommentNode,
Data: p.tok.Data,
})
+ case TextToken:
+ // Ignore all text but whitespace.
+ s := strings.Map(func(c rune) rune {
+ switch c {
+ case ' ', '\t', '\n', '\f', '\r':
+ return c
+ }
+ return -1
+ }, p.tok.Data)
+ if s != "" {
+ p.addText(s)
+ }
case StartTagToken:
switch p.tok.Data {
case "html":
return true
}
-// Section 11.2.5.4.20.
+// Section 12.2.5.4.20.
func afterFramesetIM(p *parser) bool {
switch p.tok.Type {
case CommentToken:
Type: CommentNode,
Data: p.tok.Data,
})
+ case TextToken:
+ // Ignore all text but whitespace.
+ s := strings.Map(func(c rune) rune {
+ switch c {
+ case ' ', '\t', '\n', '\f', '\r':
+ return c
+ }
+ return -1
+ }, p.tok.Data)
+ if s != "" {
+ p.addText(s)
+ }
case StartTagToken:
switch p.tok.Data {
case "html":
return true
}
-// Section 11.2.5.4.21.
+// Section 12.2.5.4.21.
func afterAfterBodyIM(p *parser) bool {
switch p.tok.Type {
case ErrorToken:
return false
}
-// Section 11.2.5.4.22.
+// Section 12.2.5.4.22.
func afterAfterFramesetIM(p *parser) bool {
switch p.tok.Type {
case CommentToken:
return true
}
+// inForeignContentIM is the insertion mode for MathML/SVG content (section 12.2.5.5); it reports every token as consumed.
+func inForeignContentIM(p *parser) bool {
+ switch p.tok.Type {
+ case CommentToken: // Comments are passed to addChild as CommentNode nodes.
+ p.addChild(&Node{
+ Type: CommentNode,
+ Data: p.tok.Data,
+ })
+ case StartTagToken:
+ if breakout[p.tok.Data] { // The lookup has no effect yet: the branch body is still a TODO.
+ // TODO: handle tags listed in breakout; for now they fall through like any other start tag.
+ }
+ switch p.top().Namespace { // Only "mathml" and "svg" are accepted; any other namespace panics.
+ case "mathml":
+ // TODO: adjust MathML attributes.
+ case "svg":
+ // TODO: adjust SVG tag names.
+ // TODO: adjust SVG attributes.
+ default:
+ panic("html: bad parser state: unexpected namespace")
+ }
+ // TODO: adjust foreign attributes.
+ p.addElement(p.tok.Data, p.tok.Attr) // Tag name and attributes are passed through unadjusted until the TODOs above are done.
+ case EndTagToken:
+ // TODO: end tags are currently dropped; this function neither pops an element nor changes p.im.
+ default:
+ // Ignore the token.
+ }
+ return true // Ignored and unhandled tokens are reported as consumed too.
+}
+
func (p *parser) parse() error {
// Iterate until EOF. Any other error will cause an early return.
consumed := true