1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
15 // Escape rewrites each action in the template to guarantee that the output is
17 func Escape(t *template.Template) (*template.Template, error) {
20 if _, err := EscapeSet(&s, t.Name()); err != nil {
23 // TODO: if s contains cloned dependencies due to self-recursion
24 // cross-context, error out.
28 // EscapeSet rewrites the template set to guarantee that the output of any of
29 // the named templates is properly escaped.
30 // Names should include the names of all templates that might be Executed but
31 // need not include helper templates.
32 // If no error is returned, then the named templates have been modified.
33 // Otherwise the named templates have been rendered unusable.
34 func EscapeSet(s *template.Set, names ...string) (*template.Set, error) {
36 // TODO: Maybe add a method to Set to enumerate template names
37 // and use those instead.
38 return nil, &Error{ErrNoNames, "", 0, "must specify names of top level templates"}
41 for _, name := range names {
42 c, _ := e.escapeTree(context{}, name, 0)
45 err, c.err.Name = c.err, name
46 } else if c.state != stateText {
47 err = &Error{ErrEndContext, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)}
50 // Prevent execution of unsafe templates.
51 for _, name := range names {
52 if t := s.Template(name); t != nil {
63 // funcMap maps command names to functions that render their inputs safe.
64 var funcMap = template.FuncMap{
65 "exp_template_html_attrescaper": attrEscaper,
66 "exp_template_html_commentescaper": commentEscaper,
67 "exp_template_html_cssescaper": cssEscaper,
68 "exp_template_html_cssvaluefilter": cssValueFilter,
69 "exp_template_html_htmlnamefilter": htmlNameFilter,
70 "exp_template_html_htmlescaper": htmlEscaper,
71 "exp_template_html_jsregexpescaper": jsRegexpEscaper,
72 "exp_template_html_jsstrescaper": jsStrEscaper,
73 "exp_template_html_jsvalescaper": jsValEscaper,
74 "exp_template_html_nospaceescaper": htmlNospaceEscaper,
75 "exp_template_html_rcdataescaper": rcdataEscaper,
76 "exp_template_html_urlescaper": urlEscaper,
77 "exp_template_html_urlfilter": urlFilter,
78 "exp_template_html_urlnormalizer": urlNormalizer,
81 // equivEscapers matches contextual escapers to equivalent template builtins.
82 var equivEscapers = map[string]string{
83 "exp_template_html_attrescaper": "html",
84 "exp_template_html_htmlescaper": "html",
85 "exp_template_html_nospaceescaper": "html",
86 "exp_template_html_rcdataescaper": "html",
87 "exp_template_html_urlescaper": "urlquery",
88 "exp_template_html_urlnormalizer": "urlquery",
91 // escaper collects type inferences about templates and changes needed to make
92 // templates injection safe.
94 // set is the template set being escaped.
96 // output[templateName] is the output context for a templateName that
97 // has been mangled to include its input context.
98 output map[string]context
99 // derived[c.mangle(name)] maps to a template derived from the template
100 // named name for the start context c.
101 derived map[string]*template.Template
102 // called is the set of mangled template names that have been called.
103 called map[string]bool
104 // xxxNodeEdits are the accumulated edits to apply during commit.
105 // Such edits are not applied immediately in case a template set
106 // executes a given template in different escaping contexts.
107 actionNodeEdits map[*parse.ActionNode][]string
108 templateNodeEdits map[*parse.TemplateNode]string
109 textNodeEdits map[*parse.TextNode][]byte
112 // newEscaper creates a blank escaper for the given set.
113 func newEscaper(s *template.Set) *escaper {
116 map[string]context{},
117 map[string]*template.Template{},
119 map[*parse.ActionNode][]string{},
120 map[*parse.TemplateNode]string{},
121 map[*parse.TextNode][]byte{},
125 // filterFailsafe is an innocuous word that is emitted in place of unsafe values
126 // by sanitizer functions. It is not a keyword in any programming language,
127 // contains no special characters, is not empty, and when it appears in output
128 // it is distinct enough that a developer can find the source of the problem
129 // via a search engine.
130 const filterFailsafe = "ZgotmplZ"
132 // escape escapes a template node.
133 func (e *escaper) escape(c context, n parse.Node) context {
134 switch n := n.(type) {
135 case *parse.ActionNode:
136 return e.escapeAction(c, n)
138 return e.escapeBranch(c, &n.BranchNode, "if")
139 case *parse.ListNode:
140 return e.escapeList(c, n)
141 case *parse.RangeNode:
142 return e.escapeBranch(c, &n.BranchNode, "range")
143 case *parse.TemplateNode:
144 return e.escapeTemplate(c, n)
145 case *parse.TextNode:
146 return e.escapeText(c, n)
147 case *parse.WithNode:
148 return e.escapeBranch(c, &n.BranchNode, "with")
150 panic("escaping " + n.String() + " is unimplemented")
153 // escapeAction escapes an action template node.
154 func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
155 if len(n.Pipe.Decl) != 0 {
156 // A local variable assignment, not an interpolation.
160 s := make([]string, 0, 3)
164 case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
167 s = append(s, "exp_template_html_urlfilter")
169 case urlPartPreQuery:
171 case stateCSSDqStr, stateCSSSqStr:
172 s = append(s, "exp_template_html_cssescaper")
174 s = append(s, "exp_template_html_urlnormalizer")
176 case urlPartQueryOrFrag:
177 s = append(s, "exp_template_html_urlescaper")
181 err: errorf(ErrAmbigContext, n.Line, "%s appears in an ambiguous URL context", n),
184 panic(c.urlPart.String())
187 s = append(s, "exp_template_html_jsvalescaper")
188 // A slash after a value starts a div operator.
190 case stateJSDqStr, stateJSSqStr:
191 s = append(s, "exp_template_html_jsstrescaper")
193 s = append(s, "exp_template_html_jsregexpescaper")
195 s = append(s, "exp_template_html_cssvaluefilter")
197 s = append(s, "exp_template_html_htmlescaper")
199 s = append(s, "exp_template_html_rcdataescaper")
201 // Handled below in delim check.
202 case stateAttrName, stateTag:
203 c.state = stateAttrName
204 s = append(s, "exp_template_html_htmlnamefilter")
206 if isComment(c.state) {
207 s = append(s, "exp_template_html_commentescaper")
209 panic("unexpected state " + c.state.String())
214 // No extra-escaping needed for raw text content.
215 case delimSpaceOrTagEnd:
216 s = append(s, "exp_template_html_nospaceescaper")
218 s = append(s, "exp_template_html_attrescaper")
220 e.editActionNode(n, s)
224 // ensurePipelineContains ensures that the pipeline has commands with
225 // the identifiers in s in order.
226 // If the pipeline already has some of the sanitizers, do not interfere.
227 // For example, if p is (.X | html) and s is ["escapeJSVal", "html"] then it
228 // has one matching, "html", and one to insert, "escapeJSVal", to produce
229 // (.X | escapeJSVal | html).
230 func ensurePipelineContains(p *parse.PipeNode, s []string) {
235 // Find the identifiers at the end of the command chain.
237 for i := n - 1; i >= 0; i-- {
238 if cmd := p.Cmds[i]; len(cmd.Args) != 0 {
239 if id, ok := cmd.Args[0].(*parse.IdentifierNode); ok {
240 if id.Ident == "noescape" {
246 idents = p.Cmds[i+1:]
249 for _, id := range idents {
250 if escFnsEq(s[dups], (id.Args[0].(*parse.IdentifierNode)).Ident) {
257 newCmds := make([]*parse.CommandNode, n-len(idents), n+len(s)-dups)
258 copy(newCmds, p.Cmds)
259 // Merge existing identifier commands with the sanitizers needed.
260 for _, id := range idents {
261 i := indexOfStr((id.Args[0].(*parse.IdentifierNode)).Ident, s, escFnsEq)
263 for _, name := range s[:i] {
264 newCmds = appendCmd(newCmds, newIdentCmd(name))
268 newCmds = appendCmd(newCmds, id)
270 // Create any remaining sanitizers.
271 for _, name := range s {
272 newCmds = appendCmd(newCmds, newIdentCmd(name))
277 // redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x)
279 var redundantFuncs = map[string]map[string]bool{
280 "exp_template_html_commentescaper": {
281 "exp_template_html_attrescaper": true,
282 "exp_template_html_nospaceescaper": true,
283 "exp_template_html_htmlescaper": true,
285 "exp_template_html_cssescaper": {
286 "exp_template_html_attrescaper": true,
288 "exp_template_html_jsregexpescaper": {
289 "exp_template_html_attrescaper": true,
291 "exp_template_html_jsstrescaper": {
292 "exp_template_html_attrescaper": true,
294 "exp_template_html_urlescaper": {
295 "exp_template_html_urlnormalizer": true,
299 // appendCmd appends the given command to the end of the command pipeline
300 // unless it is redundant with the last command.
301 func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode {
302 if n := len(cmds); n != 0 {
303 last, ok := cmds[n-1].Args[0].(*parse.IdentifierNode)
304 next, _ := cmd.Args[0].(*parse.IdentifierNode)
305 if ok && redundantFuncs[last.Ident][next.Ident] {
309 return append(cmds, cmd)
312 // indexOfStr returns the first i such that eq(s, strs[i]), or -1 if s was not found.
313 func indexOfStr(s string, strs []string, eq func(a, b string) bool) int {
314 for i, t := range strs {
322 // escFnsEq returns whether the two escaping functions are equivalent.
323 func escFnsEq(a, b string) bool {
324 if e := equivEscapers[a]; e != "" {
327 if e := equivEscapers[b]; e != "" {
333 // newIdentCmd produces a command containing a single identifier node.
334 func newIdentCmd(identifier string) *parse.CommandNode {
335 return &parse.CommandNode{
336 NodeType: parse.NodeCommand,
337 Args: []parse.Node{parse.NewIdentifier(identifier)},
341 // nudge returns the context that would result from following empty string
342 // transitions from the input context.
343 // For example, parsing:
345 // will end in context{stateBeforeValue, attrURL}, but parsing one extra rune:
347 // will end in context{stateURL, delimSpaceOrTagEnd, ...}.
348 // There are two transitions that happen when the 'x' is seen:
349 // (1) Transition from a before-value state to a start-of-value state without
350 // consuming any character.
351 // (2) Consume 'x' and transition past the first value character.
352 // In this case, nudging produces the context after (1) happens.
353 func nudge(c context) context {
356 // In `<foo {{.}}`, the action should emit an attribute.
357 c.state = stateAttrName
358 case stateBeforeValue:
359 // In `<foo bar={{.}}`, the action is an undelimited value.
360 c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone
362 // In `<foo bar {{.}}`, the action is an attribute name.
363 c.state, c.attr = stateAttrName, attrNone
368 // join joins the two contexts of a branch template node. The result is an
369 // error context if either of the input contexts is an error context, or if
370 // the input contexts differ.
371 func join(a, b context, line int, nodeName string) context {
372 if a.state == stateError {
375 if b.state == stateError {
383 c.urlPart = b.urlPart
385 // The contexts differ only by urlPart.
386 c.urlPart = urlPartUnknown
393 // The contexts differ only by jsCtx.
394 c.jsCtx = jsCtxUnknown
398 // Allow a nudged context to join with an unnudged one.
400 // <p title={{if .C}}{{.}}{{end}}
401 // ends in an unquoted value state even though the else branch
402 // ends in stateBeforeValue.
403 if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) {
404 if e := join(c, d, line, nodeName); e.state != stateError {
411 err: errorf(ErrBranchEnd, line, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b),
415 // escapeBranch escapes a branch template node: "if", "range" and "with".
416 func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context {
417 c0 := e.escapeList(c, n.List)
418 if nodeName == "range" && c0.state != stateError {
419 // The "true" branch of a "range" node can execute multiple times.
420 // We check that executing n.List once results in the same context
421 // as executing n.List twice.
422 c1, _ := e.escapeListConditionally(c0, n.List, nil)
423 c0 = join(c0, c1, n.Line, nodeName)
424 if c0.state == stateError {
425 // Make clear that this is a problem on loop re-entry
426 // since developers tend to overlook that branch when
427 // debugging templates.
429 c0.err.Description = "on range loop re-entry: " + c0.err.Description
433 c1 := e.escapeList(c, n.ElseList)
434 return join(c0, c1, n.Line, nodeName)
437 // escapeList escapes a list template node.
438 func (e *escaper) escapeList(c context, n *parse.ListNode) context {
442 for _, m := range n.Nodes {
448 // escapeListConditionally escapes a list node but only preserves edits and
449 // inferences in e if the inferences and output context satisfy filter.
450 // It returns the best guess at an output context, and the result of the filter
451 // which is the same as whether e was updated.
452 func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) {
453 e1 := newEscaper(e.set)
454 // Make type inferences available to filter.
455 for k, v := range e.output {
458 c = e1.escapeList(c, n)
459 ok := filter != nil && filter(e1, c)
461 // Copy inferences and edits from e1 back into e.
462 for k, v := range e1.output {
465 for k, v := range e1.derived {
468 for k, v := range e1.called {
471 for k, v := range e1.actionNodeEdits {
472 e.editActionNode(k, v)
474 for k, v := range e1.templateNodeEdits {
475 e.editTemplateNode(k, v)
477 for k, v := range e1.textNodeEdits {
484 // escapeTemplate escapes a {{template}} call node.
485 func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context {
486 c, name := e.escapeTree(c, n.Name, n.Line)
488 e.editTemplateNode(n, name)
493 // escapeTree escapes the named template starting in the given context as
494 // necessary and returns its output context.
495 func (e *escaper) escapeTree(c context, name string, line int) (context, string) {
496 // Mangle the template name with the input context to produce a reliable
498 dname := c.mangle(name)
499 e.called[dname] = true
500 if out, ok := e.output[dname]; ok {
504 t := e.template(name)
508 err: errorf(ErrNoSuchTemplate, line, "no such template %s", name),
512 // Use any template derived during an earlier call to EscapeSet
513 // with different top level templates, or clone if necessary.
514 dt := e.template(dname)
516 dt = template.New(dname)
517 dt.Tree = &parse.Tree{Name: dname, Root: cloneList(t.Root)}
518 e.derived[dname] = dt
522 return e.computeOutCtx(c, t), dname
525 // computeOutCtx takes a template and its start context and computes the output
526 // context while storing any inferences in e.
527 func (e *escaper) computeOutCtx(c context, t *template.Template) context {
528 // Propagate context over the body.
529 c1, ok := e.escapeTemplateBody(c, t)
531 // Look for a fixed point by assuming c1 as the output context.
532 if c2, ok2 := e.escapeTemplateBody(c1, t); ok2 {
535 // Use c1 as the error context if neither assumption worked.
537 if !ok && c1.state != stateError {
540 // TODO: Find the first node with a line in t.Tree.Root
541 err: errorf(ErrOutputContext, 0, "cannot compute output context for template %s", t.Name()),
547 // escapeTemplateBody escapes the given template assuming the given output
548 // context, and returns the best guess at the output context and whether the
549 // assumption was correct.
550 func (e *escaper) escapeTemplateBody(c context, t *template.Template) (context, bool) {
551 filter := func(e1 *escaper, c1 context) bool {
552 if c1.state == stateError {
553 // Do not update the input escaper, e.
556 if !e1.called[t.Name()] {
557 // If t is not recursively called, then c1 is an
558 // accurate output context.
561 // c1 is accurate if it matches our assumed output context.
564 // We need to assume an output context so that recursive template calls
565 // take the fast path out of escapeTree instead of infinitely recursing.
566 // Naively assuming that the input context is the same as the output
567 // works >90% of the time.
568 e.output[t.Name()] = c
569 return e.escapeListConditionally(c, t.Tree.Root, filter)
572 // delimEnds maps each delim to a string of characters that terminate it.
573 var delimEnds = [...]string{
574 delimDoubleQuote: `"`,
575 delimSingleQuote: "'",
576 // Determined empirically by running the below in various browsers.
577 // var div = document.createElement("DIV");
578 // for (var i = 0; i < 0x10000; ++i) {
579 // div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>";
580 // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0)
581 // document.write("<p>U+" + i.toString(16));
583 delimSpaceOrTagEnd: " \t\n\f\r>",
// doctypeBytes is the byte prefix that opens an HTML doctype declaration.
// escapeText checks text against it so that a '<' which begins a doctype is
// exempt from the handling it applies to other '<' bytes.
586 var doctypeBytes = []byte("<!DOCTYPE")
588 // escapeText escapes a text template node.
589 func (e *escaper) escapeText(c context, n *parse.TextNode) context {
590 s, written, i, b := n.Text, 0, 0, new(bytes.Buffer)
592 c1, nread := contextAfterText(c, s[i:])
594 if c.state == stateText || c.state == stateRCDATA {
596 if c1.state != c.state {
597 for j := end - 1; j >= i; j-- {
604 for j := i; j < end; j++ {
605 if s[j] == '<' && !bytes.HasPrefix(s[j:], doctypeBytes) {
606 b.Write(s[written:j])
607 b.WriteString("<")
611 } else if isComment(c.state) && c.delim == delimNone {
613 case stateJSBlockCmt:
614 // http://es5.github.com/#x7.4:
615 // "Comments behave like white space and are
616 // discarded except that, if a MultiLineComment
617 // contains a line terminator character, then
618 // the entire comment is considered to be a
619 // LineTerminator for purposes of parsing by
620 // the syntactic grammar."
621 if bytes.IndexAny(s[written:i1], "\n\r\u2028\u2029") != -1 {
626 case stateCSSBlockCmt:
631 if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone {
632 // Preserve the portion between written and the comment start.
634 if c1.state == stateHTMLCmt {
635 // "<!--" instead of "/*" or "//"
638 b.Write(s[written:cs])
641 if i == i1 && c.state == c1.state {
642 panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:]))
647 if written != 0 && c.state != stateError {
648 if !isComment(c.state) || c.delim != delimNone {
649 b.Write(n.Text[written:])
651 e.editTextNode(n, b.Bytes())
656 // contextAfterText starts in context c, consumes some tokens from the front of
657 // s, then returns the context after those tokens and the number of bytes of s
657 // that were consumed.
658 func contextAfterText(c context, s []byte) (context, int) {
659 if c.delim == delimNone {
660 c1, i := tSpecialTagEnd(c, s)
662 // A special end tag (`</script>`) has been seen and
663 // all content preceding it has been consumed.
666 // Consider all content up to any end tag.
667 return transitionFunc[c.state](c, s[:i])
670 i := bytes.IndexAny(s, delimEnds[c.delim])
674 if c.delim == delimSpaceOrTagEnd {
675 // http://www.w3.org/TR/html5/tokenization.html#attribute-value-unquoted-state
676 // lists the runes below as error characters.
677 // Error out because HTML parsers may differ on whether
678 // "<a id= onclick=f(" ends inside id's or onclick's value,
679 // "<a class=`foo " ends inside a value,
680 // "<a style=font:'Arial'" needs open-quote fixup.
681 // IE treats '`' as a quotation character.
682 if j := bytes.IndexAny(s[:i], "\"'<=`"); j >= 0 {
685 err: errorf(ErrBadHTML, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]),
690 // Remain inside the attribute.
691 // Decode the value so non-HTML rules can easily handle
692 // <button onclick="alert(&quot;Hi!&quot;)">
693 // without having to entity decode token boundaries.
694 for u := []byte(html.UnescapeString(string(s))); len(u) != 0; {
695 c1, i1 := transitionFunc[c.state](c, u)
700 if c.delim != delimSpaceOrTagEnd {
701 // Consume any quote.
704 // On exiting an attribute, we discard all state information
705 // except the state and element.
706 return context{state: stateTag, element: c.element}, i
709 // editActionNode records a change to an action pipeline for later commit.
710 func (e *escaper) editActionNode(n *parse.ActionNode, cmds []string) {
711 if _, ok := e.actionNodeEdits[n]; ok {
712 panic(fmt.Sprintf("node %s shared between templates", n))
714 e.actionNodeEdits[n] = cmds
717 // editTemplateNode records a change to a {{template}} callee for later commit.
718 func (e *escaper) editTemplateNode(n *parse.TemplateNode, callee string) {
719 if _, ok := e.templateNodeEdits[n]; ok {
720 panic(fmt.Sprintf("node %s shared between templates", n))
722 e.templateNodeEdits[n] = callee
725 // editTextNode records a change to a text node for later commit.
726 func (e *escaper) editTextNode(n *parse.TextNode, text []byte) {
727 if _, ok := e.textNodeEdits[n]; ok {
728 panic(fmt.Sprintf("node %s shared between templates", n))
730 e.textNodeEdits[n] = text
733 // commit applies changes to actions and template calls needed to contextually
734 // autoescape content and adds any derived templates to the set.
735 func (e *escaper) commit() {
736 for name, _ := range e.output {
737 e.template(name).Funcs(funcMap)
739 for _, t := range e.derived {
742 for n, s := range e.actionNodeEdits {
743 ensurePipelineContains(n.Pipe, s)
745 for n, name := range e.templateNodeEdits {
748 for n, s := range e.textNodeEdits {
753 // template returns the named template given a mangled template name.
754 func (e *escaper) template(name string) *template.Template {
755 t := e.set.Template(name)