1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
14 // endsWithCSSKeyword returns whether b ends with an ident that
15 // case-insensitively matches the lower-case kw.
16 func endsWithCSSKeyword(b []byte, kw string) bool {
23 r, _ := utf8.DecodeLastRune(b[:i])
29 // Many CSS keywords, such as "!important" can have characters encoded,
30 // but the URI production does not allow that according to
31 // http://www.w3.org/TR/css3-syntax/#TOK-URI
32 // This does not attempt to recognize encoded keywords. For example,
33 // given "\75\72\6c" and "url" this returns false.
34 return string(bytes.ToLower(b[i:])) == kw
37 // isCSSNmchar returns whether rune is allowed anywhere in a CSS identifier.
38 func isCSSNmchar(rune int) bool {
39 // Based on the CSS3 nmchar production but ignores multi-rune escape
41 // http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
42 return 'a' <= rune && rune <= 'z' ||
43 'A' <= rune && rune <= 'Z' ||
44 '0' <= rune && rune <= '9' ||
47 // Non-ASCII cases below.
48 0x80 <= rune && rune <= 0xd7ff ||
49 0xe000 <= rune && rune <= 0xfffd ||
50 0x10000 <= rune && rune <= 0x10ffff
53 // decodeCSS decodes CSS3 escapes given a sequence of stringchars.
54 // If there is no change, it returns the input, otherwise it returns a slice
55 // backed by a new array.
56 // http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
57 func decodeCSS(s []byte) []byte {
58 i := bytes.IndexByte(s, '\\')
62 // The UTF-8 sequence for a codepoint is never longer than 1 + the
63 // number of hex digits needed to represent that codepoint, so len(s) is an
64 // upper bound on the output length.
65 b := make([]byte, 0, len(s))
67 i := bytes.IndexByte(s, '\\')
71 b, s = append(b, s[:i]...), s[i:]
75 // http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
76 // escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
78 // http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
79 // unicode ::= '\' [0-9a-fA-F]{1,6} wc?
81 for j < len(s) && j < 7 && isHex(s[j]) {
84 rune := hexDecode(s[1:j])
85 if rune > unicode.MaxRune {
86 rune, j = rune/16, j-1
88 n := utf8.EncodeRune(b[len(b):cap(b)], rune)
89 // The optional space at the end allows a hex
90 // sequence to be followed by a literal hex digit.
91 // string(decodeCSS([]byte(`\A B`))) == "\nB"
92 b, s = b[:len(b)+n], skipCSSSpace(s[j:])
94 // `\\` decodes to `\` and `\"` to `"`.
95 _, n := utf8.DecodeRune(s[1:])
96 b, s = append(b, s[1:1+n]...), s[1+n:]
102 // isHex returns whether the given character is a hex digit.
103 func isHex(c byte) bool {
104 return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
107 // hexDecode decodes a short hex digit sequence: "10" -> 16.
108 func hexDecode(s []byte) int {
110 for _, c := range s {
113 case '0' <= c && c <= '9':
115 case 'a' <= c && c <= 'f':
117 case 'A' <= c && c <= 'F':
120 panic(fmt.Sprintf("Bad hex digit in %q", s))
126 // skipCSSSpace returns a suffix of c, skipping over a single space.
127 func skipCSSSpace(c []byte) []byte {
131 // wc ::= #x9 | #xA | #xC | #xD | #x20
133 case '\t', '\n', '\f', ' ':
136 // This differs from CSS3's wc production because that production
137 // contains a probable spec error whereby wc contains all the single-byte
138 // sequences in nl (newline) but not CRLF.
139 if len(c) >= 2 && c[1] == '\n' {
147 // isCSSSpace returns whether b is a CSS space char as defined in wc.
148 func isCSSSpace(b byte) bool {
150 case '\t', '\n', '\f', '\r', ' ':
156 // cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
157 func cssEscaper(args ...interface{}) string {
158 s, _ := stringify(args...)
161 for i, r := range s {
174 // Encode HTML specials as hex so the output can be embedded
175 // in HTML attributes without further encoding.
207 b.WriteString(s[written:i])
209 written = i + utf8.RuneLen(r)
210 if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
217 b.WriteString(s[written:])
221 var expressionBytes = []byte("expression")
222 var mozBindingBytes = []byte("mozbinding")
224 // cssValueFilter allows innocuous CSS values in the output including CSS
225 // quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
226 // (inherit, blue), and colors (#888).
227 // It filters out unsafe values, such as those that affect token boundaries,
228 // and anything that might execute scripts.
229 func cssValueFilter(args ...interface{}) string {
230 s, t := stringify(args...)
231 if t == contentTypeCSS {
234 b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
236 // CSS3 error handling is specified as honoring string boundaries per
237 // http://www.w3.org/TR/css3-syntax/#error-handling :
238 // Malformed declarations. User agents must handle unexpected
239 // tokens encountered while parsing a declaration by reading until
240 // the end of the declaration, while observing the rules for
241 // matching pairs of (), [], {}, "", and '', and correctly handling
242 // escapes. For example, a malformed declaration may be missing a
243 // property, colon (:) or value.
244 // So we need to make sure that values do not have mismatched bracket
245 // or quote characters to prevent the browser from restarting parsing
246 // inside a string that might embed JavaScript source.
247 for i, c := range b {
249 case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
250 return filterFailsafe
252 // Disallow <!-- or -->.
253 // -- should not appear in valid identifiers.
254 if i != 0 && '-' == b[i-1] {
255 return filterFailsafe
258 if c < 0x80 && isCSSNmchar(int(c)) {
263 id = bytes.ToLower(id)
264 if bytes.Index(id, expressionBytes) != -1 || bytes.Index(id, mozBindingBytes) != -1 {
265 return filterFailsafe