// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package url parses URLs and implements query escaping.
// Error reports an error and the operation and URL that caused it.
type Error struct {
	Op  string // operation that failed, e.g. "parse"
	URL string // URL the operation was applied to
	Err error  // underlying cause; must be non-nil when Error is called
}

// Error formats the failure as "<Op> <URL>: <underlying error>".
func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() }
// ishex reports whether c is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func ishex(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
// unhex returns the numeric value (0-15) of the ASCII hexadecimal
// digit c. It returns 0 for any byte that is not a hexadecimal digit,
// so callers are expected to validate input with ishex first.
func unhex(c byte) byte {
	switch {
	case '0' <= c && c <= '9':
		return c - '0'
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10
	}
	return 0
}
// encoding identifies which part of a URL is being escaped or
// unescaped; shouldEscape permits a different set of reserved
// characters for each part.
type encoding int

const (
	encodePath encoding = 1 + iota
	encodeUserPassword
	encodeQueryComponent
	encodeFragment
	encodeOpaque
)
// EscapeError holds the text of a malformed percent-escape
// encountered while unescaping a URL.
type EscapeError string

// Error returns a message that quotes the offending escape text.
func (e EscapeError) Error() string {
	return "invalid URL escape " + strconv.Quote(string(e))
}
64 // Return true if the specified character should be escaped when
65 // appearing in a URL string, according to RFC 2396.
66 // When 'all' is true the full range of reserved characters are matched.
67 func shouldEscape(c byte, mode encoding) bool {
68 // RFC 2396 §2.3 Unreserved characters (alphanum)
69 if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
73 case '-', '_', '.', '!', '~', '*', '\'', '(', ')': // §2.3 Unreserved characters (mark)
76 case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
77 // Different sections of the URL allow a few of
78 // the reserved characters to appear unescaped.
80 case encodePath: // §3.3
81 // The RFC allows : @ & = + $ , but saves / ; for assigning
82 // meaning to individual path segments. This package
83 // only manipulates the path as a whole, so we allow those
84 // last two as well. Clients that need to distinguish between
85 // `/foo;y=z/bar` and `/foo%3by=z/bar` will have to re-decode RawPath.
86 // That leaves only ? to escape.
89 case encodeUserPassword: // §3.2.2
90 // The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /.
91 // The parsing of userinfo treats : as special so we must escape that too.
92 return c == '@' || c == '/' || c == ':'
94 case encodeQueryComponent: // §3.4
95 // The RFC reserves (so we must escape) everything.
98 case encodeFragment: // §4.1
99 // The RFC text is silent but the grammar allows
100 // everything, so escape nothing.
103 case encodeOpaque: // §3 opaque_part
104 // The RFC allows opaque_part to use all characters
105 // except that the leading / must be escaped.
106 // (We implement that case in String.)
111 // Everything else must be escaped.
115 // QueryUnescape does the inverse transformation of QueryEscape, converting
116 // %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if
117 // any % is not followed by two hexadecimal digits.
118 func QueryUnescape(s string) (string, error) {
119 return unescape(s, encodeQueryComponent)
122 // unescape unescapes a string; the mode specifies
123 // which section of the URL string is being unescaped.
124 func unescape(s string, mode encoding) (string, error) {
125 // Count %, check that they're well-formed.
128 for i := 0; i < len(s); {
132 if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
137 return "", EscapeError(s)
141 hasPlus = mode == encodeQueryComponent
148 if n == 0 && !hasPlus {
152 t := make([]byte, len(s)-2*n)
154 for i := 0; i < len(s); {
157 t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
161 if mode == encodeQueryComponent {
174 return string(t), nil
177 // QueryEscape escapes the string so it can be safely placed
178 // inside a URL query.
179 func QueryEscape(s string) string {
180 return escape(s, encodeQueryComponent)
183 func escape(s string, mode encoding) string {
184 spaceCount, hexCount := 0, 0
185 for i := 0; i < len(s); i++ {
187 if shouldEscape(c, mode) {
188 if c == ' ' && mode == encodeQueryComponent {
196 if spaceCount == 0 && hexCount == 0 {
200 t := make([]byte, len(s)+2*hexCount)
202 for i := 0; i < len(s); i++ {
204 case c == ' ' && mode == encodeQueryComponent:
207 case shouldEscape(c, mode):
209 t[j+1] = "0123456789ABCDEF"[c>>4]
210 t[j+2] = "0123456789ABCDEF"[c&15]
220 // UnescapeUserinfo parses the RawUserinfo field of a URL
221 // as the form user or user:password and unescapes and returns
224 // This functionality should only be used with legacy web sites.
225 // RFC 2396 warns that interpreting Userinfo this way
226 // ``is NOT RECOMMENDED, because the passing of authentication
227 // information in clear text (such as URI) has proven to be a
228 // security risk in almost every case where it has been used.''
229 func UnescapeUserinfo(rawUserinfo string) (user, password string, err error) {
230 u, p := split(rawUserinfo, ':', true)
231 if user, err = unescape(u, encodeUserPassword); err != nil {
234 if password, err = unescape(p, encodeUserPassword); err != nil {
240 // EscapeUserinfo combines user and password in the form
241 // user:password (or just user if password is empty) and then
242 // escapes it for use as the URL.RawUserinfo field.
244 // This functionality should only be used with legacy web sites.
245 // RFC 2396 warns that interpreting Userinfo this way
246 // ``is NOT RECOMMENDED, because the passing of authentication
247 // information in clear text (such as URI) has proven to be a
248 // security risk in almost every case where it has been used.''
249 func EscapeUserinfo(user, password string) string {
250 raw := escape(user, encodeUserPassword)
252 raw += ":" + escape(password, encodeUserPassword)
// A URL represents a parsed URL (technically, a URI reference).
// The general form represented is:
//	scheme://[userinfo@]host/path[?query][#fragment]
// The Raw, RawAuthority, RawPath, and RawQuery fields are in "wire format"
// (special characters must be hex-escaped if not meant to have special meaning).
// All other fields are logical values; '+' or '%' represent themselves.
//
// The various Raw values are supplied in wire format because
// clients typically have to split them into pieces before further
// decoding.
type URL struct {
	Raw          string // the original string
	Scheme       string // scheme
	RawAuthority string // [userinfo@]host
	RawUserinfo  string // userinfo
	Host         string // host
	RawPath      string // /path[?query][#fragment]
	Path         string // /path
	OpaquePath   bool   // path is opaque (unrooted when scheme is present)
	RawQuery     string // query
	Fragment     string // fragment
}
// getscheme splits a rawurl of the form scheme:path into its two parts.
// (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*)
// If rawurl has a valid scheme it returns scheme, path; otherwise it
// returns "", rawurl. The only error case is a rawurl that begins
// with ':', i.e. one whose scheme is empty.
func getscheme(rawurl string) (scheme, path string, err error) {
	for i := 0; i < len(rawurl); i++ {
		c := rawurl[i]
		switch {
		case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
			// letters are valid at any position
		case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.':
			if i == 0 {
				// a scheme must begin with a letter
				return "", rawurl, nil
			}
		case c == ':':
			if i == 0 {
				return "", "", errors.New("missing protocol scheme")
			}
			return rawurl[0:i], rawurl[i+1:], nil
		default:
			// we have encountered an invalid character,
			// so there is no valid scheme
			return "", rawurl, nil
		}
	}
	return "", rawurl, nil
}
// split cuts s at the first occurrence of the byte c.
// If s is of the form t c u, it returns t, c u
// (or t, u if cutc == true).
// If c does not occur in s, it returns s, "".
func split(s string, c byte, cutc bool) (string, string) {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			if cutc {
				return s[0:i], s[i+1:]
			}
			return s[0:i], s[i:]
		}
	}
	return s, ""
}
322 // Parse parses rawurl into a URL structure.
323 // The string rawurl is assumed not to have a #fragment suffix.
324 // (Web browsers strip #fragment before sending the URL to a web server.)
325 // The rawurl may be relative or absolute.
326 func Parse(rawurl string) (url *URL, err error) {
327 return parse(rawurl, false)
330 // ParseRequest parses rawurl into a URL structure. It assumes that
331 // rawurl was received from an HTTP request, so the rawurl is interpreted
332 // only as an absolute URI or an absolute path.
333 // The string rawurl is assumed not to have a #fragment suffix.
334 // (Web browsers strip #fragment before sending the URL to a web server.)
335 func ParseRequest(rawurl string) (url *URL, err error) {
336 return parse(rawurl, true)
339 // parse parses a URL from a string in one of two contexts. If
340 // viaRequest is true, the URL is assumed to have arrived via an HTTP request,
341 // in which case only absolute URLs or path-absolute relative URLs are allowed.
342 // If viaRequest is false, all forms of relative URLs are allowed.
343 func parse(rawurl string, viaRequest bool) (url *URL, err error) {
350 err = errors.New("empty url")
356 // Split off possible leading "http:", "mailto:", etc.
357 // Cannot contain escaped characters.
358 if url.Scheme, path, err = getscheme(rawurl); err != nil {
361 leadingSlash = strings.HasPrefix(path, "/")
363 if url.Scheme != "" && !leadingSlash {
365 // Absolute URI (has scheme) with non-rooted path
366 // is uninterpreted. It doesn't even have a ?query.
367 // This is the case that handles mailto:name@example.com.
370 if url.Path, err = unescape(path, encodeOpaque); err != nil {
373 url.OpaquePath = true
375 if viaRequest && !leadingSlash {
376 err = errors.New("invalid URI for request")
380 // Split off query before parsing path further.
382 path, query := split(path, '?', false)
384 url.RawQuery = query[1:]
387 // Maybe path is //authority/path
388 if (url.Scheme != "" || !viaRequest) &&
389 strings.HasPrefix(path, "//") && !strings.HasPrefix(path, "///") {
390 url.RawAuthority, path = split(path[2:], '/', false)
391 url.RawPath = url.RawPath[2+len(url.RawAuthority):]
394 // Split authority into userinfo@host.
395 // If there's no @, split's default is wrong. Check explicitly.
397 if strings.Index(url.RawAuthority, "@") < 0 {
398 rawHost = url.RawAuthority
400 url.RawUserinfo, rawHost = split(url.RawAuthority, '@', true)
403 // We leave RawAuthority only in raw form because clients
404 // of common protocols should be using Userinfo and Host
405 // instead. Clients that wish to use RawAuthority will have to
406 // interpret it themselves: RFC 2396 does not define the meaning.
408 if strings.Contains(rawHost, "%") {
409 // Host cannot contain escaped characters.
410 err = errors.New("hexadecimal escape in host")
415 if url.Path, err = unescape(path, encodePath); err != nil {
422 return nil, &Error{"parse", rawurl, err}
426 // ParseWithReference is like Parse but allows a trailing #fragment.
427 func ParseWithReference(rawurlref string) (url *URL, err error) {
429 rawurl, frag := split(rawurlref, '#', false)
430 if url, err = Parse(rawurl); err != nil {
437 if url.Fragment, err = unescape(frag, encodeFragment); err != nil {
438 return nil, &Error{"parse", rawurl, err}
444 // String reassembles url into a valid URL string.
446 // There are redundant fields stored in the URL structure:
447 // the String method consults Scheme, Path, Host, RawUserinfo,
448 // RawQuery, and Fragment, but not Raw, RawPath or RawAuthority.
449 func (url *URL) String() string {
451 if url.Scheme != "" {
452 result += url.Scheme + ":"
454 if url.Host != "" || url.RawUserinfo != "" {
456 if url.RawUserinfo != "" {
457 // hide the password, if any
458 info := url.RawUserinfo
459 if i := strings.Index(info, ":"); i >= 0 {
460 info = info[0:i] + ":******"
468 if strings.HasPrefix(path, "/") {
472 result += escape(path, encodeOpaque)
474 result += escape(url.Path, encodePath)
476 if url.RawQuery != "" {
477 result += "?" + url.RawQuery
479 if url.Fragment != "" {
480 result += "#" + escape(url.Fragment, encodeFragment)
// Values maps a string key to a list of values.
// It is typically used for query parameters and form values.
// Unlike in the http.Header map, the keys in a Values map
// are case-sensitive.
type Values map[string][]string

// Get gets the first value associated with the given key.
// If there are no values associated with the key, Get returns
// the empty string. To access multiple values, use the map
// directly.
func (v Values) Get(key string) string {
	if v == nil {
		return ""
	}
	vs, ok := v[key]
	if !ok || len(vs) == 0 {
		return ""
	}
	return vs[0]
}

// Set sets the key to value. It replaces any existing
// values.
func (v Values) Set(key, value string) {
	v[key] = []string{value}
}

// Add adds the value to key. It appends to any existing
// values associated with key.
func (v Values) Add(key, value string) {
	v[key] = append(v[key], value)
}

// Del deletes the values associated with key.
func (v Values) Del(key string) {
	delete(v, key)
}
523 // ParseQuery parses the URL-encoded query string and returns
524 // a map listing the values specified for each key.
525 // ParseQuery always returns a non-nil map containing all the
526 // valid query parameters found; err describes the first decoding error
527 // encountered, if any.
528 func ParseQuery(query string) (m Values, err error) {
530 err = parseQuery(m, query)
534 func parseQuery(m Values, query string) (err error) {
537 if i := strings.IndexAny(key, "&;"); i >= 0 {
538 key, query = key[:i], key[i+1:]
546 if i := strings.Index(key, "="); i >= 0 {
547 key, value = key[:i], key[i+1:]
549 key, err1 := QueryUnescape(key)
554 value, err1 = QueryUnescape(value)
559 m[key] = append(m[key], value)
564 // Encode encodes the values into ``URL encoded'' form.
565 // e.g. "foo=bar&bar=baz"
566 func (v Values) Encode() string {
570 parts := make([]string, 0, len(v)) // will be large enough for most uses
571 for k, vs := range v {
572 prefix := QueryEscape(k) + "="
573 for _, v := range vs {
574 parts = append(parts, prefix+QueryEscape(v))
577 return strings.Join(parts, "&")
// resolvePath applies the path segments of refpath, including the
// special segments "." and "..", to basepath, per RFC 2396.
// The last segment of basepath is replaced by the first segment of
// refpath; ".." never removes the first segment of basepath.
func resolvePath(basepath string, refpath string) string {
	// strings.Split always yields at least one element, so base[len(base)-1]
	// is valid throughout.
	base := strings.Split(basepath, "/")
	refs := strings.Split(refpath, "/")
	for idx, ref := range refs {
		switch {
		case ref == ".":
			// Stay in the current directory: blank the last segment.
			base[len(base)-1] = ""
		case ref == "..":
			// Go up one directory, but never past the first segment.
			newLen := len(base) - 1
			if newLen < 1 {
				newLen = 1
			}
			base = base[0:newLen]
			base[len(base)-1] = ""
		default:
			// The first ref segment, or any segment after a blanked
			// slot, overwrites the last slot; later ones are appended.
			if idx == 0 || base[len(base)-1] == "" {
				base[len(base)-1] = ref
			} else {
				base = append(base, ref)
			}
		}
	}
	return strings.Join(base, "/")
}
610 // IsAbs returns true if the URL is absolute.
611 func (url *URL) IsAbs() bool {
612 return url.Scheme != ""
615 // Parse parses a URL in the context of a base URL. The URL in ref
616 // may be relative or absolute. Parse returns nil, err on parse
617 // failure, otherwise its return value is the same as ResolveReference.
618 func (base *URL) Parse(ref string) (*URL, error) {
619 refurl, err := Parse(ref)
623 return base.ResolveReference(refurl), nil
626 // ResolveReference resolves a URI reference to an absolute URI from
627 // an absolute base URI, per RFC 2396 Section 5.2. The URI reference
628 // may be relative or absolute. ResolveReference always returns a new
629 // URL instance, even if the returned URL is identical to either the
630 // base or reference. If ref is an absolute URL, then ResolveReference
631 // ignores base and returns a copy of ref.
632 func (base *URL) ResolveReference(ref *URL) *URL {
638 // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
640 if ref.RawAuthority != "" {
641 // The "net_path" case.
642 url.RawAuthority = ref.RawAuthority
644 url.RawUserinfo = ref.RawUserinfo
649 url.RawPath = ref.RawPath
650 url.RawQuery = ref.RawQuery
651 case strings.HasPrefix(ref.Path, "/"):
652 // The "abs_path" case.
654 url.RawPath = ref.RawPath
655 url.RawQuery = ref.RawQuery
657 // The "rel_path" case.
658 path := resolvePath(base.Path, ref.Path)
659 if !strings.HasPrefix(path, "/") {
663 url.RawPath = url.Path
664 url.RawQuery = ref.RawQuery
665 if ref.RawQuery != "" {
666 url.RawPath += "?" + url.RawQuery
670 url.Fragment = ref.Fragment
672 url.Raw = url.String()
676 // Query parses RawQuery and returns the corresponding values.
677 func (u *URL) Query() Values {
678 v, _ := ParseQuery(u.RawQuery)
682 // EncodedPath returns the URL's path in "URL path encoded" form.
683 func (u *URL) EncodedPath() string {
684 return escape(u.Path, encodePath)