1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // HTTP Request reading and parsing.
7 // The http package implements parsing of HTTP requests, replies,
8 // and URLs and provides an extensible HTTP server and a basic
27 maxLineLength = 4096 // assumed <= bufio.defaultBufSize
30 chunkSize = 4 << 10 // 4 KB chunks
33 // HTTP request parsing errors.
34 type ProtocolError struct {
39 ErrLineTooLong = &ProtocolError{"header line too long"}
40 ErrHeaderTooLong = &ProtocolError{"header too long"}
41 ErrShortBody = &ProtocolError{"entity body too short"}
42 ErrNotSupported = &ProtocolError{"feature not supported"}
43 ErrUnexpectedTrailer = &ProtocolError{"trailer header without chunked transfer encoding"}
44 ErrMissingContentLength = &ProtocolError{"missing ContentLength in HEAD response"}
45 ErrNotMultipart = &ProtocolError{"request Content-Type isn't multipart/form-data"}
46 ErrMissingBoundary = &ProtocolError{"no multipart boundary param Content-Type"}
49 type badStringError struct {
54 func (e *badStringError) String() string { return fmt.Sprintf("%s %q", e.what, e.str) } // formats as: what, a space, then str Go-quoted via %q
56 var reqExcludeHeader = map[string]bool{
60 "Content-Length": true,
61 "Transfer-Encoding": true,
65 // A Request represents a parsed HTTP request header.
67 Method string // GET, POST, PUT, etc.
68 RawURL string // The raw URL given in the request.
69 URL *URL // Parsed URL.
70 Proto string // "HTTP/1.0"
74 // A header maps request lines to their values.
77 // accept-encoding: gzip, deflate
78 // Accept-Language: en-us
79 // Connection: keep-alive
83 // Header = map[string]string{
84 // "Accept-Encoding": "gzip, deflate",
85 // "Accept-Language": "en-us",
86 // "Connection": "keep-alive",
89 // HTTP defines that header names are case-insensitive.
90 // The request parser implements this by canonicalizing the
91 // name, making the first character and any characters
92 // following a hyphen uppercase and the rest lowercase.
93 Header map[string]string
98 // ContentLength records the length of the associated content.
99 // The value -1 indicates that the length is unknown.
100 // Values >= 0 indicate that the given number of bytes may be read from Body.
103 // TransferEncoding lists the transfer encodings from outermost to innermost.
104 // An empty list denotes the "identity" encoding.
105 TransferEncoding []string
107 // Whether to close the connection after replying to this request.
110 // The host on which the URL is sought.
111 // Per RFC 2616, this is either the value of the Host: header
112 // or the host name given in the URL itself.
115 // The referring URL, if sent in the request.
117 // Referer is misspelled as in the request itself,
118 // a mistake from the earliest days of HTTP.
119 // This value can also be fetched from the Header map
120 // as Header["Referer"]; the benefit of making it
121 // available as a structure field is that the compiler
122 // can diagnose programs that use the alternate
123 // (correct English) spelling req.Referrer but cannot
124 // diagnose programs that use Header["Referrer"].
127 // The User-Agent: header string, if sent in the request.
130 // The parsed form. Only available after ParseForm is called.
131 Form map[string][]string
133 // Trailer maps trailer keys to values. As with Header, if the
134 // request has multiple trailer lines with the same key, they will be
135 // concatenated, delimited by commas.
136 Trailer map[string]string
139 // ProtoAtLeast returns whether the HTTP protocol used
140 // in the request is at least major.minor.
141 func (r *Request) ProtoAtLeast(major, minor int) bool {
142 return r.ProtoMajor > major ||
143 r.ProtoMajor == major && r.ProtoMinor >= minor
146 // MultipartReader returns a MIME multipart reader if this is a
147 // multipart/form-data POST request, else returns nil and an error.
148 func (r *Request) MultipartReader() (multipart.Reader, os.Error) {
149 v, ok := r.Header["Content-Type"]
151 return nil, ErrNotMultipart
153 d, params := mime.ParseMediaType(v)
154 if d != "multipart/form-data" {
155 return nil, ErrNotMultipart
157 boundary, ok := params["boundary"]
159 return nil, ErrMissingBoundary
161 return multipart.NewReader(r.Body, boundary), nil
164 // valueOrDefault returns value if it is nonempty, def otherwise.
165 func valueOrDefault(value, def string) string {
172 const defaultUserAgent = "Go http package" // User-Agent that Write passes to valueOrDefault as the fallback for req.UserAgent
174 // Write writes an HTTP/1.1 request -- header and body -- in wire format.
175 // This method consults the following fields of req:
177 // RawURL, if non-empty, or else URL
178 // Method (defaults to "GET")
179 // UserAgent (defaults to defaultUserAgent)
184 // If Body is present, Write forces "Transfer-Encoding: chunked" as a header
185 // and then closes Body when finished sending it.
186 func (req *Request) Write(w io.Writer) os.Error {
194 uri = valueOrDefault(urlEscape(req.URL.Path, encodePath), "/")
195 if req.URL.RawQuery != "" {
196 uri += "?" + req.URL.RawQuery
200 fmt.Fprintf(w, "%s %s HTTP/1.1\r\n", valueOrDefault(req.Method, "GET"), uri)
203 fmt.Fprintf(w, "Host: %s\r\n", host)
204 fmt.Fprintf(w, "User-Agent: %s\r\n", valueOrDefault(req.UserAgent, defaultUserAgent))
205 if req.Referer != "" {
206 fmt.Fprintf(w, "Referer: %s\r\n", req.Referer)
209 // Process Body,ContentLength,Close,Trailer
210 tw, err := newTransferWriter(req)
214 err = tw.WriteHeader(w)
219 // TODO: split long values? (If so, should share code with Conn.Write)
220 // TODO: if Header includes values for Host, User-Agent, or Referer, this
221 // may conflict with the User-Agent or Referer headers we add manually.
222 // One solution would be to remove the Host, UserAgent, and Referer fields
223 // from Request, and introduce Request methods along the lines of
224 // Response.{GetHeader,AddHeader} and string constants for "Host",
225 // "User-Agent" and "Referer".
226 err = writeSortedKeyValue(w, req.Header, reqExcludeHeader)
231 io.WriteString(w, "\r\n")
233 // Write body and trailer
234 err = tw.WriteBody(w)
242 // Read a line of bytes (up to \n) from b.
243 // Give up if the line is maxLineLength bytes or longer.
244 // The returned bytes are a pointer into storage in
245 // the bufio, so they are only valid until the next bufio read.
246 func readLineBytes(b *bufio.Reader) (p []byte, err os.Error) {
247 if p, err = b.ReadSlice('\n'); err != nil {
248 // We always know when EOF is coming.
249 // If the caller asked for a line, there should be a line.
251 err = io.ErrUnexpectedEOF
252 } else if err == bufio.ErrBufferFull {
257 if len(p) >= maxLineLength {
258 return nil, ErrLineTooLong
261 // Chop off trailing white space.
263 for i = len(p); i > 0; i-- {
264 if c := p[i-1]; c != ' ' && c != '\r' && c != '\t' && c != '\n' {
271 // readLineBytes, but convert the bytes into a string.
272 func readLine(b *bufio.Reader) (s string, err os.Error) {
273 p, e := readLineBytes(b)
277 return string(p), nil
280 var colon = []byte{':'} // separator that readKeyValue searches for with bytes.Index
282 // Read a key/value pair from b.
283 // A key/value has the form Key: Value\r\n
284 // and the Value can continue on multiple lines if each continuation line
285 // starts with a space.
286 func readKeyValue(b *bufio.Reader) (key, value string, err os.Error) {
287 line, e := readLineBytes(b)
295 // Scan first line for colon.
296 i := bytes.Index(line, colon)
301 key = string(line[0:i])
302 if strings.Contains(key, " ") {
303 // Key field has space - no good.
307 // Skip initial space before value.
308 for i++; i < len(line); i++ {
313 value = string(line[i:])
315 // Look for extension lines, which must begin with space.
325 // Eat leading space.
327 if c, e = b.ReadByte(); e != nil {
329 e = io.ErrUnexpectedEOF
336 // Read the rest of the line and add to value.
337 if line, e = readLineBytes(b); e != nil {
340 value += " " + string(line)
342 if len(value) >= maxValueLength {
343 return "", "", &badStringError{"value too long for key", key}
346 return key, value, nil
349 return "", "", &badStringError{"malformed header line", string(line)}
352 // Convert decimal at s[i:len(s)] to integer,
353 // returning value, string position where the digits stopped,
354 // and whether there was a valid number (digits, not too big).
355 func atoi(s string, i int) (n, i1 int, ok bool) {
357 if i >= len(s) || s[i] < '0' || s[i] > '9' {
361 for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ {
362 n = n*10 + int(s[i]-'0')
370 // Parse HTTP version: "HTTP/1.2" -> (1, 2, true).
371 func parseHTTPVersion(vers string) (int, int, bool) {
372 if len(vers) < 5 || vers[0:5] != "HTTP/" {
375 major, i, ok := atoi(vers, 5)
376 if !ok || i >= len(vers) || vers[i] != '.' {
380 minor, i, ok = atoi(vers, i+1)
381 if !ok || i != len(vers) {
384 return major, minor, true
387 // CanonicalHeaderKey returns the canonical format of the
388 // HTTP header key s. The canonicalization converts the first
389 // letter and any letter following a hyphen to upper case;
390 // the rest are converted to lowercase. For example, the
391 // canonical key for "accept-encoding" is "Accept-Encoding".
392 func CanonicalHeaderKey(s string) string {
393 // canonicalize: first letter upper case
394 // and upper case after each dash.
395 // (Host, User-Agent, If-Modified-Since).
396 // HTTP headers are ASCII only, so no Unicode issues.
399 for i := 0; i < len(s); i++ {
401 if upper && 'a' <= v && v <= 'z' {
407 if !upper && 'A' <= v && v <= 'Z' {
424 type chunkedReader struct {
426 n uint64 // unread bytes in chunk
430 func newChunkedReader(r *bufio.Reader) *chunkedReader {
431 return &chunkedReader{r: r}
434 func (cr *chunkedReader) beginChunk() {
437 line, cr.err = readLine(cr.r)
441 cr.n, cr.err = strconv.Btoui64(line, 16)
448 line, cr.err = readLine(cr.r)
460 func (cr *chunkedReader) Read(b []uint8) (n int, err os.Error) {
470 if uint64(len(b)) > cr.n {
473 n, cr.err = cr.r.Read(b)
475 if cr.n == 0 && cr.err == nil {
476 // end of chunk (CRLF)
478 if _, cr.err = io.ReadFull(cr.r, b); cr.err == nil {
479 if b[0] != '\r' || b[1] != '\n' {
480 cr.err = os.NewError("malformed chunked encoding")
487 // ReadRequest reads and parses a request from b.
488 func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
491 // First line: GET /index.html HTTP/1.0
493 if s, err = readLine(b); err != nil {
498 if f = strings.Split(s, " ", 3); len(f) < 3 {
499 return nil, &badStringError{"malformed HTTP request", s}
501 req.Method, req.RawURL, req.Proto = f[0], f[1], f[2]
503 if req.ProtoMajor, req.ProtoMinor, ok = parseHTTPVersion(req.Proto); !ok {
504 return nil, &badStringError{"malformed HTTP version", req.Proto}
507 if req.URL, err = ParseRequestURL(req.RawURL); err != nil {
511 // Subsequent lines: Key: value.
513 req.Header = make(map[string]string)
515 var key, value string
516 if key, value, err = readKeyValue(b); err != nil {
522 if nheader++; nheader >= maxHeaderLines {
523 return nil, ErrHeaderTooLong
526 key = CanonicalHeaderKey(key)
528 // RFC 2616 says that if you send the same header key
529 // multiple times, it has to be semantically equivalent
530 // to concatenating the values separated by commas.
531 oldvalue, present := req.Header[key]
533 req.Header[key] = oldvalue + "," + value
535 req.Header[key] = value
539 // RFC2616: Must treat
540 // GET /index.html HTTP/1.1
541 // Host: www.google.com
543 // GET http://www.google.com/index.html HTTP/1.1
544 // Host: doesntmatter
545 // the same. In the second case, any Host line is ignored.
546 req.Host = req.URL.Host
548 req.Host = req.Header["Host"]
550 req.Header["Host"] = "", false
552 fixPragmaCacheControl(req.Header)
554 // Pull out useful fields as a convenience to clients.
555 req.Referer = req.Header["Referer"]
556 req.Header["Referer"] = "", false
558 req.UserAgent = req.Header["User-Agent"]
559 req.Header["User-Agent"] = "", false
561 // TODO: Parse specific header values:
575 // If-Unmodified-Since
577 // Proxy-Authorization
579 // TE (transfer-codings)
587 err = readTransfer(req, b)
595 // ParseQuery parses the URL-encoded query string and returns
596 // a map listing the values specified for each key.
597 // ParseQuery always returns a non-nil map containing all the
598 // valid query parameters found; err describes the first decoding error
599 // encountered, if any.
600 func ParseQuery(query string) (m map[string][]string, err os.Error) {
601 m = make(map[string][]string)
602 err = parseQuery(m, query)
606 func parseQuery(m map[string][]string, query string) (err os.Error) {
607 for _, kv := range strings.Split(query, "&", -1) {
611 kvPair := strings.Split(kv, "=", 2)
613 var key, value string
615 key, e = URLUnescape(kvPair[0])
616 if e == nil && len(kvPair) > 1 {
617 value, e = URLUnescape(kvPair[1])
623 vec := vector.StringVector(m[key])
630 // ParseForm parses the raw query from the URL and, for POST requests, also parses the request body as a form.
632 func (r *Request) ParseForm() (err os.Error) {
637 r.Form = make(map[string][]string)
639 err = parseQuery(r.Form, r.URL.RawQuery)
641 if r.Method == "POST" {
643 return os.ErrorString("missing form body")
645 ct := r.Header["Content-Type"]
646 switch strings.Split(ct, ";", 2)[0] {
647 case "text/plain", "application/x-www-form-urlencoded", "":
648 b, e := ioutil.ReadAll(r.Body)
655 e = parseQuery(r.Form, string(b))
659 // TODO(dsymonds): Handle multipart/form-data
661 return &badStringError{"unknown Content-Type", ct}
667 // FormValue returns the first value for the named component of the query.
668 // FormValue calls ParseForm if necessary.
669 func (r *Request) FormValue(key string) string {
673 if vs := r.Form[key]; len(vs) > 0 {
679 func (r *Request) expectsContinue() bool {
680 expectation, ok := r.Header["Expect"]
681 return ok && strings.ToLower(expectation) == "100-continue"
684 func (r *Request) wantsHttp10KeepAlive() bool {
685 if r.ProtoMajor != 1 || r.ProtoMinor != 0 {
688 value, exists := r.Header["Connection"]
692 return strings.Contains(strings.ToLower(value), "keep-alive")