1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // HTTP Request reading and parsing.
7 // The http package implements parsing of HTTP requests, replies,
8 // and URLs and provides an extensible HTTP server and a basic
// maxLineLength bounds a single line read by readLineBytes: a line of this
// many bytes or more is rejected with ErrLineTooLong.
27 maxLineLength = 4096 // assumed <= bufio.defaultBufSize
// chunkSize is 4 << 10 = 4096 bytes.
30 chunkSize = 4 << 10 // 4 KB chunks
33 // HTTP request parsing errors.
// ProtocolError is the concrete type behind the exported Err* values in this
// file; each of them is built from a single descriptive string.
34 type ProtocolError struct {
// Predeclared protocol errors. Each is a *ProtocolError carrying a fixed
// message, so callers can compare a returned error against these values.
39 ErrLineTooLong = &ProtocolError{"header line too long"} // returned by readLineBytes
40 ErrHeaderTooLong = &ProtocolError{"header too long"}
41 ErrShortBody = &ProtocolError{"entity body too short"}
42 ErrNotSupported = &ProtocolError{"feature not supported"}
43 ErrUnexpectedTrailer = &ProtocolError{"trailer header without chunked transfer encoding"}
44 ErrMissingContentLength = &ProtocolError{"missing ContentLength in HEAD response"}
45 ErrNotMultipart = &ProtocolError{"request Content-Type isn't multipart/form-data"} // returned by MultipartReader
46 ErrMissingBoundary = &ProtocolError{"no multipart boundary param Content-Type"} // returned by MultipartReader
// badStringError reports a malformed piece of input: what describes the
// problem and str is the offending text.
49 type badStringError struct {
// String renders the error as the description followed by the offending
// text in quoted form (%q), so empty or unprintable input stays visible.
54 func (e *badStringError) String() string { return fmt.Sprintf("%s %q", e.what, e.str) }
// reqExcludeHeader is handed to writeSortedKeyValue by Request.write together
// with req.Header.
// NOTE(review): presumably the keys listed here are skipped when the header
// map is written, because they are emitted separately — confirm against
// writeSortedKeyValue.
56 var reqExcludeHeader = map[string]bool{
60 "Content-Length": true,
61 "Transfer-Encoding": true,
65 // A Request represents a parsed HTTP request header.
67 Method string // GET, POST, PUT, etc. Write sends "GET" when this is empty.
68 RawURL string // The raw URL given in the request.
69 URL *URL // Parsed URL.
70 Proto string // Protocol version string, e.g. "HTTP/1.0"
74 // A header maps request lines to their values.
77 // accept-encoding: gzip, deflate
78 // Accept-Language: en-us
79 // Connection: keep-alive
83 // Header = map[string]string{
84 // "Accept-Encoding": "gzip, deflate",
85 // "Accept-Language": "en-us",
86 // "Connection": "keep-alive",
89 // HTTP defines that header names are case-insensitive.
90 // The request parser implements this by canonicalizing the
91 // name, making the first character and any characters
92 // following a hyphen uppercase and the rest lowercase.
98 // ContentLength records the length of the associated content.
99 // The value -1 indicates that the length is unknown.
100 // Values >= 0 indicate that the given number of bytes may be read from Body.
103 // TransferEncoding lists the transfer encodings from outermost to innermost.
104 // An empty list denotes the "identity" encoding.
105 TransferEncoding []string
107 // Whether to close the connection after replying to this request.
110 // The host on which the URL is sought.
111 // Per RFC 2616, this is either the value of the Host: header
112 // or the host name given in the URL itself.
115 // The referring URL, if sent in the request.
117 // Referer is misspelled as in the request itself,
118 // a mistake from the earliest days of HTTP.
119 // This value can also be fetched from the Header map
120 // as Header["Referer"]; the benefit of making it
121 // available as a structure field is that the compiler
122 // can diagnose programs that use the alternate
123 // (correct English) spelling req.Referrer but cannot
124 // diagnose programs that use Header["Referrer"].
// Note that ReadRequest deletes "Referer" from Header after copying it
// into this field.
127 // The User-Agent: header string, if sent in the request.
// ReadRequest likewise deletes "User-Agent" from Header after copying it.
130 // The parsed form. Only available after ParseForm is called.
131 Form map[string][]string
133 // Trailer maps trailer keys to values. Like for Header, if the
134 // request has multiple trailer lines with the same key, they will be
135 // concatenated, delimited by commas.
139 // ProtoAtLeast returns whether the HTTP protocol used
140 // in the request is at least major.minor.
// The comparison is on (ProtoMajor, ProtoMinor) in that order: a larger major
// version always qualifies, and an equal major version qualifies when the
// minor version is at least minor.
141 func (r *Request) ProtoAtLeast(major, minor int) bool {
// && binds tighter than ||, so this reads as A || (B && C).
142 return r.ProtoMajor > major ||
143 r.ProtoMajor == major && r.ProtoMinor >= minor
146 // MultipartReader returns a MIME multipart reader if this is a
147 // multipart/form-data POST request, else returns nil and an error.
// The decision is made from the Content-Type header alone: the error is
// ErrNotMultipart when the media type is not multipart/form-data and
// ErrMissingBoundary when the boundary parameter is absent. On success the
// reader is built over r.Body with that boundary.
148 func (r *Request) MultipartReader() (multipart.Reader, os.Error) {
149 v := r.Header.Get("Content-Type")
151 return nil, ErrNotMultipart
// Split the header value into the media type and its parameters.
153 d, params := mime.ParseMediaType(v)
154 if d != "multipart/form-data" {
155 return nil, ErrNotMultipart
// The boundary parameter delimits the parts and is required.
157 boundary, ok := params["boundary"]
159 return nil, ErrMissingBoundary
161 return multipart.NewReader(r.Body, boundary), nil
164 // Return value if nonempty, def otherwise.
// Request.write uses it to supply defaults for the method, the User-Agent
// and an empty URL path.
165 func valueOrDefault(value, def string) string {
// defaultUserAgent is the User-Agent value Request.write sends when
// req.UserAgent is empty.
172 const defaultUserAgent = "Go http package"
174 // Write writes an HTTP/1.1 request -- header and body -- in wire format.
175 // This method consults the following fields of req:
177 // RawURL, if non-empty, or else URL
178 // Method (defaults to "GET")
179 // UserAgent (defaults to defaultUserAgent)
184 // If Body is present, Write forces "Transfer-Encoding: chunked" as a header
185 // and then closes Body when finished sending it.
// Write is req.write with usingProxy set to false; the returned error is
// whatever req.write reports.
186 func (req *Request) Write(w io.Writer) os.Error {
187 return req.write(w, false)
190 // WriteProxy is like Write but writes the request in the form
191 // expected by an HTTP proxy. It includes the scheme and host
192 // name in the URI instead of using a separate Host: header line.
// WriteProxy is req.write with usingProxy set to true.
193 func (req *Request) WriteProxy(w io.Writer) os.Error {
194 return req.write(w, true)
// write is the shared implementation behind Write and WriteProxy. The visible
// statements emit, in order: the request line, a Host line, a User-Agent
// line, an optional Referer line, the transfer-related header lines, the
// remaining entries of req.Header, the blank line ending the header, and
// finally the body and trailer.
// NOTE(review): several lines of this function are not visible here (error
// checks, the conditions around the Host line and the usingProxy branch);
// confirm the control flow against the full source before relying on it.
197 func (req *Request) write(w io.Writer, usingProxy bool) os.Error {
// Build the request URI from the escaped path, using "/" for an empty path,
// and append the raw query string when there is one.
205 uri = valueOrDefault(urlEscape(req.URL.Path, encodePath), "/")
206 if req.URL.RawQuery != "" {
207 uri += "?" + req.URL.RawQuery
212 if uri == "" || uri[0] != '/' {
// Absolute form: scheme://host followed by the URI built above.
// Presumably only used for proxy requests — confirm.
215 uri = req.URL.Scheme + "://" + host + uri
// Request line; an empty Method is sent as "GET" and the protocol is always
// written as HTTP/1.1.
218 fmt.Fprintf(w, "%s %s HTTP/1.1\r\n", valueOrDefault(req.Method, "GET"), uri)
222 fmt.Fprintf(w, "Host: %s\r\n", host)
224 fmt.Fprintf(w, "User-Agent: %s\r\n", valueOrDefault(req.UserAgent, defaultUserAgent))
// Referer is written only when set.
225 if req.Referer != "" {
226 fmt.Fprintf(w, "Referer: %s\r\n", req.Referer)
229 // Process Body,ContentLength,Close,Trailer
230 tw, err := newTransferWriter(req)
234 err = tw.WriteHeader(w)
239 // TODO: split long values? (If so, should share code with Conn.Write)
240 // TODO: if Header includes values for Host, User-Agent, or Referer, this
241 // may conflict with the User-Agent or Referer headers we add manually.
242 // One solution would be to remove the Host, UserAgent, and Referer fields
243 // from Request, and introduce Request methods along the lines of
244 // Response.{GetHeader,AddHeader} and string constants for "Host",
245 // "User-Agent" and "Referer".
246 err = writeSortedKeyValue(w, req.Header, reqExcludeHeader)
// Blank line terminating the header section.
251 io.WriteString(w, "\r\n")
253 // Write body and trailer
254 err = tw.WriteBody(w)
262 // Read a line of bytes (up to \n) from b.
263 // Give up if the line exceeds maxLineLength.
264 // The returned bytes are a pointer into storage in
265 // the bufio, so they are only valid until the next bufio read.
// Callers that need to keep the data must copy it (readLine does so by
// converting to a string). Trailing spaces, tabs, CR and LF are removed.
266 func readLineBytes(b *bufio.Reader) (p []byte, err os.Error) {
267 if p, err = b.ReadSlice('\n'); err != nil {
268 // We always know when EOF is coming.
269 // If the caller asked for a line, there should be a line.
// NOTE(review): the condition selecting this rewrite is not visible here;
// presumably it matches end-of-input — confirm.
271 err = io.ErrUnexpectedEOF
// ReadSlice reports ErrBufferFull when no '\n' fits in the buffer.
272 } else if err == bufio.ErrBufferFull {
// A line that was read successfully can still be too long.
277 if len(p) >= maxLineLength {
278 return nil, ErrLineTooLong
281 // Chop off trailing white space.
// Scan backwards until a byte that is not space, CR, tab or LF is found.
283 for i = len(p); i > 0; i-- {
284 if c := p[i-1]; c != ' ' && c != '\r' && c != '\t' && c != '\n' {
291 // readLineBytes, but convert the bytes into a string.
// The conversion copies the data, so unlike the slice from readLineBytes the
// result stays valid after later reads from b.
292 func readLine(b *bufio.Reader) (s string, err os.Error) {
293 p, e := readLineBytes(b)
297 return string(p), nil
300 // Convert decimal at s[i:len(s)] to integer,
301 // returning value, string position where the digits stopped,
302 // and whether there was a valid number (digits, not too big).
303 func atoi(s string, i int) (n, i1 int, ok bool) {
// There must be at least one digit at position i.
305 if i >= len(s) || s[i] < '0' || s[i] > '9' {
// Accumulate consecutive decimal digits, most significant first.
309 for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ {
310 n = n*10 + int(s[i]-'0')
318 // ParseHTTPVersion parses an HTTP version string.
319 // "HTTP/1.0" returns (1, 0, true).
// The accepted form is "HTTP/" followed by decimal digits, a '.', and decimal
// digits running to the end of the string.
320 func ParseHTTPVersion(vers string) (major, minor int, ok bool) {
// The string must start with the literal prefix "HTTP/".
321 if len(vers) < 5 || vers[0:5] != "HTTP/" {
// Major version: digits starting right after the prefix, followed by '.'.
324 major, i, ok := atoi(vers, 5)
325 if !ok || i >= len(vers) || vers[i] != '.' {
// Minor version: digits after the '.', with nothing left over.
328 minor, i, ok = atoi(vers, i+1)
329 if !ok || i != len(vers) {
332 return major, minor, true
// chunkedReader reads a chunked body from an underlying *bufio.Reader
// (field r), tracking the remaining bytes of the current chunk in n and the
// last error in err.
335 type chunkedReader struct {
337 n uint64 // unread bytes in chunk
// newChunkedReader returns a chunkedReader that reads from r. Only the r
// field is set; the other fields start at their zero values.
341 func newChunkedReader(r *bufio.Reader) *chunkedReader {
342 return &chunkedReader{r: r}
// beginChunk reads the next chunk header line and stores its size in cr.n.
// Failures are recorded in cr.err rather than returned.
345 func (cr *chunkedReader) beginChunk() {
348 line, cr.err = readLine(cr.r)
// The chunk size is written in hexadecimal.
352 cr.n, cr.err = strconv.Btoui64(line, 16)
// NOTE(review): the condition guarding this second read is not visible here;
// presumably it consumes what follows the final zero-length chunk — confirm.
359 line, cr.err = readLine(cr.r)
// Read reads data of the current chunk from the underlying reader into b.
// When a chunk has been fully consumed without error, it also reads the two
// bytes that follow and requires them to be CR LF.
371 func (cr *chunkedReader) Read(b []uint8) (n int, err os.Error) {
// Detect a buffer larger than what is left of the current chunk.
// Presumably b is shortened to cr.n here — the branch body is not visible.
381 if uint64(len(b)) > cr.n {
384 n, cr.err = cr.r.Read(b)
386 if cr.n == 0 && cr.err == nil {
387 // end of chunk (CRLF)
// NOTE(review): b must hold at least two bytes for the index checks below;
// an unseen line presumably rebinds b to a 2-byte buffer — confirm.
389 if _, cr.err = io.ReadFull(cr.r, b); cr.err == nil {
390 if b[0] != '\r' || b[1] != '\n' {
391 cr.err = os.NewError("malformed chunked encoding")
398 // ReadRequest reads and parses a request from b.
// It reads the request line and the header block through a textproto.Reader,
// fills in Method, RawURL, Proto, ProtoMajor, ProtoMinor, URL, Header, Host,
// Referer and UserAgent, and then calls readTransfer on the request.
// The Host, Referer and User-Agent entries are deleted from Header once they
// have been copied into their dedicated fields.
399 func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
401 tp := textproto.NewReader(b)
404 // First line: GET /index.html HTTP/1.0
406 if s, err = tp.ReadLine(); err != nil {
// NOTE(review): the condition selecting this rewrite is not visible here.
408 err = io.ErrUnexpectedEOF
// Split into at most three space-separated fields: method, URL, protocol.
414 if f = strings.Split(s, " ", 3); len(f) < 3 {
415 return nil, &badStringError{"malformed HTTP request", s}
417 req.Method, req.RawURL, req.Proto = f[0], f[1], f[2]
419 if req.ProtoMajor, req.ProtoMinor, ok = ParseHTTPVersion(req.Proto); !ok {
420 return nil, &badStringError{"malformed HTTP version", req.Proto}
423 if req.URL, err = ParseRequestURL(req.RawURL); err != nil {
427 // Subsequent lines: Key: value.
428 mimeHeader, err := tp.ReadMIMEHeader()
432 req.Header = Header(mimeHeader)
434 // RFC2616: Must treat
435 // GET /index.html HTTP/1.1
436 // Host: www.google.com
438 // GET http://www.google.com/index.html HTTP/1.1
439 // Host: doesntmatter
440 // the same. In the second case, any Host line is ignored.
// The host from the URL is assigned first; the Host header is the other
// source. Presumably the header is used only when the URL carries no host —
// the guarding condition is not visible here.
441 req.Host = req.URL.Host
443 req.Host = req.Header.Get("Host")
445 req.Header.Del("Host")
447 fixPragmaCacheControl(req.Header)
449 // Pull out useful fields as a convenience to clients.
450 req.Referer = req.Header.Get("Referer")
451 req.Header.Del("Referer")
453 req.UserAgent = req.Header.Get("User-Agent")
454 req.Header.Del("User-Agent")
456 // TODO: Parse specific header values:
470 // If-Unmodified-Since
472 // Proxy-Authorization
474 // TE (transfer-codings)
// Hand over to readTransfer with the request and the same reader.
482 err = readTransfer(req, b)
490 // ParseQuery parses the URL-encoded query string and returns
491 // a map listing the values specified for each key.
492 // ParseQuery always returns a non-nil map containing all the
493 // valid query parameters found; err describes the first decoding error
494 // encountered, if any.
// The map is allocated here and filled by parseQuery.
495 func ParseQuery(query string) (m map[string][]string, err os.Error) {
496 m = make(map[string][]string)
497 err = parseQuery(m, query)
// parseQuery decodes the "&"-separated key=value pairs of query and adds them
// to the caller-supplied map m.
501 func parseQuery(m map[string][]string, query string) (err os.Error) {
502 for _, kv := range strings.Split(query, "&", -1) {
// Split on the first "=" only, so a value may itself contain "=".
506 kvPair := strings.Split(kv, "=", 2)
508 var key, value string
510 key, e = URLUnescape(kvPair[0])
// A pair without "=" has no value part; value then stays empty.
511 if e == nil && len(kvPair) > 1 {
512 value, e = URLUnescape(kvPair[1])
// View the values already stored under key as a StringVector.
518 vec := vector.StringVector(m[key])
525 // ParseForm parses the request body as a form for POST requests, or the raw query for GET requests.
// The visible code parses r.URL.RawQuery into r.Form and, for POST, also
// parses the body when the Content-Type (ignoring parameters after ";") is
// text/plain, application/x-www-form-urlencoded or empty. Another
// Content-Type yields a badStringError.
// NOTE(review): some branches are not visible here; confirm which of these
// steps are conditional.
527 func (r *Request) ParseForm() (err os.Error) {
532 r.Form = make(map[string][]string)
534 err = parseQuery(r.Form, r.URL.RawQuery)
536 if r.Method == "POST" {
538 return os.ErrorString("missing form body")
540 ct := r.Header.Get("Content-Type")
// Switch on the media type only, dropping any parameters after ";".
541 switch strings.Split(ct, ";", 2)[0] {
542 case "text/plain", "application/x-www-form-urlencoded", "":
// The whole body is read into memory before it is parsed.
543 b, e := ioutil.ReadAll(r.Body)
550 e = parseQuery(r.Form, string(b))
554 // TODO(dsymonds): Handle multipart/form-data
556 return &badStringError{"unknown Content-Type", ct}
562 // FormValue returns the first value for the named component of the query.
563 // FormValue calls ParseForm if necessary.
564 func (r *Request) FormValue(key string) string {
// Only a key with at least one stored value takes this branch.
568 if vs := r.Form[key]; len(vs) > 0 {
// expectsContinue reports whether the request's Expect header equals
// "100-continue", compared case-insensitively.
574 func (r *Request) expectsContinue() bool {
575 return strings.ToLower(r.Header.Get("Expect")) == "100-continue"
// wantsHttp10KeepAlive checks the Connection header of an HTTP/1.0 request
// for "keep-alive", compared case-insensitively as a substring.
578 func (r *Request) wantsHttp10KeepAlive() bool {
// Any protocol version other than exactly 1.0 takes this branch.
// Presumably it returns false — the branch body is not visible here.
579 if r.ProtoMajor != 1 || r.ProtoMinor != 0 {
582 return strings.Contains(strings.ToLower(r.Header.Get("Connection")), "keep-alive")