libgo/go/mime/multipart/multipart.go

   1 // Copyright 2010 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4 //
   5
   6 /*
   7 Package multipart implements MIME multipart parsing, as defined in RFC
   8 2046.
   9
  10 The implementation is sufficient for HTTP (RFC 2388) and the multipart
  11 bodies generated by popular browsers.
  12 */
  13 package multipart
  14
  15 import (
  16         "bufio"
  17         "bytes"
  18         "fmt"
  19         "io"
  20         "io/ioutil"
  21         "mime"
  22         "net/textproto"
  23 )
  24
  25 var emptyParams = make(map[string]string)
  26
  27 // A Part represents a single part in a multipart body.
  28 type Part struct {
  29         // The headers of the body, if any, with the keys canonicalized
  30         // in the same fashion that the Go http.Request headers are.
  31         // i.e. "foo-bar" changes case to "Foo-Bar"
  32         Header textproto.MIMEHeader
  33
  34         buffer    *bytes.Buffer
  35         mr        *Reader
  36         bytesRead int
  37
  38         disposition       string
  39         dispositionParams map[string]string
  40 }
  41
  42 // FormName returns the name parameter if p has a Content-Disposition
  43 // of type "form-data".  Otherwise it returns the empty string.
  44 func (p *Part) FormName() string {
  45         // See http://tools.ietf.org/html/rfc2183 section 2 for EBNF
  46         // of Content-Disposition value format.
  47         if p.dispositionParams == nil {
  48                 p.parseContentDisposition()
  49         }
  50         if p.disposition != "form-data" {
  51                 return ""
  52         }
  53         return p.dispositionParams["name"]
  54 }
  55
  56 // FileName returns the filename parameter of the Part's
  57 // Content-Disposition header.
  58 func (p *Part) FileName() string {
  59         if p.dispositionParams == nil {
  60                 p.parseContentDisposition()
  61         }
  62         return p.dispositionParams["filename"]
  63 }
  64
  65 func (p *Part) parseContentDisposition() {
  66         v := p.Header.Get("Content-Disposition")
  67         var err error
  68         p.disposition, p.dispositionParams, err = mime.ParseMediaType(v)
  69         if err != nil {
  70                 p.dispositionParams = emptyParams
  71         }
  72 }
  73
  74 // NewReader creates a new multipart Reader reading from reader using the
  75 // given MIME boundary.
  76 func NewReader(reader io.Reader, boundary string) *Reader {
  77         b := []byte("\r\n--" + boundary + "--")
  78         return &Reader{
  79                 bufReader: bufio.NewReader(reader),
  80
  81                 nl:               b[:2],
  82                 nlDashBoundary:   b[:len(b)-2],
  83                 dashBoundaryDash: b[2:],
  84                 dashBoundary:     b[2 : len(b)-2],
  85         }
  86 }
  87
  88 func newPart(mr *Reader) (*Part, error) {
  89         bp := &Part{
  90                 Header: make(map[string][]string),
  91                 mr:     mr,
  92                 buffer: new(bytes.Buffer),
  93         }
  94         if err := bp.populateHeaders(); err != nil {
  95                 return nil, err
  96         }
  97         return bp, nil
  98 }
  99
 100 func (bp *Part) populateHeaders() error {
 101         r := textproto.NewReader(bp.mr.bufReader)
 102         header, err := r.ReadMIMEHeader()
 103         if err == nil {
 104                 bp.Header = header
 105         }
 106         return err
 107 }
 108
 109 // Read reads the body of a part, after its headers and before the
 110 // next part (if any) begins.
 111 func (p *Part) Read(d []byte) (n int, err error) {
 112         defer func() {
 113                 p.bytesRead += n
 114         }()
 115         if p.buffer.Len() >= len(d) {
 116                 // Internal buffer of unconsumed data is large enough for
 117                 // the read request.  No need to parse more at the moment.
 118                 return p.buffer.Read(d)
 119         }
 120         peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor
 121
 122         // Look for an immediate empty part without a leading \r\n
 123         // before the boundary separator.  Some MIME code makes empty
 124         // parts like this. Most browsers, however, write the \r\n
 125         // before the subsequent boundary even for empty parts and
 126         // won't hit this path.
 127         if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) {
 128                 return 0, io.EOF
 129         }
 130         unexpectedEOF := err == io.EOF
 131         if err != nil && !unexpectedEOF {
 132                 return 0, fmt.Errorf("multipart: Part Read: %v", err)
 133         }
 134         if peek == nil {
 135                 panic("nil peek buf")
 136         }
 137
 138         // Search the peek buffer for "\r\n--boundary". If found,
 139         // consume everything up to the boundary. If not, consume only
 140         // as much of the peek buffer as cannot hold the boundary
 141         // string.
 142         nCopy := 0
 143         foundBoundary := false
 144         if idx := bytes.Index(peek, p.mr.nlDashBoundary); idx != -1 {
 145                 nCopy = idx
 146                 foundBoundary = true
 147         } else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 {
 148                 nCopy = safeCount
 149         } else if unexpectedEOF {
 150                 // If we've run out of peek buffer and the boundary
 151                 // wasn't found (and can't possibly fit), we must have
 152                 // hit the end of the file unexpectedly.
 153                 return 0, io.ErrUnexpectedEOF
 154         }
 155         if nCopy > 0 {
 156                 if _, err := io.CopyN(p.buffer, p.mr.bufReader, int64(nCopy)); err != nil {
 157                         return 0, err
 158                 }
 159         }
 160         n, err = p.buffer.Read(d)
 161         if err == io.EOF && !foundBoundary {
 162                 // If the boundary hasn't been reached there's more to
 163                 // read, so don't pass through an EOF from the buffer
 164                 err = nil
 165         }
 166         return
 167 }
 168
 169 func (p *Part) Close() error {
 170         io.Copy(ioutil.Discard, p)
 171         return nil
 172 }
 173
 174 // Reader is an iterator over parts in a MIME multipart body.
 175 // Reader's underlying parser consumes its input as needed.  Seeking
 176 // isn't supported.
 177 type Reader struct {
 178         bufReader *bufio.Reader
 179
 180         currentPart *Part
 181         partsRead   int
 182
 183         nl               []byte // "\r\n" or "\n" (set after seeing first boundary line)
 184         nlDashBoundary   []byte // nl + "--boundary"
 185         dashBoundaryDash []byte // "--boundary--"
 186         dashBoundary     []byte // "--boundary"
 187 }
 188
 189 // NextPart returns the next part in the multipart or an error.
 190 // When there are no more parts, the error io.EOF is returned.
 191 func (r *Reader) NextPart() (*Part, error) {
 192         if r.currentPart != nil {
 193                 r.currentPart.Close()
 194         }
 195
 196         expectNewPart := false
 197         for {
 198                 line, err := r.bufReader.ReadSlice('\n')
 199                 if err == io.EOF && r.isFinalBoundary(line) {
 200                         // If the buffer ends in "--boundary--" without the
 201                         // trailing "\r\n", ReadSlice will return an error
 202                         // (since it's missing the '\n'), but this is a valid
 203                         // multipart EOF so we need to return io.EOF instead of
 204                         // a fmt-wrapped one.
 205                         return nil, io.EOF
 206                 }
 207                 if err != nil {
 208                         return nil, fmt.Errorf("multipart: NextPart: %v", err)
 209                 }
 210
 211                 if r.isBoundaryDelimiterLine(line) {
 212                         r.partsRead++
 213                         bp, err := newPart(r)
 214                         if err != nil {
 215                                 return nil, err
 216                         }
 217                         r.currentPart = bp
 218                         return bp, nil
 219                 }
 220
 221                 if r.isFinalBoundary(line) {
 222                         // Expected EOF
 223                         return nil, io.EOF
 224                 }
 225
 226                 if expectNewPart {
 227                         return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line))
 228                 }
 229
 230                 if r.partsRead == 0 {
 231                         // skip line
 232                         continue
 233                 }
 234
 235                 // Consume the "\n" or "\r\n" separator between the
 236                 // body of the previous part and the boundary line we
 237                 // now expect will follow. (either a new part or the
 238                 // end boundary)
 239                 if bytes.Equal(line, r.nl) {
 240                         expectNewPart = true
 241                         continue
 242                 }
 243
 244                 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line)
 245         }
 246         panic("unreachable")
 247 }
 248
 249 // isFinalBoundary returns whether line is the final boundary line
 250 // indiciating that all parts are over.
 251 // It matches `^--boundary--[ \t]*(\r\n)?$`
 252 func (mr *Reader) isFinalBoundary(line []byte) bool {
 253         if !bytes.HasPrefix(line, mr.dashBoundaryDash) {
 254                 return false
 255         }
 256         rest := line[len(mr.dashBoundaryDash):]
 257         rest = skipLWSPChar(rest)
 258         return len(rest) == 0 || bytes.Equal(rest, mr.nl)
 259 }
 260
 261 func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
 262         // http://tools.ietf.org/html/rfc2046#section-5.1
 263         //   The boundary delimiter line is then defined as a line
 264         //   consisting entirely of two hyphen characters ("-",
 265         //   decimal value 45) followed by the boundary parameter
 266         //   value from the Content-Type header field, optional linear
 267         //   whitespace, and a terminating CRLF.
 268         if !bytes.HasPrefix(line, mr.dashBoundary) {
 269                 return false
 270         }
 271         rest := line[len(mr.dashBoundary):]
 272         rest = skipLWSPChar(rest)
 273
 274         // On the first part, see our lines are ending in \n instead of \r\n
 275         // and switch into that mode if so.  This is a violation of the spec,
 276         // but occurs in practice.
 277         if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
 278                 mr.nl = mr.nl[1:]
 279                 mr.nlDashBoundary = mr.nlDashBoundary[1:]
 280         }
 281         return bytes.Equal(rest, mr.nl)
 282 }
 283
 284 // peekBufferIsEmptyPart returns whether the provided peek-ahead
 285 // buffer represents an empty part.  This is only called if we've not
 286 // already read any bytes in this part and checks for the case of MIME
 287 // software not writing the \r\n on empty parts. Some does, some
 288 // doesn't.
 289 //
 290 // This checks that what follows the "--boundary" is actually the end
 291 // ("--boundary--" with optional whitespace) or optional whitespace
 292 // and then a newline, so we don't catch "--boundaryFAKE", in which
 293 // case the whole line is part of the data.
 294 func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool {
 295         // End of parts case.
 296         // Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)`
 297         if bytes.HasPrefix(peek, mr.dashBoundaryDash) {
 298                 rest := peek[len(mr.dashBoundaryDash):]
 299                 rest = skipLWSPChar(rest)
 300                 return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0
 301         }
 302         if !bytes.HasPrefix(peek, mr.dashBoundary) {
 303                 return false
 304         }
 305         // Test whether rest matches `^[ \t]*\r\n`)
 306         rest := peek[len(mr.dashBoundary):]
 307         rest = skipLWSPChar(rest)
 308         return bytes.HasPrefix(rest, mr.nl)
 309 }
 310
 311 // skipLWSPChar returns b with leading spaces and tabs removed.
 312 // RFC 822 defines:
 313 //    LWSP-char = SPACE / HTAB
 314 func skipLWSPChar(b []byte) []byte {
 315         for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
 316                 b = b[1:]
 317         }
 318         return b
 319 }