libgo/go/net/textproto/reader.go

   1 // Copyright 2010 The Go Authors.  All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package textproto
   6
   7 import (
   8         "bufio"
   9         "bytes"
  10         "io"
  11         "io/ioutil"
  12         "os"
  13         "strconv"
  14         "strings"
  15 )
  16
  17 // BUG(rsc): To let callers manage exposure to denial of service
  18 // attacks, Reader should allow them to set and reset a limit on
  19 // the number of bytes read from the connection.
  20
  21 // A Reader implements convenience methods for reading requests
  22 // or responses from a text protocol network connection.
  23 type Reader struct {
  24         R   *bufio.Reader
  25         dot *dotReader
  26 }
  27
  28 // NewReader returns a new Reader reading from r.
  29 func NewReader(r *bufio.Reader) *Reader {
  30         return &Reader{R: r}
  31 }
  32
  33 // ReadLine reads a single line from r,
  34 // eliding the final \n or \r\n from the returned string.
  35 func (r *Reader) ReadLine() (string, os.Error) {
  36         line, err := r.readLineSlice()
  37         return string(line), err
  38 }
  39
  40 // ReadLineBytes is like ReadLine but returns a []byte instead of a string.
  41 func (r *Reader) ReadLineBytes() ([]byte, os.Error) {
  42         line, err := r.readLineSlice()
  43         if line != nil {
  44                 buf := make([]byte, len(line))
  45                 copy(buf, line)
  46                 line = buf
  47         }
  48         return line, err
  49 }
  50
  51 func (r *Reader) readLineSlice() ([]byte, os.Error) {
  52         r.closeDot()
  53         line, _, err := r.R.ReadLine()
  54         return line, err
  55 }
  56
  57 // ReadContinuedLine reads a possibly continued line from r,
  58 // eliding the final trailing ASCII white space.
  59 // Lines after the first are considered continuations if they
  60 // begin with a space or tab character.  In the returned data,
  61 // continuation lines are separated from the previous line
  62 // only by a single space: the newline and leading white space
  63 // are removed.
  64 //
  65 // For example, consider this input:
  66 //
  67 //      Line 1
  68 //        continued...
  69 //      Line 2
  70 //
  71 // The first call to ReadContinuedLine will return "Line 1 continued..."
  72 // and the second will return "Line 2".
  73 //
  74 // A line consisting of only white space is never continued.
  75 //
  76 func (r *Reader) ReadContinuedLine() (string, os.Error) {
  77         line, err := r.readContinuedLineSlice()
  78         return string(line), err
  79 }
  80
  81 // trim returns s with leading and trailing spaces and tabs removed.
  82 // It does not assume Unicode or UTF-8.
  83 func trim(s []byte) []byte {
  84         i := 0
  85         for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
  86                 i++
  87         }
  88         n := len(s)
  89         for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
  90                 n--
  91         }
  92         return s[i:n]
  93 }
  94
  95 // ReadContinuedLineBytes is like ReadContinuedLine but
  96 // returns a []byte instead of a string.
  97 func (r *Reader) ReadContinuedLineBytes() ([]byte, os.Error) {
  98         line, err := r.readContinuedLineSlice()
  99         if line != nil {
 100                 buf := make([]byte, len(line))
 101                 copy(buf, line)
 102                 line = buf
 103         }
 104         return line, err
 105 }
 106
// readContinuedLineSlice reads one line plus any continuation lines
// (lines whose first byte is a space or tab) and returns them joined by
// single spaces, with spaces and tabs trimmed from both ends of every
// physical line.
//
// The returned slice may still point into the bufio buffer when no
// continuation was found and no copy was needed, so callers must copy
// it if they keep it across further reads.
//
// An error from reading the first line is returned immediately. Errors
// met later are dropped whenever some data was gathered; the code
// relies on the next read reporting them again.
func (r *Reader) readContinuedLineSlice() ([]byte, os.Error) {
	// Read the first line.
	line, err := r.readLineSlice()
	if err != nil {
		return line, err
	}
	if len(line) == 0 { // blank line - no continuation
		return line, nil
	}
	// NOTE(review): the blank-line test above runs before trim, so a
	// line holding only spaces/tabs falls through to the continuation
	// check below - confirm this is intended.
	line = trim(line)

	copied := false
	if r.R.Buffered() < 1 {
		// ReadByte will flush the buffer; make a copy of the slice.
		copied = true
		line = append([]byte(nil), line...)
	}

	// Look for a continuation line by peeking at the next byte.
	c, err := r.R.ReadByte()
	if err != nil {
		// Delay err until we read the byte next time.
		return line, nil
	}
	if c != ' ' && c != '\t' {
		// Not a continuation: put the byte back for the next reader.
		r.R.UnreadByte()
		return line, nil
	}

	if !copied {
		// The next readLineSlice will invalidate the previous one.
		// Allocate with spare capacity for the continuation text.
		line = append(make([]byte, 0, len(line)*2), line...)
	}

	// Read continuation lines.
	for {
		// Consume leading spaces; one already gone.
		for {
			c, err = r.R.ReadByte()
			if err != nil {
				break
			}
			if c != ' ' && c != '\t' {
				r.R.UnreadByte()
				break
			}
		}
		// Append the rest of the continuation line, separated from
		// what came before by exactly one space. This happens even
		// when the read failed (cont is then whatever was returned).
		var cont []byte
		cont, err = r.readLineSlice()
		cont = trim(cont)
		line = append(line, ' ')
		line = append(line, cont...)
		if err != nil {
			break
		}

		// Check for leading space on next line.
		if c, err = r.R.ReadByte(); err != nil {
			break
		}
		if c != ' ' && c != '\t' {
			r.R.UnreadByte()
			break
		}
	}

	// Delay error until next call.
	if len(line) > 0 {
		err = nil
	}
	return line, err
}
 180
 181 func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err os.Error) {
 182         line, err := r.ReadLine()
 183         if err != nil {
 184                 return
 185         }
 186         return parseCodeLine(line, expectCode)
 187 }
 188
 189 func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err os.Error) {
 190         if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
 191                 err = ProtocolError("short response: " + line)
 192                 return
 193         }
 194         continued = line[3] == '-'
 195         code, err = strconv.Atoi(line[0:3])
 196         if err != nil || code < 100 {
 197                 err = ProtocolError("invalid response code: " + line)
 198                 return
 199         }
 200         message = line[4:]
 201         if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
 202                 10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
 203                 100 <= expectCode && expectCode < 1000 && code != expectCode {
 204                 err = &Error{code, message}
 205         }
 206         return
 207 }
 208
 209 // ReadCodeLine reads a response code line of the form
 210 //      code message
 211 // where code is a 3-digit status code and the message
 212 // extends to the rest of the line.  An example of such a line is:
 213 //      220 plan9.bell-labs.com ESMTP
 214 //
 215 // If the prefix of the status does not match the digits in expectCode,
 216 // ReadCodeLine returns with err set to &Error{code, message}.
 217 // For example, if expectCode is 31, an error will be returned if
 218 // the status is not in the range [310,319].
 219 //
 220 // If the response is multi-line, ReadCodeLine returns an error.
 221 //
 222 // An expectCode <= 0 disables the check of the status code.
 223 //
 224 func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err os.Error) {
 225         code, continued, message, err := r.readCodeLine(expectCode)
 226         if err == nil && continued {
 227                 err = ProtocolError("unexpected multi-line response: " + message)
 228         }
 229         return
 230 }
 231
 232 // ReadResponse reads a multi-line response of the form:
 233 //
 234 //      code-message line 1
 235 //      code-message line 2
 236 //      ...
 237 //      code message line n
 238 //
 239 // where code is a 3-digit status code. The first line starts with the
 240 // code and a hyphen. The response is terminated by a line that starts
 241 // with the same code followed by a space. Each line in message is
 242 // separated by a newline (\n).
 243 //
 244 // See page 36 of RFC 959 (http://www.ietf.org/rfc/rfc959.txt) for
 245 // details.
 246 //
 247 // If the prefix of the status does not match the digits in expectCode,
 248 // ReadResponse returns with err set to &Error{code, message}.
 249 // For example, if expectCode is 31, an error will be returned if
 250 // the status is not in the range [310,319].
 251 //
 252 // An expectCode <= 0 disables the check of the status code.
 253 //
 254 func (r *Reader) ReadResponse(expectCode int) (code int, message string, err os.Error) {
 255         code, continued, message, err := r.readCodeLine(expectCode)
 256         for err == nil && continued {
 257                 line, err := r.ReadLine()
 258                 if err != nil {
 259                         return 0, "", err
 260                 }
 261
 262                 var code2 int
 263                 var moreMessage string
 264                 code2, continued, moreMessage, err = parseCodeLine(line, expectCode)
 265                 if err != nil || code2 != code {
 266                         message += "\n" + strings.TrimRight(line, "\r\n")
 267                         continued = true
 268                         continue
 269                 }
 270                 message += "\n" + moreMessage
 271         }
 272         return
 273 }
 274
 275 // DotReader returns a new Reader that satisfies Reads using the
 276 // decoded text of a dot-encoded block read from r.
 277 // The returned Reader is only valid until the next call
 278 // to a method on r.
 279 //
 280 // Dot encoding is a common framing used for data blocks
 281 // in text protocols such as SMTP.  The data consists of a sequence
 282 // of lines, each of which ends in "\r\n".  The sequence itself
 283 // ends at a line containing just a dot: ".\r\n".  Lines beginning
 284 // with a dot are escaped with an additional dot to avoid
 285 // looking like the end of the sequence.
 286 //
 287 // The decoded form returned by the Reader's Read method
 288 // rewrites the "\r\n" line endings into the simpler "\n",
 289 // removes leading dot escapes if present, and stops with error os.EOF
 290 // after consuming (and discarding) the end-of-sequence line.
 291 func (r *Reader) DotReader() io.Reader {
 292         r.closeDot()
 293         r.dot = &dotReader{r: r}
 294         return r.dot
 295 }
 296
 297 type dotReader struct {
 298         r     *Reader
 299         state int
 300 }
 301
 302 // Read satisfies reads by decoding dot-encoded data read from d.r.
 303 func (d *dotReader) Read(b []byte) (n int, err os.Error) {
 304         // Run data through a simple state machine to
 305         // elide leading dots, rewrite trailing \r\n into \n,
 306         // and detect ending .\r\n line.
 307         const (
 308                 stateBeginLine = iota // beginning of line; initial state; must be zero
 309                 stateDot              // read . at beginning of line
 310                 stateDotCR            // read .\r at beginning of line
 311                 stateCR               // read \r (possibly at end of line)
 312                 stateData             // reading data in middle of line
 313                 stateEOF              // reached .\r\n end marker line
 314         )
 315         br := d.r.R
 316         for n < len(b) && d.state != stateEOF {
 317                 var c byte
 318                 c, err = br.ReadByte()
 319                 if err != nil {
 320                         if err == os.EOF {
 321                                 err = io.ErrUnexpectedEOF
 322                         }
 323                         break
 324                 }
 325                 switch d.state {
 326                 case stateBeginLine:
 327                         if c == '.' {
 328                                 d.state = stateDot
 329                                 continue
 330                         }
 331                         if c == '\r' {
 332                                 d.state = stateCR
 333                                 continue
 334                         }
 335                         d.state = stateData
 336
 337                 case stateDot:
 338                         if c == '\r' {
 339                                 d.state = stateDotCR
 340                                 continue
 341                         }
 342                         if c == '\n' {
 343                                 d.state = stateEOF
 344                                 continue
 345                         }
 346                         d.state = stateData
 347
 348                 case stateDotCR:
 349                         if c == '\n' {
 350                                 d.state = stateEOF
 351                                 continue
 352                         }
 353                         // Not part of .\r\n.
 354                         // Consume leading dot and emit saved \r.
 355                         br.UnreadByte()
 356                         c = '\r'
 357                         d.state = stateData
 358
 359                 case stateCR:
 360                         if c == '\n' {
 361                                 d.state = stateBeginLine
 362                                 break
 363                         }
 364                         // Not part of \r\n.  Emit saved \r
 365                         br.UnreadByte()
 366                         c = '\r'
 367                         d.state = stateData
 368
 369                 case stateData:
 370                         if c == '\r' {
 371                                 d.state = stateCR
 372                                 continue
 373                         }
 374                         if c == '\n' {
 375                                 d.state = stateBeginLine
 376                         }
 377                 }
 378                 b[n] = c
 379                 n++
 380         }
 381         if err == nil && d.state == stateEOF {
 382                 err = os.EOF
 383         }
 384         if err != nil && d.r.dot == d {
 385                 d.r.dot = nil
 386         }
 387         return
 388 }
 389
 390 // closeDot drains the current DotReader if any,
 391 // making sure that it reads until the ending dot line.
 392 func (r *Reader) closeDot() {
 393         if r.dot == nil {
 394                 return
 395         }
 396         buf := make([]byte, 128)
 397         for r.dot != nil {
 398                 // When Read reaches EOF or an error,
 399                 // it will set r.dot == nil.
 400                 r.dot.Read(buf)
 401         }
 402 }
 403
 404 // ReadDotBytes reads a dot-encoding and returns the decoded data.
 405 //
 406 // See the documentation for the DotReader method for details about dot-encoding.
 407 func (r *Reader) ReadDotBytes() ([]byte, os.Error) {
 408         return ioutil.ReadAll(r.DotReader())
 409 }
 410
 411 // ReadDotLines reads a dot-encoding and returns a slice
 412 // containing the decoded lines, with the final \r\n or \n elided from each.
 413 //
 414 // See the documentation for the DotReader method for details about dot-encoding.
 415 func (r *Reader) ReadDotLines() ([]string, os.Error) {
 416         // We could use ReadDotBytes and then Split it,
 417         // but reading a line at a time avoids needing a
 418         // large contiguous block of memory and is simpler.
 419         var v []string
 420         var err os.Error
 421         for {
 422                 var line string
 423                 line, err = r.ReadLine()
 424                 if err != nil {
 425                         if err == os.EOF {
 426                                 err = io.ErrUnexpectedEOF
 427                         }
 428                         break
 429                 }
 430
 431                 // Dot by itself marks end; otherwise cut one dot.
 432                 if len(line) > 0 && line[0] == '.' {
 433                         if len(line) == 1 {
 434                                 break
 435                         }
 436                         line = line[1:]
 437                 }
 438                 v = append(v, line)
 439         }
 440         return v, err
 441 }
 442
 443 // ReadMIMEHeader reads a MIME-style header from r.
 444 // The header is a sequence of possibly continued Key: Value lines
 445 // ending in a blank line.
 446 // The returned map m maps CanonicalMIMEHeaderKey(key) to a
 447 // sequence of values in the same order encountered in the input.
 448 //
 449 // For example, consider this input:
 450 //
 451 //      My-Key: Value 1
 452 //      Long-Key: Even
 453 //             Longer Value
 454 //      My-Key: Value 2
 455 //
 456 // Given that input, ReadMIMEHeader returns the map:
 457 //
 458 //      map[string][]string{
 459 //              "My-Key": {"Value 1", "Value 2"},
 460 //              "Long-Key": {"Even Longer Value"},
 461 //      }
 462 //
 463 func (r *Reader) ReadMIMEHeader() (MIMEHeader, os.Error) {
 464         m := make(MIMEHeader)
 465         for {
 466                 kv, err := r.readContinuedLineSlice()
 467                 if len(kv) == 0 {
 468                         return m, err
 469                 }
 470
 471                 // Key ends at first colon; must not have spaces.
 472                 i := bytes.IndexByte(kv, ':')
 473                 if i < 0 || bytes.IndexByte(kv[0:i], ' ') >= 0 {
 474                         return m, ProtocolError("malformed MIME header line: " + string(kv))
 475                 }
 476                 key := CanonicalMIMEHeaderKey(string(kv[0:i]))
 477
 478                 // Skip initial spaces in value.
 479                 i++ // skip colon
 480                 for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') {
 481                         i++
 482                 }
 483                 value := string(kv[i:])
 484
 485                 m[key] = append(m[key], value)
 486
 487                 if err != nil {
 488                         return m, err
 489                 }
 490         }
 491         panic("unreachable")
 492 }
 493
 494 // CanonicalMIMEHeaderKey returns the canonical format of the
 495 // MIME header key s.  The canonicalization converts the first
 496 // letter and any letter following a hyphen to upper case;
 497 // the rest are converted to lowercase.  For example, the
 498 // canonical key for "accept-encoding" is "Accept-Encoding".
 499 func CanonicalMIMEHeaderKey(s string) string {
 500         // Quick check for canonical encoding.
 501         needUpper := true
 502         for i := 0; i < len(s); i++ {
 503                 c := s[i]
 504                 if needUpper && 'a' <= c && c <= 'z' {
 505                         goto MustRewrite
 506                 }
 507                 if !needUpper && 'A' <= c && c <= 'Z' {
 508                         goto MustRewrite
 509                 }
 510                 needUpper = c == '-'
 511         }
 512         return s
 513
 514 MustRewrite:
 515         // Canonicalize: first letter upper case
 516         // and upper case after each dash.
 517         // (Host, User-Agent, If-Modified-Since).
 518         // MIME headers are ASCII only, so no Unicode issues.
 519         a := []byte(s)
 520         upper := true
 521         for i, v := range a {
 522                 if upper && 'a' <= v && v <= 'z' {
 523                         a[i] = v + 'A' - 'a'
 524                 }
 525                 if !upper && 'A' <= v && v <= 'Z' {
 526                         a[i] = v + 'a' - 'A'
 527                 }
 528                 upper = v == '-'
 529         }
 530         return string(a)
 531 }