libgo/go/tabwriter/tabwriter.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // The tabwriter package implements a write filter (tabwriter.Writer)
   6 // that translates tabbed columns in input into properly aligned text.
   7 //
   8 // The package is using the Elastic Tabstops algorithm described at
   9 // http://nickgravgaard.com/elastictabstops/index.html.
  10 //
  11 package tabwriter
  12
  13 import (
  14         "bytes"
  15         "io"
  16         "os"
  17         "utf8"
  18 )
  19
  20
  21 // ----------------------------------------------------------------------------
  22 // Filter implementation
  23
  24 // A cell represents a segment of text terminated by tabs or line breaks.
  25 // The text itself is stored in a separate buffer; cell only describes the
  26 // segment's size in bytes, its width in runes, and whether it's an htab
  27 // ('\t') terminated cell.
  28 //
  29 type cell struct {
  30         size  int  // cell size in bytes
  31         width int  // cell width in runes
  32         htab  bool // true if the cell is terminated by an htab ('\t')
  33 }
  34
  35
  36 // A Writer is a filter that inserts padding around tab-delimited
  37 // columns in its input to align them in the output.
  38 //
  39 // The Writer treats incoming bytes as UTF-8 encoded text consisting
  40 // of cells terminated by (horizontal or vertical) tabs or line
  41 // breaks (newline or formfeed characters). Cells in adjacent lines
  42 // constitute a column. The Writer inserts padding as needed to
  43 // make all cells in a column have the same width, effectively
  44 // aligning the columns. It assumes that all characters have the
  45 // same width except for tabs for which a tabwidth must be specified.
  46 // Note that cells are tab-terminated, not tab-separated: trailing
  47 // non-tab text at the end of a line does not form a column cell.
  48 //
  49 // The Writer assumes that all Unicode code points have the same width;
  50 // this may not be true in some fonts.
  51 //
  52 // If DiscardEmptyColumns is set, empty columns that are terminated
  53 // entirely by vertical (or "soft") tabs are discarded. Columns
  54 // terminated by horizontal (or "hard") tabs are not affected by
  55 // this flag.
  56 //
  57 // If a Writer is configured to filter HTML, HTML tags and entities
  58 // are simply passed through. The widths of tags and entities are
  59 // assumed to be zero (tags) and one (entities) for formatting purposes.
  60 //
  61 // A segment of text may be escaped by bracketing it with Escape
  62 // characters. The tabwriter passes escaped text segments through
  63 // unchanged. In particular, it does not interpret any tabs or line
  64 // breaks within the segment. If the StripEscape flag is set, the
  65 // Escape characters are stripped from the output; otherwise they
  66 // are passed through as well. For the purpose of formatting, the
  67 // width of the escaped text is always computed excluding the Escape
  68 // characters.
  69 //
  70 // The formfeed character ('\f') acts like a newline but it also
  71 // terminates all columns in the current line (effectively calling
  72 // Flush). Cells in the next line start new columns. Unless found
  73 // inside an HTML tag or inside an escaped text segment, formfeed
  74 // characters appear as newlines in the output.
  75 //
  76 // The Writer must buffer input internally, because proper spacing
  77 // of one line may depend on the cells in future lines. Clients must
  78 // call Flush when done calling Write.
  79 //
  80 type Writer struct {
  81         // configuration
  82         output   io.Writer
  83         minwidth int
  84         tabwidth int
  85         padding  int
  86         padbytes [8]byte
  87         flags    uint
  88
  89         // current state
  90         buf     bytes.Buffer // collected text excluding tabs or line breaks
  91         pos     int          // buffer position up to which cell.width of incomplete cell has been computed
  92         cell    cell         // current incomplete cell; cell.width is up to buf[pos] excluding ignored sections
  93         endChar byte         // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0)
  94         lines   [][]cell     // list of lines; each line is a list of cells
  95         widths  []int        // list of column widths in runes - re-used during formatting
  96 }
  97
  98
  99 func (b *Writer) addLine() { b.lines = append(b.lines, []cell{}) }
 100
 101
 102 // Reset the current state.
 103 func (b *Writer) reset() {
 104         b.buf.Reset()
 105         b.pos = 0
 106         b.cell = cell{}
 107         b.endChar = 0
 108         b.lines = b.lines[0:0]
 109         b.widths = b.widths[0:0]
 110         b.addLine()
 111 }
 112
 113
 114 // Internal representation (current state):
 115 //
 116 // - all text written is appended to buf; tabs and line breaks are stripped away
 117 // - at any given time there is a (possibly empty) incomplete cell at the end
 118 //   (the cell starts after a tab or line break)
 119 // - cell.size is the number of bytes belonging to the cell so far
 120 // - cell.width is text width in runes of that cell from the start of the cell to
 121 //   position pos; html tags and entities are excluded from this width if html
 122 //   filtering is enabled
 123 // - the sizes and widths of processed text are kept in the lines list
 124 //   which contains a list of cells for each line
 125 // - the widths list is a temporary list with current widths used during
 126 //   formatting; it is kept in Writer because it's re-used
 127 //
 128 //                    |<---------- size ---------->|
 129 //                    |                            |
 130 //                    |<- width ->|<- ignored ->|  |
 131 //                    |           |             |  |
 132 // [---processed---tab------------<tag>...</tag>...]
 133 // ^                  ^                         ^
 134 // |                  |                         |
 135 // buf                start of incomplete cell  pos
 136
 137
 138 // Formatting can be controlled with these flags.
 139 const (
 140         // Ignore html tags and treat entities (starting with '&'
 141         // and ending in ';') as single characters (width = 1).
 142         FilterHTML uint = 1 << iota
 143
 144         // Strip Escape characters bracketing escaped text segments
 145         // instead of passing them through unchanged with the text.
 146         StripEscape
 147
 148         // Force right-alignment of cell content.
 149         // Default is left-alignment.
 150         AlignRight
 151
 152         // Handle empty columns as if they were not present in
 153         // the input in the first place.
 154         DiscardEmptyColumns
 155
 156         // Always use tabs for indentation columns (i.e., padding of
 157         // leading empty cells on the left) independent of padchar.
 158         TabIndent
 159
 160         // Print a vertical bar ('|') between columns (after formatting).
 161         // Discarded colums appear as zero-width columns ("||").
 162         Debug
 163 )
 164
 165
 166 // A Writer must be initialized with a call to Init. The first parameter (output)
 167 // specifies the filter output. The remaining parameters control the formatting:
 168 //
 169 //      minwidth        minimal cell width including any padding
 170 //      tabwidth        width of tab characters (equivalent number of spaces)
 171 //      padding         padding added to a cell before computing its width
 172 //      padchar         ASCII char used for padding
 173 //                      if padchar == '\t', the Writer will assume that the
 174 //                      width of a '\t' in the formatted output is tabwidth,
 175 //                      and cells are left-aligned independent of align_left
 176 //                      (for correct-looking results, tabwidth must correspond
 177 //                      to the tab width in the viewer displaying the result)
 178 //      flags           formatting control
 179 //
 180 // To format in tab-separated columns with a tab stop of 8:
 181 //      b.Init(w, 8, 1, 8, '\t', 0);
 182 //
 183 // To format in space-separated columns with at least 4 spaces between columns:
 184 //      b.Init(w, 0, 4, 8, ' ', 0);
 185 //
 186 func (b *Writer) Init(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
 187         if minwidth < 0 || tabwidth < 0 || padding < 0 {
 188                 panic("negative minwidth, tabwidth, or padding")
 189         }
 190         b.output = output
 191         b.minwidth = minwidth
 192         b.tabwidth = tabwidth
 193         b.padding = padding
 194         for i := range b.padbytes {
 195                 b.padbytes[i] = padchar
 196         }
 197         if padchar == '\t' {
 198                 // tab padding enforces left-alignment
 199                 flags &^= AlignRight
 200         }
 201         b.flags = flags
 202
 203         b.reset()
 204
 205         return b
 206 }
 207
 208
 209 // debugging support (keep code around)
 210 func (b *Writer) dump() {
 211         pos := 0
 212         for i, line := range b.lines {
 213                 print("(", i, ") ")
 214                 for _, c := range line {
 215                         print("[", string(b.buf.Bytes()[pos:pos+c.size]), "]")
 216                         pos += c.size
 217                 }
 218                 print("\n")
 219         }
 220         print("\n")
 221 }
 222
 223
 224 // local error wrapper so we can distinguish os.Errors we want to return
 225 // as errors from genuine panics (which we don't want to return as errors)
 226 type osError struct {
 227         err os.Error
 228 }
 229
 230
 231 func (b *Writer) write0(buf []byte) {
 232         n, err := b.output.Write(buf)
 233         if n != len(buf) && err == nil {
 234                 err = os.EIO
 235         }
 236         if err != nil {
 237                 panic(osError{err})
 238         }
 239 }
 240
 241
 242 func (b *Writer) writeN(src []byte, n int) {
 243         for n > len(src) {
 244                 b.write0(src)
 245                 n -= len(src)
 246         }
 247         b.write0(src[0:n])
 248 }
 249
 250
 251 var (
 252         newline = []byte{'\n'}
 253         tabs    = []byte("\t\t\t\t\t\t\t\t")
 254 )
 255
 256
 257 func (b *Writer) writePadding(textw, cellw int, useTabs bool) {
 258         if b.padbytes[0] == '\t' || useTabs {
 259                 // padding is done with tabs
 260                 if b.tabwidth == 0 {
 261                         return // tabs have no width - can't do any padding
 262                 }
 263                 // make cellw the smallest multiple of b.tabwidth
 264                 cellw = (cellw + b.tabwidth - 1) / b.tabwidth * b.tabwidth
 265                 n := cellw - textw // amount of padding
 266                 if n < 0 {
 267                         panic("internal error")
 268                 }
 269                 b.writeN(tabs, (n+b.tabwidth-1)/b.tabwidth)
 270                 return
 271         }
 272
 273         // padding is done with non-tab characters
 274         b.writeN(b.padbytes[0:], cellw-textw)
 275 }
 276
 277
 278 var vbar = []byte{'|'}
 279
 280 func (b *Writer) writeLines(pos0 int, line0, line1 int) (pos int) {
 281         pos = pos0
 282         for i := line0; i < line1; i++ {
 283                 line := b.lines[i]
 284
 285                 // if TabIndent is set, use tabs to pad leading empty cells
 286                 useTabs := b.flags&TabIndent != 0
 287
 288                 for j, c := range line {
 289                         if j > 0 && b.flags&Debug != 0 {
 290                                 // indicate column break
 291                                 b.write0(vbar)
 292                         }
 293
 294                         if c.size == 0 {
 295                                 // empty cell
 296                                 if j < len(b.widths) {
 297                                         b.writePadding(c.width, b.widths[j], useTabs)
 298                                 }
 299                         } else {
 300                                 // non-empty cell
 301                                 useTabs = false
 302                                 if b.flags&AlignRight == 0 { // align left
 303                                         b.write0(b.buf.Bytes()[pos : pos+c.size])
 304                                         pos += c.size
 305                                         if j < len(b.widths) {
 306                                                 b.writePadding(c.width, b.widths[j], false)
 307                                         }
 308                                 } else { // align right
 309                                         if j < len(b.widths) {
 310                                                 b.writePadding(c.width, b.widths[j], false)
 311                                         }
 312                                         b.write0(b.buf.Bytes()[pos : pos+c.size])
 313                                         pos += c.size
 314                                 }
 315                         }
 316                 }
 317
 318                 if i+1 == len(b.lines) {
 319                         // last buffered line - we don't have a newline, so just write
 320                         // any outstanding buffered data
 321                         b.write0(b.buf.Bytes()[pos : pos+b.cell.size])
 322                         pos += b.cell.size
 323                 } else {
 324                         // not the last line - write newline
 325                         b.write0(newline)
 326                 }
 327         }
 328         return
 329 }
 330
 331
 332 // Format the text between line0 and line1 (excluding line1); pos
 333 // is the buffer position corresponding to the beginning of line0.
 334 // Returns the buffer position corresponding to the beginning of
 335 // line1 and an error, if any.
 336 //
 337 func (b *Writer) format(pos0 int, line0, line1 int) (pos int) {
 338         pos = pos0
 339         column := len(b.widths)
 340         for this := line0; this < line1; this++ {
 341                 line := b.lines[this]
 342
 343                 if column < len(line)-1 {
 344                         // cell exists in this column => this line
 345                         // has more cells than the previous line
 346                         // (the last cell per line is ignored because cells are
 347                         // tab-terminated; the last cell per line describes the
 348                         // text before the newline/formfeed and does not belong
 349                         // to a column)
 350
 351                         // print unprinted lines until beginning of block
 352                         pos = b.writeLines(pos, line0, this)
 353                         line0 = this
 354
 355                         // column block begin
 356                         width := b.minwidth // minimal column width
 357                         discardable := true // true if all cells in this column are empty and "soft"
 358                         for ; this < line1; this++ {
 359                                 line = b.lines[this]
 360                                 if column < len(line)-1 {
 361                                         // cell exists in this column
 362                                         c := line[column]
 363                                         // update width
 364                                         if w := c.width + b.padding; w > width {
 365                                                 width = w
 366                                         }
 367                                         // update discardable
 368                                         if c.width > 0 || c.htab {
 369                                                 discardable = false
 370                                         }
 371                                 } else {
 372                                         break
 373                                 }
 374                         }
 375                         // column block end
 376
 377                         // discard empty columns if necessary
 378                         if discardable && b.flags&DiscardEmptyColumns != 0 {
 379                                 width = 0
 380                         }
 381
 382                         // format and print all columns to the right of this column
 383                         // (we know the widths of this column and all columns to the left)
 384                         b.widths = append(b.widths, width) // push width
 385                         pos = b.format(pos, line0, this)
 386                         b.widths = b.widths[0 : len(b.widths)-1] // pop width
 387                         line0 = this
 388                 }
 389         }
 390
 391         // print unprinted lines until end
 392         return b.writeLines(pos, line0, line1)
 393 }
 394
 395
 396 // Append text to current cell.
 397 func (b *Writer) append(text []byte) {
 398         b.buf.Write(text)
 399         b.cell.size += len(text)
 400 }
 401
 402
 403 // Update the cell width.
 404 func (b *Writer) updateWidth() {
 405         b.cell.width += utf8.RuneCount(b.buf.Bytes()[b.pos:b.buf.Len()])
 406         b.pos = b.buf.Len()
 407 }
 408
 409
 410 // To escape a text segment, bracket it with Escape characters.
 411 // For instance, the tab in this string "Ignore this tab: \xff\t\xff"
 412 // does not terminate a cell and constitutes a single character of
 413 // width one for formatting purposes.
 414 //
 415 // The value 0xff was chosen because it cannot appear in a valid UTF-8 sequence.
 416 //
 417 const Escape = '\xff'
 418
 419
 420 // Start escaped mode.
 421 func (b *Writer) startEscape(ch byte) {
 422         switch ch {
 423         case Escape:
 424                 b.endChar = Escape
 425         case '<':
 426                 b.endChar = '>'
 427         case '&':
 428                 b.endChar = ';'
 429         }
 430 }
 431
 432
 433 // Terminate escaped mode. If the escaped text was an HTML tag, its width
 434 // is assumed to be zero for formatting purposes; if it was an HTML entity,
 435 // its width is assumed to be one. In all other cases, the width is the
 436 // unicode width of the text.
 437 //
 438 func (b *Writer) endEscape() {
 439         switch b.endChar {
 440         case Escape:
 441                 b.updateWidth()
 442                 if b.flags&StripEscape == 0 {
 443                         b.cell.width -= 2 // don't count the Escape chars
 444                 }
 445         case '>': // tag of zero width
 446         case ';':
 447                 b.cell.width++ // entity, count as one rune
 448         }
 449         b.pos = b.buf.Len()
 450         b.endChar = 0
 451 }
 452
 453
 454 // Terminate the current cell by adding it to the list of cells of the
 455 // current line. Returns the number of cells in that line.
 456 //
 457 func (b *Writer) terminateCell(htab bool) int {
 458         b.cell.htab = htab
 459         line := &b.lines[len(b.lines)-1]
 460         *line = append(*line, b.cell)
 461         b.cell = cell{}
 462         return len(*line)
 463 }
 464
 465
 466 func handlePanic(err *os.Error) {
 467         if e := recover(); e != nil {
 468                 *err = e.(osError).err // re-panics if it's not a local osError
 469         }
 470 }
 471
 472
 473 // Flush should be called after the last call to Write to ensure
 474 // that any data buffered in the Writer is written to output. Any
 475 // incomplete escape sequence at the end is simply considered
 476 // complete for formatting purposes.
 477 //
 478 func (b *Writer) Flush() (err os.Error) {
 479         defer b.reset() // even in the presence of errors
 480         defer handlePanic(&err)
 481
 482         // add current cell if not empty
 483         if b.cell.size > 0 {
 484                 if b.endChar != 0 {
 485                         // inside escape - terminate it even if incomplete
 486                         b.endEscape()
 487                 }
 488                 b.terminateCell(false)
 489         }
 490
 491         // format contents of buffer
 492         b.format(0, 0, len(b.lines))
 493
 494         return
 495 }
 496
 497
 498 var hbar = []byte("---\n")
 499
 500 // Write writes buf to the writer b.
 501 // The only errors returned are ones encountered
 502 // while writing to the underlying output stream.
 503 //
 504 func (b *Writer) Write(buf []byte) (n int, err os.Error) {
 505         defer handlePanic(&err)
 506
 507         // split text into cells
 508         n = 0
 509         for i, ch := range buf {
 510                 if b.endChar == 0 {
 511                         // outside escape
 512                         switch ch {
 513                         case '\t', '\v', '\n', '\f':
 514                                 // end of cell
 515                                 b.append(buf[n:i])
 516                                 b.updateWidth()
 517                                 n = i + 1 // ch consumed
 518                                 ncells := b.terminateCell(ch == '\t')
 519                                 if ch == '\n' || ch == '\f' {
 520                                         // terminate line
 521                                         b.addLine()
 522                                         if ch == '\f' || ncells == 1 {
 523                                                 // A '\f' always forces a flush. Otherwise, if the previous
 524                                                 // line has only one cell which does not have an impact on
 525                                                 // the formatting of the following lines (the last cell per
 526                                                 // line is ignored by format()), thus we can flush the
 527                                                 // Writer contents.
 528                                                 if err = b.Flush(); err != nil {
 529                                                         return
 530                                                 }
 531                                                 if ch == '\f' && b.flags&Debug != 0 {
 532                                                         // indicate section break
 533                                                         b.write0(hbar)
 534                                                 }
 535                                         }
 536                                 }
 537
 538                         case Escape:
 539                                 // start of escaped sequence
 540                                 b.append(buf[n:i])
 541                                 b.updateWidth()
 542                                 n = i
 543                                 if b.flags&StripEscape != 0 {
 544                                         n++ // strip Escape
 545                                 }
 546                                 b.startEscape(Escape)
 547
 548                         case '<', '&':
 549                                 // possibly an html tag/entity
 550                                 if b.flags&FilterHTML != 0 {
 551                                         // begin of tag/entity
 552                                         b.append(buf[n:i])
 553                                         b.updateWidth()
 554                                         n = i
 555                                         b.startEscape(ch)
 556                                 }
 557                         }
 558
 559                 } else {
 560                         // inside escape
 561                         if ch == b.endChar {
 562                                 // end of tag/entity
 563                                 j := i + 1
 564                                 if ch == Escape && b.flags&StripEscape != 0 {
 565                                         j = i // strip Escape
 566                                 }
 567                                 b.append(buf[n:j])
 568                                 n = i + 1 // ch consumed
 569                                 b.endEscape()
 570                         }
 571                 }
 572         }
 573
 574         // append leftover text
 575         b.append(buf[n:])
 576         n = len(buf)
 577         return
 578 }
 579
 580
 581 // NewWriter allocates and initializes a new tabwriter.Writer.
 582 // The parameters are the same as for the the Init function.
 583 //
 584 func NewWriter(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
 585         return new(Writer).Init(output, minwidth, tabwidth, padding, padchar, flags)
 586 }