libgo/go/go/doc/comment.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // Godoc comment extraction and comment -> HTML formatting.
   6
   7 package doc
   8
   9 import (
  10         "go/ast"
  11         "io"
  12         "regexp"
  13         "strings"
  14         "template" // for htmlEscape
  15 )
  16
  17
  18 func isWhitespace(ch byte) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' }
  19
  20
  21 func stripTrailingWhitespace(s string) string {
  22         i := len(s)
  23         for i > 0 && isWhitespace(s[i-1]) {
  24                 i--
  25         }
  26         return s[0:i]
  27 }
  28
  29
  30 // CommentText returns the text of comment,
  31 // with the comment markers - //, /*, and */ - removed.
  32 func CommentText(comment *ast.CommentGroup) string {
  33         if comment == nil {
  34                 return ""
  35         }
  36         comments := make([]string, len(comment.List))
  37         for i, c := range comment.List {
  38                 comments[i] = string(c.Text)
  39         }
  40
  41         lines := make([]string, 0, 10) // most comments are less than 10 lines
  42         for _, c := range comments {
  43                 // Remove comment markers.
  44                 // The parser has given us exactly the comment text.
  45                 switch c[1] {
  46                 case '/':
  47                         //-style comment
  48                         c = c[2:]
  49                         // Remove leading space after //, if there is one.
  50                         // TODO(gri) This appears to be necessary in isolated
  51                         //           cases (bignum.RatFromString) - why?
  52                         if len(c) > 0 && c[0] == ' ' {
  53                                 c = c[1:]
  54                         }
  55                 case '*':
  56                         /*-style comment */
  57                         c = c[2 : len(c)-2]
  58                 }
  59
  60                 // Split on newlines.
  61                 cl := strings.Split(c, "\n", -1)
  62
  63                 // Walk lines, stripping trailing white space and adding to list.
  64                 for _, l := range cl {
  65                         lines = append(lines, stripTrailingWhitespace(l))
  66                 }
  67         }
  68
  69         // Remove leading blank lines; convert runs of
  70         // interior blank lines to a single blank line.
  71         n := 0
  72         for _, line := range lines {
  73                 if line != "" || n > 0 && lines[n-1] != "" {
  74                         lines[n] = line
  75                         n++
  76                 }
  77         }
  78         lines = lines[0:n]
  79
  80         // Add final "" entry to get trailing newline from Join.
  81         if n > 0 && lines[n-1] != "" {
  82                 lines = append(lines, "")
  83         }
  84
  85         return strings.Join(lines, "\n")
  86 }
  87
  88
  89 // Split bytes into lines.
  90 func split(text []byte) [][]byte {
  91         // count lines
  92         n := 0
  93         last := 0
  94         for i, c := range text {
  95                 if c == '\n' {
  96                         last = i + 1
  97                         n++
  98                 }
  99         }
 100         if last < len(text) {
 101                 n++
 102         }
 103
 104         // split
 105         out := make([][]byte, n)
 106         last = 0
 107         n = 0
 108         for i, c := range text {
 109                 if c == '\n' {
 110                         out[n] = text[last : i+1]
 111                         last = i + 1
 112                         n++
 113                 }
 114         }
 115         if last < len(text) {
 116                 out[n] = text[last:]
 117         }
 118
 119         return out
 120 }
 121
 122
// HTML entities for typographic double quotes. commentEscape writes
// ldquo in place of `` and rdquo in place of '' when its nice flag is set.
var (
	ldquo = []byte("&ldquo;")
	rdquo = []byte("&rdquo;")
)
 127
 128 // Escape comment text for HTML. If nice is set,
 129 // also turn `` into &ldquo; and '' into &rdquo;.
 130 func commentEscape(w io.Writer, s []byte, nice bool) {
 131         last := 0
 132         if nice {
 133                 for i := 0; i < len(s)-1; i++ {
 134                         ch := s[i]
 135                         if ch == s[i+1] && (ch == '`' || ch == '\'') {
 136                                 template.HTMLEscape(w, s[last:i])
 137                                 last = i + 2
 138                                 switch ch {
 139                                 case '`':
 140                                         w.Write(ldquo)
 141                                 case '\'':
 142                                         w.Write(rdquo)
 143                                 }
 144                                 i++ // loop will add one more
 145                         }
 146                 }
 147         }
 148         template.HTMLEscape(w, s[last:])
 149 }
 150
 151
// Regular expression sources used to recognize Go identifiers and URLs
// in comment text; they are combined into matchRx.
const (
	// identRx is the regexp source for a Go identifier: a letter or
	// underscore followed by any number of letters, digits, or underscores.
	identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this

	// Regexp source for URLs, assembled from three pieces:
	//   protocol - one of the accepted scheme names followed by ':'
	//   hostPart - a non-empty run of letters, digits, '_', '@', or '-'
	//   filePart - a non-empty run of letters, digits, or any of _?%#~&/-+=
	// urlRx is protocol, "//", one or more hostParts joined by '.' or ':',
	// an optional '/', and one or more fileParts joined by ':', '.', or ','.
	protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
	hostPart = `[a-zA-Z0-9_@\-]+`
	filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
	urlRx    = protocol + `//` + // http://
		hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
		filePart + `([:.,]` + filePart + `)*`
)
 164
// matchRx matches either a Go identifier or a URL. The identifier
// alternative is the first parenthesized group and the URL alternative
// the second, so a match whose first submatch is unset came from urlRx.
// NOTE(review): both alternatives can start at the same position (the
// "http" of a URL is also a valid identifier). Selecting the URL there
// presumably relies on the regexp package preferring the longest match
// over the first alternative - confirm against the regexp version in use.
var matchRx = regexp.MustCompile(`(` + identRx + `)|(` + urlRx + `)`)
 166
// HTML fragments written verbatim to the output. emphasize uses the
// anchor and italics fragments; ToHTML uses the paragraph and
// preformatted-block fragments.
var (
	html_a      = []byte(`<a href="`) // start of an anchor, up to the opening quote of href
	html_aq     = []byte(`">`)        // closes the href value and the opening anchor tag
	html_enda   = []byte("</a>")
	html_i      = []byte("<i>")
	html_endi   = []byte("</i>")
	html_p      = []byte("<p>\n")
	html_endp   = []byte("</p>\n")
	html_pre    = []byte("<pre>")
	html_endpre = []byte("</pre>\n")
)
 178
 179
 180 // Emphasize and escape a line of text for HTML. URLs are converted into links;
 181 // if the URL also appears in the words map, the link is taken from the map (if
 182 // the corresponding map value is the empty string, the URL is not converted
 183 // into a link). Go identifiers that appear in the words map are italicized; if
 184 // the corresponding map value is not the empty string, it is considered a URL
 185 // and the word is converted into a link. If nice is set, the remaining text's
 186 // appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
 187 // and '' into &rdquo;).
 188 func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) {
 189         for {
 190                 m := matchRx.FindSubmatchIndex(line)
 191                 if m == nil {
 192                         break
 193                 }
 194                 // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx)
 195
 196                 // write text before match
 197                 commentEscape(w, line[0:m[0]], nice)
 198
 199                 // analyze match
 200                 match := line[m[0]:m[1]]
 201                 url := ""
 202                 italics := false
 203                 if words != nil {
 204                         url, italics = words[string(match)]
 205                 }
 206                 if m[2] < 0 {
 207                         // didn't match against first parenthesized sub-regexp; must be match against urlRx
 208                         if !italics {
 209                                 // no alternative URL in words list, use match instead
 210                                 url = string(match)
 211                         }
 212                         italics = false // don't italicize URLs
 213                 }
 214
 215                 // write match
 216                 if len(url) > 0 {
 217                         w.Write(html_a)
 218                         template.HTMLEscape(w, []byte(url))
 219                         w.Write(html_aq)
 220                 }
 221                 if italics {
 222                         w.Write(html_i)
 223                 }
 224                 commentEscape(w, match, nice)
 225                 if italics {
 226                         w.Write(html_endi)
 227                 }
 228                 if len(url) > 0 {
 229                         w.Write(html_enda)
 230                 }
 231
 232                 // advance
 233                 line = line[m[1]:]
 234         }
 235         commentEscape(w, line, nice)
 236 }
 237
 238
 239 func indentLen(s []byte) int {
 240         i := 0
 241         for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
 242                 i++
 243         }
 244         return i
 245 }
 246
 247
 248 func isBlank(s []byte) bool { return len(s) == 0 || (len(s) == 1 && s[0] == '\n') }
 249
 250
 251 func commonPrefix(a, b []byte) []byte {
 252         i := 0
 253         for i < len(a) && i < len(b) && a[i] == b[i] {
 254                 i++
 255         }
 256         return a[0:i]
 257 }
 258
 259
 260 func unindent(block [][]byte) {
 261         if len(block) == 0 {
 262                 return
 263         }
 264
 265         // compute maximum common white prefix
 266         prefix := block[0][0:indentLen(block[0])]
 267         for _, line := range block {
 268                 if !isBlank(line) {
 269                         prefix = commonPrefix(prefix, line[0:indentLen(line)])
 270                 }
 271         }
 272         n := len(prefix)
 273
 274         // remove
 275         for i, line := range block {
 276                 if !isBlank(line) {
 277                         block[i] = line[n:]
 278                 }
 279         }
 280 }
 281
 282
 283 // Convert comment text to formatted HTML.
 284 // The comment was prepared by DocReader,
 285 // so it is known not to have leading, trailing blank lines
 286 // nor to have trailing spaces at the end of lines.
 287 // The comment markers have already been removed.
 288 //
 289 // Turn each run of multiple \n into </p><p>
 290 // Turn each run of indented lines into a <pre> block without indent.
 291 //
 292 // URLs in the comment text are converted into links; if the URL also appears
 293 // in the words map, the link is taken from the map (if the corresponding map
 294 // value is the empty string, the URL is not converted into a link).
 295 //
 296 // Go identifiers that appear in the words map are italicized; if the corresponding
 297 // map value is not the empty string, it is considered a URL and the word is converted
 298 // into a link.
 299 func ToHTML(w io.Writer, s []byte, words map[string]string) {
 300         inpara := false
 301
 302         close := func() {
 303                 if inpara {
 304                         w.Write(html_endp)
 305                         inpara = false
 306                 }
 307         }
 308         open := func() {
 309                 if !inpara {
 310                         w.Write(html_p)
 311                         inpara = true
 312                 }
 313         }
 314
 315         lines := split(s)
 316         unindent(lines)
 317         for i := 0; i < len(lines); {
 318                 line := lines[i]
 319                 if isBlank(line) {
 320                         // close paragraph
 321                         close()
 322                         i++
 323                         continue
 324                 }
 325                 if indentLen(line) > 0 {
 326                         // close paragraph
 327                         close()
 328
 329                         // count indented or blank lines
 330                         j := i + 1
 331                         for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
 332                                 j++
 333                         }
 334                         // but not trailing blank lines
 335                         for j > i && isBlank(lines[j-1]) {
 336                                 j--
 337                         }
 338                         block := lines[i:j]
 339                         i = j
 340
 341                         unindent(block)
 342
 343                         // put those lines in a pre block
 344                         w.Write(html_pre)
 345                         for _, line := range block {
 346                                 emphasize(w, line, nil, false) // no nice text formatting
 347                         }
 348                         w.Write(html_endpre)
 349                         continue
 350                 }
 351                 // open paragraph
 352                 open()
 353                 emphasize(w, lines[i], words, true) // nice text formatting
 354                 i++
 355         }
 356         close()
 357 }