1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Godoc comment extraction and comment -> HTML formatting.
14 "template" // for htmlEscape
// isWhitespace reports whether ch is an ASCII space, horizontal tab,
// newline, or carriage return. No other bytes (including vertical tab
// and form feed) are treated as white space.
func isWhitespace(ch byte) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
21 func stripTrailingWhitespace(s string) string {
23 for i > 0 && isWhitespace(s[i-1]) {
30 // CommentText returns the text of comment,
31 // with the comment markers - //, /*, and */ - removed.
32 func CommentText(comment *ast.CommentGroup) string {
36 comments := make([]string, len(comment.List))
37 for i, c := range comment.List {
38 comments[i] = string(c.Text)
41 lines := make([]string, 0, 10) // most comments are less than 10 lines
42 for _, c := range comments {
43 // Remove comment markers.
44 // The parser has given us exactly the comment text.
49 // Remove leading space after //, if there is one.
50 // TODO(gri) This appears to be necessary in isolated
51 // cases (bignum.RatFromString) - why?
52 if len(c) > 0 && c[0] == ' ' {
61 cl := strings.Split(c, "\n", -1)
63 // Walk lines, stripping trailing white space and adding to list.
64 for _, l := range cl {
65 lines = append(lines, stripTrailingWhitespace(l))
69 // Remove leading blank lines; convert runs of
70 // interior blank lines to a single blank line.
72 for _, line := range lines {
73 if line != "" || n > 0 && lines[n-1] != "" {
80 // Add final "" entry to get trailing newline from Join.
81 if n > 0 && lines[n-1] != "" {
82 lines = append(lines, "")
85 return strings.Join(lines, "\n")
89 // Split bytes into lines.
90 func split(text []byte) [][]byte {
94 for i, c := range text {
100 if last < len(text) {
105 out := make([][]byte, n)
108 for i, c := range text {
110 out[n] = text[last : i+1]
115 if last < len(text) {
124 ldquo = []byte("“")
125 rdquo = []byte("”")
128 // Escape comment text for HTML. If nice is set,
129 // also turn `` into “ and '' into ”.
130 func commentEscape(w io.Writer, s []byte, nice bool) {
133 for i := 0; i < len(s)-1; i++ {
135 if ch == s[i+1] && (ch == '`' || ch == '\'') {
136 template.HTMLEscape(w, s[last:i])
144 i++ // loop will add one more
148 template.HTMLEscape(w, s[last:])
153 // Regexp for Go identifiers
154 identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this
157 protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
158 hostPart = `[a-zA-Z0-9_@\-]+`
159 filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
160 urlRx = protocol + `//` + // http://
161 hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
162 filePart + `([:.,]` + filePart + `)*`
// matchRx matches either a Go identifier (first parenthesized group, identRx)
// or a URL (second parenthesized group, urlRx). It is compiled once at package
// initialization; MustCompile panics if the combined pattern is invalid.
165 var matchRx = regexp.MustCompile(`(` + identRx + `)|(` + urlRx + `)`)
168 html_a = []byte(`<a href="`)
169 html_aq = []byte(`">`)
170 html_enda = []byte("</a>")
171 html_i = []byte("<i>")
172 html_endi = []byte("</i>")
173 html_p = []byte("<p>\n")
174 html_endp = []byte("</p>\n")
175 html_pre = []byte("<pre>")
176 html_endpre = []byte("</pre>\n")
180 // Emphasize and escape a line of text for HTML. URLs are converted into links;
181 // if the URL also appears in the words map, the link is taken from the map (if
182 // the corresponding map value is the empty string, the URL is not converted
183 // into a link). Go identifiers that appear in the words map are italicized; if
184 // the corresponding map value is not the empty string, it is considered a URL
185 // and the word is converted into a link. If nice is set, the remaining text's
186 // appearance is improved where it makes sense (e.g., `` is turned into “
187 // and '' into ”).
188 func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) {
190 m := matchRx.FindSubmatchIndex(line)
194 // len(m) >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx)
196 // write text before match
197 commentEscape(w, line[0:m[0]], nice)
200 match := line[m[0]:m[1]]
204 url, italics = words[string(match)]
207 // didn't match against first parenthesized sub-regexp; must be a match against urlRx
209 // no alternative URL in words list, use match instead
212 italics = false // don't italicize URLs
218 template.HTMLEscape(w, []byte(url))
224 commentEscape(w, match, nice)
235 commentEscape(w, line, nice)
239 func indentLen(s []byte) int {
241 for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
// isBlank reports whether s is an empty line: either it has no bytes at
// all, or it consists of exactly one newline character.
func isBlank(s []byte) bool {
	switch len(s) {
	case 0:
		return true
	case 1:
		return s[0] == '\n'
	}
	return false
}
251 func commonPrefix(a, b []byte) []byte {
253 for i < len(a) && i < len(b) && a[i] == b[i] {
260 func unindent(block [][]byte) {
265 // compute maximum common white prefix
266 prefix := block[0][0:indentLen(block[0])]
267 for _, line := range block {
269 prefix = commonPrefix(prefix, line[0:indentLen(line)])
275 for i, line := range block {
283 // Convert comment text to formatted HTML.
284 // The comment was prepared by DocReader,
285 // so it is known to have no leading or trailing blank lines
286 // and no trailing spaces at the end of lines.
287 // The comment markers have already been removed.
289 // Turn each run of multiple \n into </p><p>
290 // Turn each run of indented lines into a <pre> block without indent.
292 // URLs in the comment text are converted into links; if the URL also appears
293 // in the words map, the link is taken from the map (if the corresponding map
294 // value is the empty string, the URL is not converted into a link).
296 // Go identifiers that appear in the words map are italicized; if the corresponding
297 // map value is not the empty string, it is considered a URL and the word is converted into a link.
299 func ToHTML(w io.Writer, s []byte, words map[string]string) {
317 for i := 0; i < len(lines); {
325 if indentLen(line) > 0 {
329 // count indented or blank lines
331 for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
334 // but not trailing blank lines
335 for j > i && isBlank(lines[j-1]) {
343 // put those lines in a pre block
345 for _, line := range block {
346 emphasize(w, line, nil, false) // no nice text formatting
353 emphasize(w, lines[i], words, true) // nice text formatting