OSDN Git Service

Update license.
[qt-creator-jp/qt-creator-jp.git] / src / libs / utils / htmldocextractor.cpp
1 /**************************************************************************
2 **
3 ** This file is part of Qt Creator
4 **
5 ** Copyright (c) 2011 Nokia Corporation and/or its subsidiary(-ies).
6 **
7 ** Contact: Nokia Corporation (info@qt.nokia.com)
8 **
9 **
10 ** GNU Lesser General Public License Usage
11 **
12 ** This file may be used under the terms of the GNU Lesser General Public
13 ** License version 2.1 as published by the Free Software Foundation and
14 ** appearing in the file LICENSE.LGPL included in the packaging of this file.
15 ** Please review the following information to ensure the GNU Lesser General
16 ** Public License version 2.1 requirements will be met:
17 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
18 **
19 ** In addition, as a special exception, Nokia gives you certain additional
20 ** rights. These rights are described in the Nokia Qt LGPL Exception
21 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
22 **
23 ** Other Usage
24 **
25 ** Alternatively, this file may be used in accordance with the terms and
26 ** conditions contained in a signed written agreement between you and Nokia.
27 **
28 ** If you have questions regarding the use of this file, please contact
29 ** Nokia at qt-info@nokia.com.
30 **
31 **************************************************************************/
32
33 #include "htmldocextractor.h"
34
35 #include <QtCore/QLatin1String>
36 #include <QtCore/QLatin1Char>
37 #include <QtCore/QStringList>
38 #include <QtCore/QRegExp>
39
40 using namespace Utils;
41
42 namespace {
43     QRegExp createMinimalExp(const QString &pattern) {
44         QRegExp exp(pattern);
45         exp.setMinimal(true);
46         return exp;
47     }
48 }
49
50 HtmlDocExtractor::HtmlDocExtractor() :
51     m_formatContents(true),
52     m_mode(FirstParagraph)
53 {}
54
55 void HtmlDocExtractor::setMode(Mode mode)
56 { m_mode = mode; }
57
58 void HtmlDocExtractor::applyFormatting(const bool format)
59 { m_formatContents = format; }
60
61 QString HtmlDocExtractor::getClassOrNamespaceBrief(const QString &html, const QString &mark) const
62 {
63     QString contents = getContentsByMarks(html, mark + QLatin1String("-brief"), mark);
64     if (!contents.isEmpty() && m_formatContents)
65         contents.remove(QLatin1String("<a href=\"#details\">More...</a>"));
66     processOutput(&contents);
67
68     return contents;
69 }
70
71 QString HtmlDocExtractor::getClassOrNamespaceDescription(const QString &html,
72                                                          const QString &mark) const
73 {
74     if (m_mode == FirstParagraph)
75         return getClassOrNamespaceBrief(html, mark);
76
77     QString contents = getContentsByMarks(html, mark + QLatin1String("-description"), mark);
78     if (!contents.isEmpty() && m_formatContents)
79         contents.remove(QLatin1String("Detailed Description"));
80     processOutput(&contents);
81
82     return contents;
83 }
84
85 QString HtmlDocExtractor::getEnumDescription(const QString &html, const QString &mark) const
86 {
87     return getClassOrNamespaceMemberDescription(html, mark, mark);
88 }
89
90 QString HtmlDocExtractor::getTypedefDescription(const QString &html, const QString &mark) const
91 {
92     return getClassOrNamespaceMemberDescription(html, mark, mark);
93 }
94
95 QString HtmlDocExtractor::getMacroDescription(const QString &html,
96                                               const QString &mark) const
97 {
98     return getClassOrNamespaceMemberDescription(html, mark, mark);
99 }
100
101 QString HtmlDocExtractor::getFunctionDescription(const QString &html,
102                                                  const QString &mark,
103                                                  const bool mainOverload) const
104 {
105     QString cleanMark = mark;
106     QString startMark = mark;
107     const int parenthesis = mark.indexOf(QLatin1Char('('));
108     if (parenthesis != -1) {
109         startMark = mark.left(parenthesis);
110         cleanMark = startMark;
111         if (mainOverload) {
112             startMark.append(QLatin1String("[overload1]"));
113         } else {
114             QString complement = mark.right(mark.length() - parenthesis);
115             complement.remove(QRegExp(QLatin1String("[\\(\\), ]")));
116             startMark.append(complement);
117         }
118     }
119
120     QString contents = getClassOrNamespaceMemberDescription(html, startMark, cleanMark);
121     if (contents.isEmpty()) {
122         // Maybe this is a property function, which is documented differently. Besides
123         // setX/isX/hasX there are other (not so usual) names for them. A few examples of those:
124         //   - toPlainText / Prop. plainText from QPlainTextEdit.
125         //   - resize / Prop. size from QWidget.
126         //   - move / Prop. pos from QWidget (nothing similar in the names in this case).
127         // So I try to find the link to this property in the list of properties, extract its
128         // anchor and then follow by the name found.
129         const QString &pattern =
130             QString(QLatin1String("<a href=\"[a-z\\.]+#([A-Za-z]+)-prop\">%1</a>")).arg(cleanMark);
131         QRegExp exp = createMinimalExp(pattern);
132         if (exp.indexIn(html) != -1) {
133             const QString &prop = exp.cap(1);
134             contents = getClassOrNamespaceMemberDescription(html,
135                                                             prop + QLatin1String("-prop"),
136                                                             prop);
137         }
138     }
139
140     return contents;
141 }
142
143 QString HtmlDocExtractor::getQmlComponentDescription(const QString &html, const QString &mark) const
144 {
145     return getClassOrNamespaceDescription(html, mark);
146 }
147
148 QString HtmlDocExtractor::getQmlPropertyDescription(const QString &html, const QString &mark) const
149 {
150     QString startMark = QString("<a name=\"%1-prop\">").arg(mark);
151     int index = html.indexOf(startMark);
152     if (index == -1) {
153         startMark = QString("<a name=\"%1-signal\">").arg(mark);
154         index = html.indexOf(startMark);
155     }
156     if (index == -1)
157         return QString();
158
159     QString contents = html.mid(index + startMark.size());
160     index = contents.indexOf(QLatin1String("<p>"));
161     if (index == -1)
162         return QString();
163     contents = contents.mid(index);
164     processOutput(&contents);
165
166     return contents;
167 }
168
169 QString HtmlDocExtractor::getClassOrNamespaceMemberDescription(const QString &html,
170                                                                const QString &startMark,
171                                                                const QString &endMark) const
172 {
173     QString contents = getContentsByMarks(html, startMark, endMark);
174     processOutput(&contents);
175
176     return contents;
177 }
178
179 QString HtmlDocExtractor::getContentsByMarks(const QString &html,
180                                              QString startMark,
181                                              QString endMark) const
182 {
183     startMark.prepend(QLatin1String("$$$"));
184     endMark.prepend(QLatin1String("<!-- @@@"));
185
186     QString contents;
187     int start = html.indexOf(startMark);
188     if (start != -1) {
189         start = html.indexOf(QLatin1String("-->"), start);
190         if (start != -1) {
191             int end = html.indexOf(endMark, start);
192             if (end != -1) {
193                 start += 3;
194                 contents = html.mid(start, end - start);
195             }
196         }
197     }
198     return contents;
199 }
200
201 void HtmlDocExtractor::processOutput(QString *html) const
202 {
203     if (html->isEmpty())
204         return;
205
206     if (m_mode == FirstParagraph) {
207         // Try to get the entire first paragraph, but if one is not found or if its opening
208         // tag is not in the very beginning (using an empirical value as the limit) the html
209         // is cleared to avoid too much content.
210         int index = html->indexOf(QLatin1String("<p>"));
211         if (index != -1 && index < 400) {
212             index = html->indexOf(QLatin1String("</p>"), index + 3);
213             if (index != -1) {
214                 // Most paragraphs end with a period, but there are cases without punctuation
215                 // and cases like this: <p>This is a description. Example:</p>
216                 const int period = html->lastIndexOf(QLatin1Char('.'), index);
217                 if (period != -1) {
218                     html->truncate(period + 1);
219                     html->append(QLatin1String("</p>"));
220                 } else {
221                     html->truncate(index + 4);
222                 }
223             } else {
224                 html->clear();
225             }
226         } else {
227             html->clear();
228         }
229     }
230
231     if (!html->isEmpty() && m_formatContents) {
232         stripBold(html);
233         replaceNonStyledHeadingsForBold(html);
234         replaceTablesForSimpleLines(html);
235         replaceListsForSimpleLines(html);
236         stripLinks(html);
237         stripHorizontalLines(html);
238         stripDivs(html);
239         stripTagsStyles(html);
240         stripHeadings(html);
241         stripImagens(html);
242         stripEmptyParagraphs(html);
243     }
244 }
245
246 void HtmlDocExtractor::stripAllHtml(QString *html)
247 {
248     html->remove(createMinimalExp(QLatin1String("<.*>")));
249 }
250
251 void HtmlDocExtractor::stripHeadings(QString *html)
252 {
253     html->remove(createMinimalExp(QLatin1String("<h\\d{1}.*>|</h\\d{1}>")));
254 }
255
256 void HtmlDocExtractor::stripLinks(QString *html)
257 {
258     html->remove(createMinimalExp(QLatin1String("<a\\s+.*>|</a>")));
259 }
260
261 void HtmlDocExtractor::stripHorizontalLines(QString *html)
262 {
263     html->remove(createMinimalExp(QLatin1String("<hr\\s+/>")));
264 }
265
266 void HtmlDocExtractor::stripDivs(QString *html)
267 {
268     html->remove(createMinimalExp(QLatin1String("<div\\s+.*>|</div>|<div\\s+.*/\\s*>")));
269 }
270
271 void HtmlDocExtractor::stripTagsStyles(QString *html)
272 {
273     const QRegExp &exp = createMinimalExp(QLatin1String("<(.*\\s+)class=\".*\">"));
274     html->replace(exp, QLatin1String("<\\1>"));
275 }
276
277 void HtmlDocExtractor::stripTeletypes(QString *html)
278 {
279     html->remove(QLatin1String("<tt>"));
280     html->remove(QLatin1String("</tt>"));
281 }
282
283 void HtmlDocExtractor::stripImagens(QString *html)
284 {
285     html->remove(createMinimalExp(QLatin1String("<img.*>")));
286 }
287
288 void HtmlDocExtractor::stripBold(QString *html)
289 {
290     html->remove(QLatin1String("<b>"));
291     html->remove(QLatin1String("</b>"));
292 }
293
294 void HtmlDocExtractor::stripEmptyParagraphs(QString *html)
295 {
296     html->remove(QLatin1String("<p></p>"));
297 }
298
299 void HtmlDocExtractor::replaceNonStyledHeadingsForBold(QString *html)
300 {
301     const QRegExp &hStart = createMinimalExp(QLatin1String("<h\\d{1}>"));
302     const QRegExp &hEnd = createMinimalExp(QLatin1String("</h\\d{1}>"));
303     html->replace(hStart, QLatin1String("<p><b>"));
304     html->replace(hEnd, QLatin1String("</b></p>"));
305 }
306
307 void HtmlDocExtractor::replaceTablesForSimpleLines(QString *html)
308 {
309     html->replace(createMinimalExp(QLatin1String("(?:<p>)?<table.*>")), QLatin1String("<p>"));
310     html->replace(QLatin1String("</table>"), QLatin1String("</p>"));
311     html->remove(createMinimalExp(QLatin1String("<thead.*>")));
312     html->remove(QLatin1String("</thead>"));
313     html->remove(createMinimalExp(QLatin1String("<tfoot.*>")));
314     html->remove(QLatin1String("</tfoot>"));
315     html->remove(createMinimalExp(QLatin1String("<tr.*><th.*>.*</th></tr>")));
316     html->replace(QLatin1String("</td><td"), QLatin1String("</td>&nbsp;<td"));
317     html->remove(createMinimalExp(QLatin1String("<td.*><p>")));
318     html->remove(createMinimalExp(QLatin1String("<td.*>")));
319     html->remove(createMinimalExp(QLatin1String("(?:</p>)?</td>")));
320     html->replace(createMinimalExp(QLatin1String("<tr.*>")),
321                   QLatin1String("&nbsp;&nbsp;&nbsp;&nbsp;"));
322     html->replace(QLatin1String("</tr>"), QLatin1String("<br />"));
323 }
324
325 void HtmlDocExtractor::replaceListsForSimpleLines(QString *html)
326 {
327     html->remove(createMinimalExp(QLatin1String("<(?:ul|ol).*>")));
328     html->remove(createMinimalExp(QLatin1String("</(?:ul|ol)>")));
329     html->replace(QLatin1String("<li>"), QLatin1String("&nbsp;&nbsp;&nbsp;&nbsp;"));
330     html->replace(QLatin1String("</li>"), QLatin1String("<br />"));
331 }