OSDN Git Service

2b7a1a28b58533ed13d3cb8e356315b82bc922b5
[qt-creator-jp/qt-creator-jp.git] / src / libs / utils / htmldocextractor.cpp
1 /**************************************************************************
2 **
3 ** This file is part of Qt Creator
4 **
5 ** Copyright (c) 2011 Nokia Corporation and/or its subsidiary(-ies).
6 **
7 ** Contact: Nokia Corporation (qt-info@nokia.com)
8 **
9 ** No Commercial Usage
10 **
11 ** This file contains pre-release code and may not be distributed.
12 ** You may use this file in accordance with the terms and conditions
13 ** contained in the Technology Preview License Agreement accompanying
14 ** this package.
15 **
16 ** GNU Lesser General Public License Usage
17 **
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file.  Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24 **
25 ** In addition, as a special exception, Nokia gives you certain additional
26 ** rights.  These rights are described in the Nokia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28 **
29 ** If you have questions regarding the use of this file, please contact
30 ** Nokia at qt-info@nokia.com.
31 **
32 **************************************************************************/
33
34 #include "htmldocextractor.h"
35
36 #include <QtCore/QLatin1String>
37 #include <QtCore/QLatin1Char>
38 #include <QtCore/QStringList>
39 #include <QtCore/QRegExp>
40
41 using namespace Utils;
42
43 namespace {
44     QRegExp createMinimalExp(const QString &pattern) {
45         QRegExp exp(pattern);
46         exp.setMinimal(true);
47         return exp;
48     }
49 }
50
51 HtmlDocExtractor::HtmlDocExtractor() :
52     m_formatContents(true),
53     m_mode(FirstParagraph)
54 {}
55
56 void HtmlDocExtractor::setMode(Mode mode)
57 { m_mode = mode; }
58
59 void HtmlDocExtractor::applyFormatting(const bool format)
60 { m_formatContents = format; }
61
62 QString HtmlDocExtractor::getClassOrNamespaceBrief(const QString &html, const QString &mark) const
63 {
64     QString contents = getContentsByMarks(html, mark + QLatin1String("-brief"), mark);
65     if (!contents.isEmpty() && m_formatContents)
66         contents.remove(QLatin1String("<a href=\"#details\">More...</a>"));
67     processOutput(&contents);
68
69     return contents;
70 }
71
72 QString HtmlDocExtractor::getClassOrNamespaceDescription(const QString &html,
73                                                          const QString &mark) const
74 {
75     if (m_mode == FirstParagraph)
76         return getClassOrNamespaceBrief(html, mark);
77
78     QString contents = getContentsByMarks(html, mark + QLatin1String("-description"), mark);
79     if (!contents.isEmpty() && m_formatContents)
80         contents.remove(QLatin1String("Detailed Description"));
81     processOutput(&contents);
82
83     return contents;
84 }
85
86 QString HtmlDocExtractor::getEnumDescription(const QString &html, const QString &mark) const
87 {
88     return getClassOrNamespaceMemberDescription(html, mark, mark);
89 }
90
91 QString HtmlDocExtractor::getTypedefDescription(const QString &html, const QString &mark) const
92 {
93     return getClassOrNamespaceMemberDescription(html, mark, mark);
94 }
95
96 QString HtmlDocExtractor::getMacroDescription(const QString &html,
97                                               const QString &mark) const
98 {
99     return getClassOrNamespaceMemberDescription(html, mark, mark);
100 }
101
102 QString HtmlDocExtractor::getFunctionDescription(const QString &html,
103                                                  const QString &mark,
104                                                  const bool mainOverload) const
105 {
106     QString cleanMark = mark;
107     QString startMark = mark;
108     const int parenthesis = mark.indexOf(QLatin1Char('('));
109     if (parenthesis != -1) {
110         startMark = mark.left(parenthesis);
111         cleanMark = startMark;
112         if (mainOverload) {
113             startMark.append(QLatin1String("[overload1]"));
114         } else {
115             QString complement = mark.right(mark.length() - parenthesis);
116             complement.remove(QRegExp(QLatin1String("[\\(\\), ]")));
117             startMark.append(complement);
118         }
119     }
120
121     QString contents = getClassOrNamespaceMemberDescription(html, startMark, cleanMark);
122     if (contents.isEmpty()) {
123         // Maybe this is a property function, which is documented differently. Besides
124         // setX/isX/hasX there are other (not so usual) names for them. A few examples of those:
125         //   - toPlainText / Prop. plainText from QPlainTextEdit.
126         //   - resize / Prop. size from QWidget.
127         //   - move / Prop. pos from QWidget (nothing similar in the names in this case).
128         // So I try to find the link to this property in the list of properties, extract its
129         // anchor and then follow by the name found.
130         const QString &pattern =
131             QString(QLatin1String("<a href=\"[a-z\\.]+#([A-Za-z]+)-prop\">%1</a>")).arg(cleanMark);
132         QRegExp exp = createMinimalExp(pattern);
133         if (exp.indexIn(html) != -1) {
134             const QString &prop = exp.cap(1);
135             contents = getClassOrNamespaceMemberDescription(html,
136                                                             prop + QLatin1String("-prop"),
137                                                             prop);
138         }
139     }
140
141     return contents;
142 }
143
144 QString HtmlDocExtractor::getQmlComponentDescription(const QString &html, const QString &mark) const
145 {
146     return getClassOrNamespaceDescription(html, mark);
147 }
148
149 QString HtmlDocExtractor::getQmlPropertyDescription(const QString &html, const QString &mark) const
150 {
151     QString startMark = QString("<a name=\"%1-prop\">").arg(mark);
152     int index = html.indexOf(startMark);
153     if (index == -1) {
154         startMark = QString("<a name=\"%1-signal\">").arg(mark);
155         index = html.indexOf(startMark);
156     }
157     if (index == -1)
158         return QString();
159
160     QString contents = html.mid(index + startMark.size());
161     index = contents.indexOf(QLatin1String("<p>"));
162     if (index == -1)
163         return QString();
164     contents = contents.mid(index);
165     processOutput(&contents);
166
167     return contents;
168 }
169
170 QString HtmlDocExtractor::getClassOrNamespaceMemberDescription(const QString &html,
171                                                                const QString &startMark,
172                                                                const QString &endMark) const
173 {
174     QString contents = getContentsByMarks(html, startMark, endMark);
175     processOutput(&contents);
176
177     return contents;
178 }
179
180 QString HtmlDocExtractor::getContentsByMarks(const QString &html,
181                                              QString startMark,
182                                              QString endMark) const
183 {
184     startMark.prepend(QLatin1String("$$$"));
185     endMark.prepend(QLatin1String("<!-- @@@"));
186
187     QString contents;
188     int start = html.indexOf(startMark);
189     if (start != -1) {
190         start = html.indexOf(QLatin1String("-->"), start);
191         if (start != -1) {
192             int end = html.indexOf(endMark, start);
193             if (end != -1) {
194                 start += 3;
195                 contents = html.mid(start, end - start);
196             }
197         }
198     }
199     return contents;
200 }
201
202 void HtmlDocExtractor::processOutput(QString *html) const
203 {
204     if (html->isEmpty())
205         return;
206
207     if (m_mode == FirstParagraph) {
208         // Try to get the entire first paragraph, but if one is not found or if its opening
209         // tag is not in the very beginning (using an empirical value as the limit) the html
210         // is cleared to avoid too much content.
211         int index = html->indexOf(QLatin1String("<p>"));
212         if (index != -1 && index < 400) {
213             index = html->indexOf(QLatin1String("</p>"), index + 3);
214             if (index != -1) {
215                 // Most paragraphs end with a period, but there are cases without punctuation
216                 // and cases like this: <p>This is a description. Example:</p>
217                 const int period = html->lastIndexOf(QLatin1Char('.'), index);
218                 if (period != -1) {
219                     html->truncate(period + 1);
220                     html->append(QLatin1String("</p>"));
221                 } else {
222                     html->truncate(index + 4);
223                 }
224             } else {
225                 html->clear();
226             }
227         } else {
228             html->clear();
229         }
230     }
231
232     if (!html->isEmpty() && m_formatContents) {
233         stripBold(html);
234         replaceNonStyledHeadingsForBold(html);
235         replaceTablesForSimpleLines(html);
236         replaceListsForSimpleLines(html);
237         stripLinks(html);
238         stripHorizontalLines(html);
239         stripDivs(html);
240         stripTagsStyles(html);
241         stripHeadings(html);
242         stripImagens(html);
243         stripEmptyParagraphs(html);
244     }
245 }
246
247 void HtmlDocExtractor::stripAllHtml(QString *html)
248 {
249     html->remove(createMinimalExp(QLatin1String("<.*>")));
250 }
251
252 void HtmlDocExtractor::stripHeadings(QString *html)
253 {
254     html->remove(createMinimalExp(QLatin1String("<h\\d{1}.*>|</h\\d{1}>")));
255 }
256
257 void HtmlDocExtractor::stripLinks(QString *html)
258 {
259     html->remove(createMinimalExp(QLatin1String("<a\\s+.*>|</a>")));
260 }
261
262 void HtmlDocExtractor::stripHorizontalLines(QString *html)
263 {
264     html->remove(createMinimalExp(QLatin1String("<hr\\s+/>")));
265 }
266
267 void HtmlDocExtractor::stripDivs(QString *html)
268 {
269     html->remove(createMinimalExp(QLatin1String("<div\\s+.*>|</div>|<div\\s+.*/\\s*>")));
270 }
271
272 void HtmlDocExtractor::stripTagsStyles(QString *html)
273 {
274     const QRegExp &exp = createMinimalExp(QLatin1String("<(.*\\s+)class=\".*\">"));
275     html->replace(exp, QLatin1String("<\\1>"));
276 }
277
278 void HtmlDocExtractor::stripTeletypes(QString *html)
279 {
280     html->remove(QLatin1String("<tt>"));
281     html->remove(QLatin1String("</tt>"));
282 }
283
284 void HtmlDocExtractor::stripImagens(QString *html)
285 {
286     html->remove(createMinimalExp(QLatin1String("<img.*>")));
287 }
288
289 void HtmlDocExtractor::stripBold(QString *html)
290 {
291     html->remove(QLatin1String("<b>"));
292     html->remove(QLatin1String("</b>"));
293 }
294
295 void HtmlDocExtractor::stripEmptyParagraphs(QString *html)
296 {
297     html->remove(QLatin1String("<p></p>"));
298 }
299
300 void HtmlDocExtractor::replaceNonStyledHeadingsForBold(QString *html)
301 {
302     const QRegExp &hStart = createMinimalExp(QLatin1String("<h\\d{1}>"));
303     const QRegExp &hEnd = createMinimalExp(QLatin1String("</h\\d{1}>"));
304     html->replace(hStart, QLatin1String("<p><b>"));
305     html->replace(hEnd, QLatin1String("</b></p>"));
306 }
307
308 void HtmlDocExtractor::replaceTablesForSimpleLines(QString *html)
309 {
310     html->replace(createMinimalExp(QLatin1String("(?:<p>)?<table.*>")), QLatin1String("<p>"));
311     html->replace(QLatin1String("</table>"), QLatin1String("</p>"));
312     html->remove(createMinimalExp(QLatin1String("<thead.*>")));
313     html->remove(QLatin1String("</thead>"));
314     html->remove(createMinimalExp(QLatin1String("<tfoot.*>")));
315     html->remove(QLatin1String("</tfoot>"));
316     html->remove(createMinimalExp(QLatin1String("<tr.*><th.*>.*</th></tr>")));
317     html->replace(QLatin1String("</td><td"), QLatin1String("</td>&nbsp;<td"));
318     html->remove(createMinimalExp(QLatin1String("<td.*><p>")));
319     html->remove(createMinimalExp(QLatin1String("<td.*>")));
320     html->remove(createMinimalExp(QLatin1String("(?:</p>)?</td>")));
321     html->replace(createMinimalExp(QLatin1String("<tr.*>")),
322                   QLatin1String("&nbsp;&nbsp;&nbsp;&nbsp;"));
323     html->replace(QLatin1String("</tr>"), QLatin1String("<br />"));
324 }
325
326 void HtmlDocExtractor::replaceListsForSimpleLines(QString *html)
327 {
328     html->remove(createMinimalExp(QLatin1String("<(?:ul|ol).*>")));
329     html->remove(createMinimalExp(QLatin1String("</(?:ul|ol)>")));
330     html->replace(QLatin1String("<li>"), QLatin1String("&nbsp;&nbsp;&nbsp;&nbsp;"));
331     html->replace(QLatin1String("</li>"), QLatin1String("<br />"));
332 }