1 /**************************************************************************
3 ** This file is part of Qt Creator
5 ** Copyright (c) 2011 Nokia Corporation and/or its subsidiary(-ies).
7 ** Contact: Nokia Corporation (qt-info@nokia.com)
11 ** This file contains pre-release code and may not be distributed.
12 ** You may use this file in accordance with the terms and conditions
13 ** contained in the Technology Preview License Agreement accompanying
16 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
25 ** In addition, as a special exception, Nokia gives you certain additional
26 ** rights. These rights are described in the Nokia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
29 ** If you have questions regarding the use of this file, please contact
30 ** Nokia at qt-info@nokia.com.
32 **************************************************************************/
34 #include "htmldocextractor.h"
36 #include <QtCore/QLatin1String>
37 #include <QtCore/QLatin1Char>
38 #include <QtCore/QStringList>
39 #include <QtCore/QRegExp>
41 using namespace Utils;
// Helper that builds the QRegExp objects used by the extraction and stripping
// routines of this file from the given pattern.
// NOTE(review): the body is not visible in this excerpt; judging by the name it
// presumably switches the expression to minimal (non-greedy) matching - confirm.
44 QRegExp createMinimalExp(const QString &pattern) {
51 HtmlDocExtractor::HtmlDocExtractor() :
52 m_formatContents(true),
53 m_mode(FirstParagraph)
// Selects the extraction mode. Other members of this class compare m_mode
// against FirstParagraph to decide how much of the documentation is returned.
// NOTE(review): the body is not visible in this excerpt; presumably it stores
// mode in m_mode - confirm.
56 void HtmlDocExtractor::setMode(Mode mode)
59 void HtmlDocExtractor::applyFormatting(const bool format)
60 { m_formatContents = format; }
62 QString HtmlDocExtractor::getClassOrNamespaceBrief(const QString &html, const QString &mark) const
64 QString contents = getContentsByMarks(html, mark + QLatin1String("-brief"), mark);
65 if (!contents.isEmpty() && m_formatContents)
66 contents.remove(QLatin1String("<a href=\"#details\">More...</a>"));
67 processOutput(&contents);
72 QString HtmlDocExtractor::getClassOrNamespaceDescription(const QString &html,
73 const QString &mark) const
75 if (m_mode == FirstParagraph)
76 return getClassOrNamespaceBrief(html, mark);
78 QString contents = getContentsByMarks(html, mark + QLatin1String("-description"), mark);
79 if (!contents.isEmpty() && m_formatContents)
80 contents.remove(QLatin1String("Detailed Description"));
81 processOutput(&contents);
86 QString HtmlDocExtractor::getEnumDescription(const QString &html, const QString &mark) const
88 return getClassOrNamespaceMemberDescription(html, mark, mark);
91 QString HtmlDocExtractor::getTypedefDescription(const QString &html, const QString &mark) const
93 return getClassOrNamespaceMemberDescription(html, mark, mark);
96 QString HtmlDocExtractor::getMacroDescription(const QString &html,
97 const QString &mark) const
99 return getClassOrNamespaceMemberDescription(html, mark, mark);
102 QString HtmlDocExtractor::getFunctionDescription(const QString &html,
104 const bool mainOverload) const
106 QString cleanMark = mark;
107 QString startMark = mark;
108 const int parenthesis = mark.indexOf(QLatin1Char('('));
109 if (parenthesis != -1) {
110 startMark = mark.left(parenthesis);
111 cleanMark = startMark;
113 startMark.append(QLatin1String("[overload1]"));
115 QString complement = mark.right(mark.length() - parenthesis);
116 complement.remove(QRegExp(QLatin1String("[\\(\\), ]")));
117 startMark.append(complement);
121 QString contents = getClassOrNamespaceMemberDescription(html, startMark, cleanMark);
122 if (contents.isEmpty()) {
123 // Maybe this is a property function, which is documented differently. Besides
124 // setX/isX/hasX there are other (not so usual) names for them. A few examples of those:
125 // - toPlainText / Prop. plainText from QPlainTextEdit.
126 // - resize / Prop. size from QWidget.
127 // - move / Prop. pos from QWidget (nothing similar in the names in this case).
128 // So I try to find the link to this property in the list of properties, extract its
129 // anchor and then follow by the name found.
130 const QString &pattern =
131 QString(QLatin1String("<a href=\"[a-z\\.]+#([A-Za-z]+)-prop\">%1</a>")).arg(cleanMark);
132 QRegExp exp = createMinimalExp(pattern);
133 if (exp.indexIn(html) != -1) {
134 const QString &prop = exp.cap(1);
135 contents = getClassOrNamespaceMemberDescription(html,
136 prop + QLatin1String("-prop"),
144 QString HtmlDocExtractor::getQmlComponentDescription(const QString &html, const QString &mark) const
146 return getClassOrNamespaceDescription(html, mark);
149 QString HtmlDocExtractor::getQmlPropertyDescription(const QString &html, const QString &mark) const
151 QString startMark = QString("<a name=\"%1-prop\">").arg(mark);
152 int index = html.indexOf(startMark);
154 startMark = QString("<a name=\"%1-signal\">").arg(mark);
155 index = html.indexOf(startMark);
160 QString contents = html.mid(index + startMark.size());
161 index = contents.indexOf(QLatin1String("<p>"));
164 contents = contents.mid(index);
165 processOutput(&contents);
170 QString HtmlDocExtractor::getClassOrNamespaceMemberDescription(const QString &html,
171 const QString &startMark,
172 const QString &endMark) const
174 QString contents = getContentsByMarks(html, startMark, endMark);
175 processOutput(&contents);
// Returns the part of html located between the marker built from startMark and
// the marker built from endMark.
// NOTE(review): several lines of this function (the declaration of contents,
// the handling of failed searches and the return) are not visible in this
// excerpt, so the comments below only describe the visible statements.
180 QString HtmlDocExtractor::getContentsByMarks(const QString &html,
182 QString endMark) const
// The marks are modified locally to become the literal markers searched for:
// "$$$<startMark>" and "<!-- @@@<endMark>".
184 startMark.prepend(QLatin1String("$$$"));
185 endMark.prepend(QLatin1String("<!-- @@@"));
// Position of the start marker in the document.
188 int start = html.indexOf(startMark);
// Move on to the first "-->" at or after the start marker.
190 start = html.indexOf(QLatin1String("-->"), start);
// The end marker is only searched for from that position onwards.
192 int end = html.indexOf(endMark, start);
// The result is the text from start up to, but not including, the end marker.
195 contents = html.mid(start, end - start);
// Post-processes extracted documentation in place. In FirstParagraph mode the
// text is cut down to its first paragraph; afterwards, if something is left and
// formatting is enabled, the replace*/strip* helpers are applied to it.
// NOTE(review): several short lines of this function (guards, else branches and
// some helper calls) are not visible in this excerpt; the comments below only
// describe the visible statements.
202 void HtmlDocExtractor::processOutput(QString *html) const
207 if (m_mode == FirstParagraph) {
208 // Try to get the entire first paragraph, but if one is not found or if its opening
209 // tag is not in the very beginning (using an empirical value as the limit) the html
210 // is cleared to avoid too much content.
211 int index = html->indexOf(QLatin1String("<p>"));
// 400 is the empirical limit mentioned above for the position of the opening tag.
212 if (index != -1 && index < 400) {
// index + 3 starts the search right behind the "<p>" that was just found.
213 index = html->indexOf(QLatin1String("</p>"), index + 3);
215 // Most paragraphs end with a period, but there are cases without punctuation
216 // and cases like this: <p>This is a description. Example:</p>
// Searches backwards from the closing tag for the last period of the paragraph.
217 const int period = html->lastIndexOf(QLatin1Char('.'), index);
// Keep everything up to and including that period, then close the paragraph again.
219 html->truncate(period + 1);
220 html->append(QLatin1String("</p>"));
// index + 4 keeps the closing "</p>" itself.
// NOTE(review): presumably the alternative taken when no period is found - confirm.
222 html->truncate(index + 4);
// Formatting is only applied to non-empty content and only when enabled.
232 if (!html->isEmpty() && m_formatContents) {
// Headings, tables and lists are rewritten into simpler markup ...
234 replaceNonStyledHeadingsForBold(html);
235 replaceTablesForSimpleLines(html);
236 replaceListsForSimpleLines(html);
// ... and the remaining unwanted markup is removed.
238 stripHorizontalLines(html);
240 stripTagsStyles(html);
243 stripEmptyParagraphs(html);
247 void HtmlDocExtractor::stripAllHtml(QString *html)
249 html->remove(createMinimalExp(QLatin1String("<.*>")));
252 void HtmlDocExtractor::stripHeadings(QString *html)
254 html->remove(createMinimalExp(QLatin1String("<h\\d{1}.*>|</h\\d{1}>")));
257 void HtmlDocExtractor::stripLinks(QString *html)
259 html->remove(createMinimalExp(QLatin1String("<a\\s+.*>|</a>")));
262 void HtmlDocExtractor::stripHorizontalLines(QString *html)
264 html->remove(createMinimalExp(QLatin1String("<hr\\s+/>")));
267 void HtmlDocExtractor::stripDivs(QString *html)
269 html->remove(createMinimalExp(QLatin1String("<div\\s+.*>|</div>|<div\\s+.*/\\s*>")));
272 void HtmlDocExtractor::stripTagsStyles(QString *html)
274 const QRegExp &exp = createMinimalExp(QLatin1String("<(.*\\s+)class=\".*\">"));
275 html->replace(exp, QLatin1String("<\\1>"));
278 void HtmlDocExtractor::stripTeletypes(QString *html)
280 html->remove(QLatin1String("<tt>"));
281 html->remove(QLatin1String("</tt>"));
284 void HtmlDocExtractor::stripImagens(QString *html)
286 html->remove(createMinimalExp(QLatin1String("<img.*>")));
289 void HtmlDocExtractor::stripBold(QString *html)
291 html->remove(QLatin1String("<b>"));
292 html->remove(QLatin1String("</b>"));
295 void HtmlDocExtractor::stripEmptyParagraphs(QString *html)
297 html->remove(QLatin1String("<p></p>"));
300 void HtmlDocExtractor::replaceNonStyledHeadingsForBold(QString *html)
302 const QRegExp &hStart = createMinimalExp(QLatin1String("<h\\d{1}>"));
303 const QRegExp &hEnd = createMinimalExp(QLatin1String("</h\\d{1}>"));
304 html->replace(hStart, QLatin1String("<p><b>"));
305 html->replace(hEnd, QLatin1String("</b></p>"));
308 void HtmlDocExtractor::replaceTablesForSimpleLines(QString *html)
310 html->replace(createMinimalExp(QLatin1String("(?:<p>)?<table.*>")), QLatin1String("<p>"));
311 html->replace(QLatin1String("</table>"), QLatin1String("</p>"));
312 html->remove(createMinimalExp(QLatin1String("<thead.*>")));
313 html->remove(QLatin1String("</thead>"));
314 html->remove(createMinimalExp(QLatin1String("<tfoot.*>")));
315 html->remove(QLatin1String("</tfoot>"));
316 html->remove(createMinimalExp(QLatin1String("<tr.*><th.*>.*</th></tr>")));
317 html->replace(QLatin1String("</td><td"), QLatin1String("</td> <td"));
318 html->remove(createMinimalExp(QLatin1String("<td.*><p>")));
319 html->remove(createMinimalExp(QLatin1String("<td.*>")));
320 html->remove(createMinimalExp(QLatin1String("(?:</p>)?</td>")));
321 html->replace(createMinimalExp(QLatin1String("<tr.*>")),
322 QLatin1String(" "));
323 html->replace(QLatin1String("</tr>"), QLatin1String("<br />"));
326 void HtmlDocExtractor::replaceListsForSimpleLines(QString *html)
328 html->remove(createMinimalExp(QLatin1String("<(?:ul|ol).*>")));
329 html->remove(createMinimalExp(QLatin1String("</(?:ul|ol)>")));
330 html->replace(QLatin1String("<li>"), QLatin1String(" "));
331 html->replace(QLatin1String("</li>"), QLatin1String("<br />"));