1 /**************************************************************************
3 ** This file is part of Qt Creator
5 ** Copyright (c) 2011 Nokia Corporation and/or its subsidiary(-ies).
7 ** Contact: Nokia Corporation (info@qt.nokia.com)
10 ** GNU Lesser General Public License Usage
12 ** This file may be used under the terms of the GNU Lesser General Public
13 ** License version 2.1 as published by the Free Software Foundation and
14 ** appearing in the file LICENSE.LGPL included in the packaging of this file.
15 ** Please review the following information to ensure the GNU Lesser General
16 ** Public License version 2.1 requirements will be met:
17 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
19 ** In addition, as a special exception, Nokia gives you certain additional
20 ** rights. These rights are described in the Nokia Qt LGPL Exception
21 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
25 ** Alternatively, this file may be used in accordance with the terms and
26 ** conditions contained in a signed written agreement between you and Nokia.
28 ** If you have questions regarding the use of this file, please contact
29 ** Nokia at qt-info@nokia.com.
31 **************************************************************************/
35 #include <QtCore/QByteArray>
// Hexadecimal digits indexed by nibble value (0-15). encodeURL() indexes this
// table with the high and low nibble of a byte to obtain the two digits it appends.
37 const char cgi_chars[] = "0123456789abcdef"; // RFC 1738 suggests lower-case to be optimal
// Builds a URL-encoded form of rawText.
//
// The text is first converted to UTF-8 and then processed one byte at a time,
// so a non-ASCII character is handled as several independent bytes.
//
// NOTE(review): the declaration of `enc` and `ch`, the bodies of the branches
// and the return statement are not visible in this excerpt -- confirm the
// details below against the complete file.
39 QString CGI::encodeURL(const QString &rawText)
41 QByteArray utf = rawText.toUtf8();
43 enc.reserve(utf.length()); // Make sure we at least have space for a normal US-ASCII URL
45 QByteArray::const_iterator it = utf.constBegin();
46 while (it != utf.constEnd()) {
// ASCII letters and digits are recognised by plain range checks on the byte.
48 if (('A' <= ch && ch <= 'Z')
49 || ('a' <= ch && ch <= 'z')
50 || ('0' <= ch && ch <= '9'))
// Split the byte into its high (c1) and low (c2) nibble. Masking before the
// shift keeps c1 within 0-15 even when the byte value is negative (signed char).
64 ushort c1 = (*it & 0xF0) >> 4;
65 ushort c2 = (*it & 0x0F);
// Append the hex digit for each nibble, most significant first.
67 enc.append(QChar(*(cgi_chars + c1)));
68 enc.append(QChar(*(cgi_chars + c2)));
// Decodes URL-encoded text.
//
// Decoded bytes are collected in `dec`; the collected bytes are interpreted as
// UTF-8 when the returned QString is built.
//
// NOTE(review): the declarations of `dec` and `v`, the dispatch on `ch` and the
// bodies of the branches are not visible in this excerpt -- confirm the details
// below against the complete file.
77 QString CGI::decodeURL(const QString &urlText)
80 QString::const_iterator it = urlText.constBegin();
81 while (it != urlText.constEnd()) {
82 ushort ch = (*it).unicode();
// Read the next two characters, keeping only the low 8 bits of each.
// NOTE(review): the iterator is advanced twice here and no comparison against
// constEnd() is visible before the dereference; an escape that is cut short at
// the end of the input may read past the end -- verify.
86 char c1 = char(0x00ff & (*(++it)).unicode());
87 char c2 = char(0x00ff & (*(++it)).unicode());
// Classify the first (most significant) digit as upper-case letter, lower-case
// letter or decimal digit.
// NOTE(review): the letter ranges span A-Z / a-z, not only A-F / a-f, so
// letters that are not hexadecimal digits also pass this check -- confirm
// whether that is intended.
89 if ('A' <= c1 && c1 <= 'Z')
91 else if ('a' <= c1 && c1 <= 'z')
93 else if ('0' <= c1 && c1 <= '9')
// `continue` goes back to the loop condition; at this point `it` refers to the
// second character read above, so scanning resumes from that character.
96 continue; // Malformed URL!
97 v <<= 4; // c1 was MSB half
// Same classification for the second (least significant) digit.
98 if ('A' <= c2 && c2 <= 'Z')
100 else if ('a' <= c2 && c2 <= 'z')
102 else if ('0' <= c2 && c2 <= '9')
105 continue; // Malformed URL!
116 // should not happen with proper URLs but stay on the safe side
// Append the UTF-8 encoding of the current character to the byte buffer.
117 dec.append(QString(*it).toUtf8());
// Interpret the collected bytes as UTF-8 to produce the result.
123 return QString::fromUtf8(dec.constData(), dec.length());
126 // -------------------------------------------------------------------------------------------------
// Maps a UTF-16 code unit to the name of the corresponding HTML character
// entity. The returned name is a string literal and does not include the
// leading '&' or the trailing ';'. The number in parentheses after each entry
// is the decimal value of the code point.
//
// NOTE(review): the result for code units that are not listed is not visible
// in this excerpt -- confirm against the complete file.
127 inline const char *unicodeToHTML(ushort unicode_char)
129 switch (unicode_char) {
130 // Latin -------------------------------
131 case 0x0022: return "quot"; // (34 ) quotation mark = APL quote
132 case 0x0026: return "amp"; // (38 ) ampersand
133 case 0x003C: return "lt"; // (60 ) less-than sign
134 case 0x003E: return "gt"; // (62 ) greater-than sign
135 case 0x00A0: return "nbsp"; // (160 ) no-break space = non-breaking space
136 case 0x00A1: return "iexcl"; // (161 ) inverted exclamation mark
137 case 0x00A2: return "cent"; // (162 ) cent sign
138 case 0x00A3: return "pound"; // (163 ) pound sign
139 case 0x00A4: return "curren"; // (164 ) currency sign
140 case 0x00A5: return "yen"; // (165 ) yen sign = yuan sign
141 case 0x00A6: return "brvbar"; // (166 ) broken bar = broken vertical bar
142 case 0x00A7: return "sect"; // (167 ) section sign
143 case 0x00A8: return "uml"; // (168 ) diaeresis = spacing diaeresis
144 case 0x00A9: return "copy"; // (169 ) copyright sign
145 case 0x00AA: return "ordf"; // (170 ) feminine ordinal indicator
146 case 0x00AB: return "laquo"; // (171 ) left-pointing double angle quotation mark = left pointing guillemet
147 case 0x00AC: return "not"; // (172 ) not sign
148 case 0x00AD: return "shy"; // (173 ) soft hyphen = discretionary hyphen
149 case 0x00AE: return "reg"; // (174 ) registered sign = registered trade mark sign
150 case 0x00AF: return "macr"; // (175 ) macron = spacing macron = overline = APL overbar
151 case 0x00B0: return "deg"; // (176 ) degree sign
152 case 0x00B1: return "plusmn"; // (177 ) plus-minus sign = plus-or-minus sign
153 case 0x00B2: return "sup2"; // (178 ) superscript two = superscript digit two = squared
154 case 0x00B3: return "sup3"; // (179 ) superscript three = superscript digit three = cubed
155 case 0x00B4: return "acute"; // (180 ) acute accent = spacing acute
156 case 0x00B5: return "micro"; // (181 ) micro sign
157 case 0x00B6: return "para"; // (182 ) pilcrow sign = paragraph sign
158 case 0x00B7: return "middot"; // (183 ) middle dot = Georgian comma = Greek middle dot
159 case 0x00B8: return "cedil"; // (184 ) cedilla = spacing cedilla
160 case 0x00B9: return "sup1"; // (185 ) superscript one = superscript digit one
161 case 0x00BA: return "ordm"; // (186 ) masculine ordinal indicator
162 case 0x00BB: return "raquo"; // (187 ) right-pointing double angle quotation mark = right pointing guillemet
163 case 0x00BC: return "frac14"; // (188 ) vulgar fraction one quarter = fraction one quarter
164 case 0x00BD: return "frac12"; // (189 ) vulgar fraction one half = fraction one half
165 case 0x00BE: return "frac34"; // (190 ) vulgar fraction three quarters = fraction three quarters
166 case 0x00BF: return "iquest"; // (191 ) inverted question mark = turned question mark
167 case 0x00C0: return "Agrave"; // (192 ) capital letter A with grave = capital letter A grave
168 case 0x00C1: return "Aacute"; // (193 ) capital letter A with acute
169 case 0x00C2: return "Acirc"; // (194 ) capital letter A with circumflex
170 case 0x00C3: return "Atilde"; // (195 ) capital letter A with tilde
171 case 0x00C4: return "Auml"; // (196 ) capital letter A with diaeresis
172 case 0x00C5: return "Aring"; // (197 ) capital letter A with ring above = capital letter A ring
173 case 0x00C6: return "AElig"; // (198 ) capital letter AE = capital ligature AE
174 case 0x00C7: return "Ccedil"; // (199 ) capital letter C with cedilla
175 case 0x00C8: return "Egrave"; // (200 ) capital letter E with grave
176 case 0x00C9: return "Eacute"; // (201 ) capital letter E with acute
177 case 0x00CA: return "Ecirc"; // (202 ) capital letter E with circumflex
178 case 0x00CB: return "Euml"; // (203 ) capital letter E with diaeresis
179 case 0x00CC: return "Igrave"; // (204 ) capital letter I with grave
180 case 0x00CD: return "Iacute"; // (205 ) capital letter I with acute
181 case 0x00CE: return "Icirc"; // (206 ) capital letter I with circumflex
182 case 0x00CF: return "Iuml"; // (207 ) capital letter I with diaeresis
183 case 0x00D0: return "ETH"; // (208 ) capital letter ETH
184 case 0x00D1: return "Ntilde"; // (209 ) capital letter N with tilde
185 case 0x00D2: return "Ograve"; // (210 ) capital letter O with grave
186 case 0x00D3: return "Oacute"; // (211 ) capital letter O with acute
187 case 0x00D4: return "Ocirc"; // (212 ) capital letter O with circumflex
188 case 0x00D5: return "Otilde"; // (213 ) capital letter O with tilde
189 case 0x00D6: return "Ouml"; // (214 ) capital letter O with diaeresis
190 case 0x00D7: return "times"; // (215 ) multiplication sign
191 case 0x00D8: return "Oslash"; // (216 ) capital letter O with stroke = capital letter O slash
192 case 0x00D9: return "Ugrave"; // (217 ) capital letter U with grave
193 case 0x00DA: return "Uacute"; // (218 ) capital letter U with acute
194 case 0x00DB: return "Ucirc"; // (219 ) capital letter U with circumflex
195 case 0x00DC: return "Uuml"; // (220 ) capital letter U with diaeresis
196 case 0x00DD: return "Yacute"; // (221 ) capital letter Y with acute
197 case 0x00DE: return "THORN"; // (222 ) capital letter THORN
198 case 0x00DF: return "szlig"; // (223 ) small letter sharp s = ess-zed
199 case 0x00E0: return "agrave"; // (224 ) small letter a with grave = small letter a grave
200 case 0x00E1: return "aacute"; // (225 ) small letter a with acute
201 case 0x00E2: return "acirc"; // (226 ) small letter a with circumflex
202 case 0x00E3: return "atilde"; // (227 ) small letter a with tilde
203 case 0x00E4: return "auml"; // (228 ) small letter a with diaeresis
204 case 0x00E5: return "aring"; // (229 ) small letter a with ring above = small letter a ring
205 case 0x00E6: return "aelig"; // (230 ) small letter ae = small ligature ae
206 case 0x00E7: return "ccedil"; // (231 ) small letter c with cedilla
207 case 0x00E8: return "egrave"; // (232 ) small letter e with grave
208 case 0x00E9: return "eacute"; // (233 ) small letter e with acute
209 case 0x00EA: return "ecirc"; // (234 ) small letter e with circumflex
210 case 0x00EB: return "euml"; // (235 ) small letter e with diaeresis
211 case 0x00EC: return "igrave"; // (236 ) small letter i with grave
212 case 0x00ED: return "iacute"; // (237 ) small letter i with acute
213 case 0x00EE: return "icirc"; // (238 ) small letter i with circumflex
214 case 0x00EF: return "iuml"; // (239 ) small letter i with diaeresis
215 case 0x00F0: return "eth"; // (240 ) small letter eth
216 case 0x00F1: return "ntilde"; // (241 ) small letter n with tilde
217 case 0x00F2: return "ograve"; // (242 ) small letter o with grave
218 case 0x00F3: return "oacute"; // (243 ) small letter o with acute
219 case 0x00F4: return "ocirc"; // (244 ) small letter o with circumflex
220 case 0x00F5: return "otilde"; // (245 ) small letter o with tilde
221 case 0x00F6: return "ouml"; // (246 ) small letter o with diaeresis
222 case 0x00F7: return "divide"; // (247 ) division sign
223 case 0x00F8: return "oslash"; // (248 ) small letter o with stroke = small letter o slash
224 case 0x00F9: return "ugrave"; // (249 ) small letter u with grave
225 case 0x00FA: return "uacute"; // (250 ) small letter u with acute
226 case 0x00FB: return "ucirc"; // (251 ) small letter u with circumflex
227 case 0x00FC: return "uuml"; // (252 ) small letter u with diaeresis
228 case 0x00FD: return "yacute"; // (253 ) small letter y with acute
229 case 0x00FE: return "thorn"; // (254 ) small letter thorn
230 case 0x00FF: return "yuml"; // (255 ) small letter y with diaeresis
231 case 0x0152: return "OElig"; // (338 ) capital ligature OE
232 case 0x0153: return "oelig"; // (339 ) small ligature oe
233 case 0x0160: return "Scaron"; // (352 ) capital letter S with caron
234 case 0x0161: return "scaron"; // (353 ) small letter s with caron
235 case 0x0178: return "Yuml"; // (376 ) capital letter Y with diaeresis
236 case 0x0192: return "fnof"; // (402 ) small f with hook = function = florin
237 case 0x02C6: return "circ"; // (710 ) modifier letter circumflex accent
238 case 0x02DC: return "tilde"; // (732 ) small tilde
239 // Greek -------------------------------
240 case 0x0391: return "Alpha"; // (913 ) capital letter alpha
241 case 0x0392: return "Beta"; // (914 ) capital letter beta
242 case 0x0393: return "Gamma"; // (915 ) capital letter gamma
243 case 0x0394: return "Delta"; // (916 ) capital letter delta
244 case 0x0395: return "Epsilon"; // (917 ) capital letter epsilon
245 case 0x0396: return "Zeta"; // (918 ) capital letter zeta
246 case 0x0397: return "Eta"; // (919 ) capital letter eta
247 case 0x0398: return "Theta"; // (920 ) capital letter theta
248 case 0x0399: return "Iota"; // (921 ) capital letter iota
249 case 0x039A: return "Kappa"; // (922 ) capital letter kappa
250 case 0x039B: return "Lambda"; // (923 ) capital letter lambda
251 case 0x039C: return "Mu"; // (924 ) capital letter mu
252 case 0x039D: return "Nu"; // (925 ) capital letter nu
253 case 0x039E: return "Xi"; // (926 ) capital letter xi
254 case 0x039F: return "Omicron"; // (927 ) capital letter omicron
255 case 0x03A0: return "Pi"; // (928 ) capital letter pi
256 case 0x03A1: return "Rho"; // (929 ) capital letter rho
257 case 0x03A3: return "Sigma"; // (931 ) capital letter sigma
258 case 0x03A4: return "Tau"; // (932 ) capital letter tau
259 case 0x03A5: return "Upsilon"; // (933 ) capital letter upsilon
260 case 0x03A6: return "Phi"; // (934 ) capital letter phi
261 case 0x03A7: return "Chi"; // (935 ) capital letter chi
262 case 0x03A8: return "Psi"; // (936 ) capital letter psi
263 case 0x03A9: return "Omega"; // (937 ) capital letter omega
264 case 0x03B1: return "alpha"; // (945 ) small letter alpha
265 case 0x03B2: return "beta"; // (946 ) small letter beta
266 case 0x03B3: return "gamma"; // (947 ) small letter gamma
267 case 0x03B4: return "delta"; // (948 ) small letter delta
268 case 0x03B5: return "epsilon"; // (949 ) small letter epsilon
269 case 0x03B6: return "zeta"; // (950 ) small letter zeta
270 case 0x03B7: return "eta"; // (951 ) small letter eta
271 case 0x03B8: return "theta"; // (952 ) small letter theta
272 case 0x03B9: return "iota"; // (953 ) small letter iota
273 case 0x03BA: return "kappa"; // (954 ) small letter kappa
274 case 0x03BB: return "lambda"; // (955 ) small letter lambda
275 case 0x03BC: return "mu"; // (956 ) small letter mu
276 case 0x03BD: return "nu"; // (957 ) small letter nu
277 case 0x03BE: return "xi"; // (958 ) small letter xi
278 case 0x03BF: return "omicron"; // (959 ) small letter omicron
279 case 0x03C0: return "pi"; // (960 ) small letter pi
280 case 0x03C1: return "rho"; // (961 ) small letter rho
281 case 0x03C2: return "sigmaf"; // (962 ) small letter final sigma
282 case 0x03C3: return "sigma"; // (963 ) small letter sigma
283 case 0x03C4: return "tau"; // (964 ) small letter tau
284 case 0x03C5: return "upsilon"; // (965 ) small letter upsilon
285 case 0x03C6: return "phi"; // (966 ) small letter phi
286 case 0x03C7: return "chi"; // (967 ) small letter chi
287 case 0x03C8: return "psi"; // (968 ) small letter psi
288 case 0x03C9: return "omega"; // (969 ) small letter omega
289 case 0x03D1: return "thetasym";// (977 ) small letter theta symbol
290 case 0x03D2: return "upsih"; // (978 ) upsilon with hook symbol
291 case 0x03D6: return "piv"; // (982 ) pi symbol
292 // General Punctuation -----------------
293 case 0x2002: return "ensp"; // (8194) en space
294 case 0x2003: return "emsp"; // (8195) em space
295 case 0x2009: return "thinsp"; // (8201) thin space
296 case 0x200C: return "zwnj"; // (8204) zero width non-joiner
297 case 0x200D: return "zwj"; // (8205) zero width joiner
298 case 0x200E: return "lrm"; // (8206) left-to-right mark
299 case 0x200F: return "rlm"; // (8207) right-to-left mark
300 case 0x2013: return "ndash"; // (8211) en dash
301 case 0x2014: return "mdash"; // (8212) em dash
302 case 0x2018: return "lsquo"; // (8216) left single quotation mark
303 case 0x2019: return "rsquo"; // (8217) right single quotation mark
304 case 0x201A: return "sbquo"; // (8218) single low-9 quotation mark
305 case 0x201C: return "ldquo"; // (8220) left double quotation mark
306 case 0x201D: return "rdquo"; // (8221) right double quotation mark
307 case 0x201E: return "bdquo"; // (8222) double low-9 quotation mark
308 case 0x2020: return "dagger"; // (8224) dagger
309 case 0x2021: return "Dagger"; // (8225) double dagger
310 case 0x2022: return "bull"; // (8226) bullet = black small circle
311 case 0x2026: return "hellip"; // (8230) horizontal ellipsis = three dot leader
312 case 0x2030: return "permil"; // (8240) per mille sign
313 case 0x2032: return "prime"; // (8242) prime = minutes = feet
314 case 0x2033: return "Prime"; // (8243) double prime = seconds = inches
315 case 0x2039: return "lsaquo"; // (8249) single left-pointing angle quotation mark
316 case 0x203A: return "rsaquo"; // (8250) single right-pointing angle quotation mark
317 case 0x203E: return "oline"; // (8254) overline = spacing overscore
318 case 0x2044: return "frasl"; // (8260) fraction slash
319 // Currency Symbols --------------------
320 case 0x20AC: return "euro"; // (8364) euro sign
321 // Letterlike Symbols ------------------
322 case 0x2111: return "image"; // (8465) blackletter capital I = imaginary part
323 case 0x2118: return "weierp"; // (8472) script capital P = power set = Weierstrass p
324 case 0x211C: return "real"; // (8476) blackletter capital R = real part symbol
325 case 0x2122: return "trade"; // (8482) trade mark sign
326 case 0x2135: return "alefsym"; // (8501) alef symbol = first transfinite cardinal
327 // Arrows ------------------------------
328 case 0x2190: return "larr"; // (8592) leftwards arrow
329 case 0x2191: return "uarr"; // (8593) upwards arrow
330 case 0x2192: return "rarr"; // (8594) rightwards arrow
331 case 0x2193: return "darr"; // (8595) downwards arrow
332 case 0x2194: return "harr"; // (8596) left right arrow
333 case 0x21B5: return "crarr"; // (8629) downwards arrow with corner leftwards = carriage return
334 case 0x21D0: return "lArr"; // (8656) leftwards double arrow
335 case 0x21D1: return "uArr"; // (8657) upwards double arrow
336 case 0x21D2: return "rArr"; // (8658) rightwards double arrow
337 case 0x21D3: return "dArr"; // (8659) downwards double arrow
338 case 0x21D4: return "hArr"; // (8660) left right double arrow
339 // Mathematical Operators --------------
340 case 0x2200: return "forall"; // (8704) for all
341 case 0x2202: return "part"; // (8706) partial differential
342 case 0x2203: return "exist"; // (8707) there exists
343 case 0x2205: return "empty"; // (8709) empty set = null set = diameter
344 case 0x2207: return "nabla"; // (8711) nabla = backward difference
345 case 0x2208: return "isin"; // (8712) element of
346 case 0x2209: return "notin"; // (8713) not an element of
347 case 0x220B: return "ni"; // (8715) contains as member
348 case 0x220F: return "prod"; // (8719) n-ary product = product sign
349 case 0x2211: return "sum"; // (8721) n-ary summation
350 case 0x2212: return "minus"; // (8722) minus sign
351 case 0x2217: return "lowast"; // (8727) asterisk operator
352 case 0x221A: return "radic"; // (8730) square root = radical sign
353 case 0x221D: return "prop"; // (8733) proportional to
354 case 0x221E: return "infin"; // (8734) infinity
355 case 0x2220: return "ang"; // (8736) angle
356 case 0x2227: return "and"; // (8743) logical and = wedge
357 case 0x2228: return "or"; // (8744) logical or = vee
358 case 0x2229: return "cap"; // (8745) intersection = cap
359 case 0x222A: return "cup"; // (8746) union = cup
360 case 0x222B: return "int"; // (8747) integral
361 case 0x2234: return "there4"; // (8756) therefore
362 case 0x223C: return "sim"; // (8764) tilde operator = varies with = similar to
363 case 0x2245: return "cong"; // (8773) approximately equal to
364 case 0x2248: return "asymp"; // (8776) almost equal to = asymptotic to
365 case 0x2260: return "ne"; // (8800) not equal to
366 case 0x2261: return "equiv"; // (8801) identical to
367 case 0x2264: return "le"; // (8804) less-than or equal to
368 case 0x2265: return "ge"; // (8805) greater-than or equal to
369 case 0x2282: return "sub"; // (8834) subset of
370 case 0x2283: return "sup"; // (8835) superset of
371 case 0x2284: return "nsub"; // (8836) not a subset of
372 case 0x2286: return "sube"; // (8838) subset of or equal to
373 case 0x2287: return "supe"; // (8839) superset of or equal to
374 case 0x2295: return "oplus"; // (8853) circled plus = direct sum
375 case 0x2297: return "otimes"; // (8855) circled times = vector product
376 case 0x22A5: return "perp"; // (8869) up tack = orthogonal to = perpendicular
377 case 0x22C5: return "sdot"; // (8901) dot operator
378 // Miscellaneous Technical -------------
379 case 0x2308: return "lceil"; // (8968) left ceiling = apl upstile
380 case 0x2309: return "rceil"; // (8969) right ceiling
381 case 0x230A: return "lfloor"; // (8970) left floor = apl downstile
382 case 0x230B: return "rfloor"; // (8971) right floor
383 case 0x2329: return "lang"; // (9001) left-pointing angle bracket = bra
384 case 0x232A: return "rang"; // (9002) right-pointing angle bracket = ket
385 // Geometric Shapes --------------------
386 case 0x25CA: return "loz"; // (9674) lozenge
387 // Miscellaneous Symbols ---------------
388 case 0x2660: return "spades"; // (9824) black spade suit
389 case 0x2663: return "clubs"; // (9827) black club suit = shamrock
390 case 0x2665: return "hearts"; // (9829) black heart suit = valentine
391 case 0x2666: return "diams"; // (9830) black diamond suit
397 QString CGI::encodeHTML(const QString &rawText, int conversionFlags)
400 enc.reserve(rawText.length()); // at least
402 QString::const_iterator it = rawText.constBegin();
403 while (it != rawText.constEnd()) {
404 const char *html = unicodeToHTML((*it).unicode());
409 } else if ((conversionFlags & CGI::LineBreaks)
410 && ((*it).toLatin1() == '\n')) {
411 enc.append("<BR>\n");
412 } else if ((conversionFlags & CGI::Spaces)
413 && ((*it).toLatin1() == ' ')) {
414 enc.append(" ");
415 } else if ((conversionFlags & CGI::Tabs)
416 && ((*it).toLatin1() == '\t')) {
417 enc.append(" ");
418 } else if ((*it).unicode() > 0x00FF) {
420 enc.append(QString::number((*it).unicode()));