2 * Copyright (C) 2005-2009 Atsushi Konno All rights reserved.
3 * Copyright (C) 2005 QSDN,Inc. All rights reserved.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 #include "chxj_encoding.h"
19 #include "chxj_apply_convrule.h"
20 #include "chxj_url_encode.h"
21 #include "chxj_dump_string.h"
/**
 * Convert a string from the encoding named by the applicable convert rule
 * into CP932 using iconv.
 *
 * @param r    Request; its per-directory config supplies the convert rules,
 *             and its pool is the parent of the work pool created here.
 * @param src  Input bytes; they are copied into a private, zero-filled work
 *             buffer before conversion.
 * @param len  Pointer to the byte length of src. The same location supplies
 *             the length used for the final result dump.
 * @return     An empty string allocated from r->pool when *len is rejected
 *             as an invalid size.
 *             NOTE(review): the remaining return statements are not visible
 *             in this listing -- confirm them against the full source.
 */
27 chxj_encoding(request_rec *r, const char *src, apr_size_t *len)
37 mod_chxj_config *dconf;
38 chxjconvrule_entry *entryp;
42 DBG(r,"REQ[%X] start chxj_encoding()", (unsigned int)(apr_size_t)r);
/* Fetch this module's per-directory configuration for the request. */
44 dconf = chxj_get_module_config(r->per_dir_config, &chxj_module);
/* Early exit traced as "none encoding".
 * NOTE(review): the guard condition is not visible here. */
47 DBG(r,"none encoding.");
48 DBG(r,"REQ[%X] end chxj_encoding()", (unsigned int)(apr_size_t)r);
/* Invalid length: report it and hand back an empty string from r->pool. */
52 ERR(r, "runtime exception: chxj_encoding(): invalid string size.[%d]", (int)*len);
53 DBG(r,"REQ[%X] end chxj_encoding()", (unsigned int)(apr_size_t)r);
54 return (char *)apr_pstrdup(r->pool, "");
/* Obtain the convert rule entry; its encoding field is later used as the
 * iconv source charset. */
57 entryp = chxj_apply_convrule(r, dconf->convrules);
/* No encoding configured on the rule. */
58 if (entryp->encoding == NULL) {
59 DBG(r,"REQ[%X] none encoding.", (unsigned int)(apr_size_t)r);
60 DBG(r,"REQ[%X] end chxj_encoding()", (unsigned int)(apr_size_t)r);
/* Encoding explicitly set to "none" (STRCASEEQ is presumably a
 * case-insensitive compare -- verify against its definition). */
64 if (STRCASEEQ('n','N',"none", entryp->encoding)) {
65 DBG(r,"REQ[%X] none encoding.", (unsigned int)(apr_size_t)r);
66 DBG(r,"REQ[%X] end chxj_encoding()", (unsigned int)(apr_size_t)r);
/* Work buffers are taken from a sub-pool whose parent is the request pool. */
70 apr_pool_create(&pool, r->pool);
/* One extra byte so the zero-filled copy of src stays NUL-terminated. */
72 ibuf = apr_palloc(pool, ilen+1);
74 ERR(r, "runtime exception: chxj_encoding(): Out of memory.");
75 DBG(r,"REQ[%X] end chxj_encoding()", (unsigned int)(apr_size_t)r);
78 memset(ibuf, 0, ilen+1);
79 memcpy(ibuf, src, ilen);
/* spos keeps the start of the output buffer because iconv() advances obuf. */
82 spos = obuf = apr_palloc(pool, olen);
84 DBG(r,"REQ[%X] end chxj_encoding()", (unsigned int)(apr_size_t)r);
87 DBG(r,"REQ[%X] encode convert [%s] -> [%s]", (unsigned int)(apr_size_t)r, entryp->encoding, "CP932");
/* Zero the whole output buffer before converting into it. */
89 memset(obuf, 0, olen);
/* iconv_open(tocode, fromcode): target CP932, source is the rule's encoding. */
90 cd = iconv_open("CP932", entryp->encoding);
91 if (cd == (iconv_t)-1) {
/* EINVAL from iconv_open means this conversion pair is unsupported. */
92 if (EINVAL == errno) {
93 ERR(r, "The conversion from %s to %s is not supported by the implementation.", entryp->encoding, "CP932");
96 ERR(r, "iconv open failed. from:[%s] to:[%s] errno:[%d]", entryp->encoding, "CP932", errno);
98 DBG(r,"REQ[%X] end chxj_encoding()", (unsigned int)(apr_size_t)r);
/* iconv() moves ibuf/obuf forward and decrements ilen/olen as it goes. */
102 result = iconv(cd, &ibuf, &ilen, &obuf, &olen);
103 if (result == (size_t)(-1)) {
/* E2BIG: the output buffer is full. */
104 if (E2BIG == errno) {
105 ERR(r, "There is not sufficient room at *outbuf.");
/* EILSEQ: ibuf now points at the offending sequence, so the logged text is
 * the unconverted remainder; the helper then deals with that sequence. */
108 else if (EILSEQ == errno) {
109 ERR(r, "%s:%d An invalid multibyte sequence has been encountered in the input. input:[%s]", __FILE__,__LINE__,ibuf);
110 chxj_convert_illegal_charactor_sequence(r, entryp, &ibuf, &ilen, &obuf, &olen);
/* EINVAL: the input ends in the middle of a multibyte sequence. */
112 else if (EINVAL == errno) {
113 ERR(r, "An incomplete multibyte sequence has been encountered in the input. input:[%s]", ibuf);
/* Dump the converted text from the saved start of the output buffer. */
121 chxj_dump_string(r, APLOG_MARK, "RESULT Convert Encoding", spos, *len);
122 DBG(r,"REQ[%X] end chxj_encoding()", (unsigned int)(apr_size_t)r);
/**
 * Deal with the byte sequence at *ibuf after iconv() rejected it.
 *
 * The rule's encoding name selects a family (UTF-8, EUC-JP variants,
 * CP932/Shift_JIS variants, or anything else), and the byte(s) at *ibuf are
 * then classified to decide how many bytes the offending character occupies.
 * Each branch traces "passed Nbyte.".
 * NOTE(review): the statements that act on the buffers inside each branch
 * are not visible in this listing -- presumably they step over N input bytes;
 * confirm against the full source.
 *
 * @param r       Request, used for trace output.
 * @param entryp  Convert rule entry; only its encoding name is read here.
 * @param ibuf    In/out cursor into the input buffer.
 * @param ilen    In/out remaining input length; overwritten at the end with
 *                strlen(*ibuf).
 * @param obuf    In/out cursor into the output buffer.
 * @param olen    In/out remaining output space.
 */
128 chxj_convert_illegal_charactor_sequence(request_rec *r, chxjconvrule_entry *entryp, char **ibuf, apr_size_t *ilen, char **obuf, apr_size_t *olen)
/* ---- UTF-8: classify by the high bits of the first byte ---- */
130 if (STRCASEEQ('u','U',"UTF-8", entryp->encoding) || STRCASEEQ('u','U',"UTF8", entryp->encoding)) {
/* 110xxxxx */
131 if ((0xe0 & **ibuf) == 0xc0) {
132 /* 2-byte character */
137 DBG(r, "passed 2byte.");
/* 1110xxxx */
139 else if ((0xf0 & **ibuf) == 0xe0) {
140 /* 3-byte character */
145 DBG(r, "passed 3byte.");
/* 11110xxx */
147 else if ((0xf8 & **ibuf) == 0xf0) {
148 /* 4-byte character */
153 DBG(r, "passed 4byte.");
/* 10xxxxxx */
155 else if ((0xc0 & **ibuf) == 0x80) {
156 /* stray continuation byte, traced as a single byte */
161 DBG(r, "passed 1byte.");
164 /* unknown character, traced as a single byte */
169 DBG(r, "passed 1byte.");
/* ---- EUC-JP family ----
 * NOTE(review): "EUCJP" appears twice in this chain (first and last test). */
172 else if (STRCASEEQ('e','E', "EUCJP", entryp->encoding)
173 || STRCASEEQ('c','C', "CSEUCPKDFMTJAPANESE", entryp->encoding)
174 || STRCASEEQ('e','E', "EUC-JISX0213", entryp->encoding)
175 || STRCASEEQ('e','E', "EUC-JP-MS", entryp->encoding)
176 || STRCASEEQ('e','E', "EUC-JP", entryp->encoding)
177 || STRCASEEQ('e','E', "EUCJP-MS", entryp->encoding)
178 || STRCASEEQ('e','E', "EUCJP-OPEN", entryp->encoding)
179 || STRCASEEQ('e','E', "EUCJP-WIN", entryp->encoding)
180 || STRCASEEQ('e','E', "EUCJP", entryp->encoding)) {
/* A 0x8F first byte marks the 3-byte form; the cast avoids sign issues. */
181 if ((unsigned char)**ibuf == 0x8F) {
182 /* 3-byte character */
187 DBG(r, "passed 3byte.");
190 /* 2-byte character */
195 DBG(r, "passed 2byte.");
/* ---- CP932 / Shift_JIS family ---- */
198 else if (STRCASEEQ('c', 'C', "CP932", entryp->encoding)
199 || STRCASEEQ('c', 'C', "CSIBM932", entryp->encoding)
200 || STRCASEEQ('i', 'I', "IBM-932", entryp->encoding)
201 || STRCASEEQ('i', 'I', "IBM932", entryp->encoding)
202 || STRCASEEQ('m', 'M', "MS932", entryp->encoding)
203 || STRCASEEQ('m', 'M', "MS_KANJI", entryp->encoding)
204 || STRCASEEQ('s', 'S', "SJIS-OPEN", entryp->encoding)
205 || STRCASEEQ('s', 'S', "SJIS-WIN", entryp->encoding)
206 || STRCASEEQ('s', 'S', "SJIS", entryp->encoding)) {
/* First byte in 0x81-0x9f or 0xe0-0xfc ... */
207 if ( ( ((0x81 <= (unsigned char)**ibuf) && (0x9f >= (unsigned char)**ibuf))
208 || ((0xe0 <= (unsigned char)**ibuf) && (0xfc >= (unsigned char)**ibuf)))
/* ... and second byte in 0x40-0x7e or 0x80-0xfc.
 * NOTE(review): the operator joining the two tests is not visible in this
 * listing; presumably && -- confirm. */
210 ( ((0x40 <= (unsigned char)*((*ibuf)+1)) && (0x7e >= (unsigned char)*((*ibuf)+1)))
211 ||((0x80 <= (unsigned char)*((*ibuf)+1)) && (0xfc >= (unsigned char)*((*ibuf)+1))))) {
212 /* 2-byte character */
217 DBG(r, "passed 2byte.");
220 /* 1-byte character */
225 DBG(r, "passed 1byte.");
/* ---- any other encoding ---- */
229 /* unknown 1-byte character */
234 DBG(r, "passed 1byte.");
/* The remaining length is re-derived from the terminator, so this relies on
 * the input buffer being NUL-terminated and stops at the first NUL byte. */
237 *ilen = strlen(*ibuf);
238 DBG(r, "new len = [%" APR_SIZE_T_FMT "].", (apr_size_t)*ilen);
/**
 * Reverse conversion: turn CP932 text into the encoding named by the
 * applicable convert rule, using iconv.
 *
 * @param r    Request; its per-directory config supplies the convert rules
 *             and the work buffers are allocated directly from r->pool.
 * @param src  Input bytes (CP932); copied into a zero-filled work buffer.
 * @param len  Pointer to the byte length of src. The same location supplies
 *             the length used for the final result dump.
 * @return     An empty string allocated from r->pool when *len is rejected
 *             as an invalid size.
 *             NOTE(review): the remaining return statements are not visible
 *             in this listing -- confirm them against the full source.
 */
244 chxj_rencoding(request_rec *r, const char *src, apr_size_t *len)
254 mod_chxj_config *dconf;
255 chxjconvrule_entry *entryp;
257 DBG(r,"REQ[%X] start chxj_rencoding()", (unsigned int)(apr_size_t)r);
/* Invalid length: report it and hand back an empty string from r->pool. */
260 ERR(r, "runtime exception: chxj_rencoding(): invalid string size.[%d]", (int)*len);
261 DBG(r,"REQ[%X] end chxj_rencoding()", (unsigned int)(apr_size_t)r);
262 return (char *)apr_pstrdup(r->pool, "");
/* Fetch this module's per-directory configuration for the request. */
265 dconf = chxj_get_module_config(r->per_dir_config, &chxj_module);
/* Early exit traced as "none encoding".
 * NOTE(review): the guard condition is not visible here. */
267 DBG(r,"REQ[%X] none encoding.", (unsigned int)(apr_size_t)r);
268 DBG(r,"REQ[%X] end chxj_rencoding()", (unsigned int)(apr_size_t)r);
/* Obtain the convert rule entry; its encoding field is the iconv target. */
272 entryp = chxj_apply_convrule(r, dconf->convrules);
/* No encoding configured on the rule. */
273 if (! entryp->encoding) {
274 DBG(r,"REQ[%X] none encoding.", (unsigned int)(apr_size_t)r);
275 DBG(r,"REQ[%X] end chxj_rencoding()", (unsigned int)(apr_size_t)r);
/* Encoding explicitly set to "none" (STRCASEEQ is presumably a
 * case-insensitive compare -- verify against its definition). */
279 if (STRCASEEQ('n','N',"none", entryp->encoding)) {
280 DBG(r,"REQ[%X] none encoding.", (unsigned int)(apr_size_t)r);
281 DBG(r,"REQ[%X] end chxj_rencoding()", (unsigned int)(apr_size_t)r);
/* One extra byte so the zero-filled copy of src stays NUL-terminated. */
286 ibuf = apr_palloc(r->pool, ilen+1);
288 DBG(r,"REQ[%X] end chxj_rencoding()", (unsigned int)(apr_size_t)r);
292 memset(ibuf, 0, ilen+1);
/* The "+0" has no effect; exactly ilen bytes are copied. */
293 memcpy(ibuf, src, ilen+0);
/* spos keeps the start of the output buffer because iconv() advances obuf. */
296 spos = obuf = apr_palloc(r->pool, olen);
298 DBG(r,"REQ[%X] end chxj_rencoding()", (unsigned int)(apr_size_t)r);
301 DBG(r,"encode convert [%s] -> [%s]", "CP932", entryp->encoding);
/* Zero the whole output buffer before converting into it. */
303 memset(obuf, 0, olen);
/* iconv_open(tocode, fromcode): target is the rule's encoding, source CP932. */
305 cd = iconv_open(entryp->encoding, "CP932");
306 if (cd == (iconv_t)-1) {
/* EINVAL from iconv_open means this conversion pair is unsupported. */
307 if (EINVAL == errno) {
308 ERR(r, "The conversion from %s to %s is not supported by the implementation.", "CP932", entryp->encoding);
310 DBG(r,"REQ[%X] end chxj_rencoding()", (unsigned int)(apr_size_t)r);
/* iconv() moves ibuf/obuf forward and decrements ilen/olen as it goes. */
315 result = iconv(cd, &ibuf, &ilen, &obuf, &olen);
316 if (result == (size_t)(-1)) {
/* E2BIG: the output buffer is full. */
317 if (E2BIG == errno) {
318 ERR(r, "There is not sufficient room at *outbuf");
/* EILSEQ: ibuf points at the offending sequence; log the unconverted
 * remainder and let the helper deal with that sequence. */
321 else if (EILSEQ == errno) {
322 ERR(r, "An invalid multibyte sequence has been encountered in the input. input:[%s]", ibuf);
323 chxj_convert_illegal_charactor_sequence(r, entryp, &ibuf, &ilen, &obuf, &olen);
/* EINVAL: the input ends in the middle of a multibyte sequence. */
325 else if (EINVAL == errno) {
326 ERR(r, "An incomplete multibyte sequence has been encountered in the input. input:[%s]", ibuf);
/* Dump the converted text from the saved start of the output buffer. */
335 chxj_dump_string(r, APLOG_MARK, "RESULT Convert REncoding", spos, *len);
336 DBG(r,"REQ[%X] end chxj_rencoding()", (unsigned int)(apr_size_t)r);
/**
 * Re-encode the query-string part of a URL.
 *
 * The visible code duplicates value, looks for a '#' fragment and a '?'
 * query separator, splits the query on '&', and pushes each key (and value,
 * when there is one) through URL-decode -> chxj_encoding() -> URL-encode
 * before joining the pieces back into a parameter string.
 *
 * @param r        Request; the allocations visible here use r->pool.
 * @param value    URL text to process. It is duplicated first and the
 *                 tokenizer is run on that copy.
 * @param xmlflag  Non-zero sets use_amp_flag.
 *                 NOTE(review): in this listing both separator branches
 *                 append a plain "&"; presumably one of them originally
 *                 emitted an XML-escaped ampersand -- confirm.
 * @return         A string built in r->pool: src with "#" + anchor on the
 *                 early return, otherwise src_sv + "?" + the rebuilt
 *                 parameters, with "#" + anchor appended on one of the two
 *                 final returns.
 */
343 chxj_encoding_parameter(request_rec *r, const char *value, int xmlflag)
359 DBG(r, "REQ[%X] start chxj_encoding_parameter()", (unsigned int)(apr_size_t)r);
/* Work on a private copy of the caller's string. */
361 src = apr_pstrdup(r->pool, value);
/* Locate a '#' fragment and keep a copy of it. */
363 anchor_pos = strchr(src, '#');
366 anchor = apr_pstrdup(r->pool, anchor_pos);
/* Locate the start of the query string. */
371 spos = strchr(src, '?');
/* Early return that re-attaches the fragment.
 * NOTE(review): the guarding conditions are not visible; presumably this is
 * the "no query string" path -- confirm. */
373 DBG(r, "REQ[%X] end chxj_encoding_parameter()", (unsigned int)(apr_size_t)r);
375 return apr_pstrcat(r->pool, src, "#", anchor, NULL);
/* Snapshot of src used as the prefix of the final result. */
382 src_sv = apr_pstrdup(r->pool, src);
/* Accumulator for the rebuilt parameters; it is later measured with
 * strlen(), so it must be NUL-terminated before that point. */
383 param = apr_palloc(r->pool, 1);
390 use_amp_flag = (xmlflag) ? 1 : 0;
/* Split the query into "key=value" pairs on '&'. */
392 pair = apr_strtok(spos, "&", &pstat);
/* Detect a leading "amp;", i.e. the tail of an escaped ampersand that the
 * '&' split left at the front of the token. */
395 if (strncasecmp(pair, "amp;", 4) == 0) {
399 sep_pos = strchr(pair, '=');
/* '=' is the very first character: the key is empty. */
400 if (pair == sep_pos) {
401 key = apr_pstrdup(r->pool, "");
404 key = apr_strtok(pair, "=", &vstat);
/* Key: URL-decode, convert the encoding, then URL-encode again. */
408 apr_size_t klen = (apr_size_t)strlen(key);
409 key = chxj_url_decode(r->pool, key);
410 len = (apr_size_t)strlen(key);
412 key = chxj_encoding(r, key, &len);
414 key = chxj_url_encode(r->pool, key);
416 val = apr_strtok(pair, "=", &vstat);
/* No value token although an '=' was present: treat the value as empty. */
417 if (! val && sep_pos) {
418 val = apr_pstrdup(r->pool, "");
/* Value: same decode -> convert -> encode round trip as the key. */
421 apr_size_t vlen = (apr_size_t)strlen(val);
422 val = chxj_url_decode(r->pool, val);
423 len = (apr_size_t)strlen(val);
425 val = chxj_encoding(r, val, &len);
427 val = chxj_url_encode(r->pool, val);
/* key=value form: the first pair gets no separator in front of it. */
428 if (strlen(param) == 0) {
429 param = apr_pstrcat(r->pool, param, key, "=", val, NULL);
433 param = apr_pstrcat(r->pool, param, "&", key, "=", val, NULL);
436 param = apr_pstrcat(r->pool, param, "&", key, "=", val, NULL);
/* key-only form: same separator handling, without "=value". */
441 if (strlen(param) == 0) {
442 param = apr_pstrcat(r->pool, param, key, NULL);
446 param = apr_pstrcat(r->pool, param, "&", key, NULL);
449 param = apr_pstrcat(r->pool, param, "&", key, NULL);
454 DBG(r, "REQ[%X] end chxj_encoding_parameter()", (unsigned int)(apr_size_t)r);
/* Final result, with or without the saved fragment. */
457 return apr_pstrcat(r->pool, src_sv, "?", param, "#", anchor, NULL);
459 return apr_pstrcat(r->pool, src_sv, "?", param, NULL);
465 chxj_iconv(request_rec *r, apr_pool_t *pool, const char *src, apr_size_t *len, const char *from, const char *to)
478 ERR(r, "runtime exception: chxj_iconv(): invalid string size.[%d]", (int)*len);
479 return (char *)apr_pstrdup(pool, "");
483 ibuf = apr_palloc(pool, ilen+1);
485 ERR(r, "runtime exception: chxj_iconv(): Out of memory.");
488 memset(ibuf, 0, ilen+1);
489 memcpy(ibuf, src, ilen);
492 spos = obuf = apr_palloc(pool, olen);
494 ERR(r, "%s:%d runtime exception: chxj_iconv(): Out of memory", APLOG_MARK);
497 memset(obuf, 0, olen);
498 cd = iconv_open(to, from);
499 if (cd == (iconv_t)-1) {
500 if (EINVAL == errno) {
501 ERR(r, "The conversion from %s to %s is not supported by the implementation.", from, to);
504 ERR(r, "iconv open failed. from:[%s] to:[%s] errno:[%d]", from, to, errno);
509 result = iconv(cd, &ibuf, &ilen, &obuf, &olen);
510 if (result == (size_t)(-1)) {
511 if (E2BIG == errno) {
512 ERR(r, "There is not sufficient room at *outbuf.");
514 else if (EILSEQ == errno) {
515 ERR(r, "An invalid multibyte sequence has been encountered in the input. input:[%s]", ibuf);
517 else if (EINVAL == errno) {
518 ERR(r, "An incomplete multibyte sequence has been encountered in the input. input:[%s]", ibuf);