2 * Copyright (C) 2005-2011 Atsushi Konno All rights reserved.
3 * Copyright (C) 2005 QSDN,Inc. All rights reserved.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 #include "chxj_encoding.h"
19 #include "chxj_apply_convrule.h"
20 #include "chxj_url_encode.h"
/*
 * chxj_encoding: convert `src` from the encoding named by the request's
 * convert rule (entryp->encoding) into CP932 using iconv.
 *
 * r   - current request; its pool is the parent of the working pool and is
 *       used for the empty-string result on the size-error path.
 * src - input bytes to convert.
 * len - in/out length pointer; its value is logged on the size-error path
 *       and again after the conversion.
 *
 * NOTE(review): this listing omits lines of the original file (the embedded
 * line numbers jump, e.g. 26 -> 36), so local declarations, the conditions
 * guarding several branches, and most return statements are not visible.
 * The comments below describe only what the visible lines show.
 */
26 chxj_encoding(request_rec *r, const char *src, apr_size_t *len)
36 mod_chxj_config *dconf;
37 chxjconvrule_entry *entryp;
41 DBG(r,"REQ[%X] start %s()",TO_ADDR(r),__func__);
/* Fetch this module's per-directory configuration for the request. */
43 dconf = chxj_get_module_config(r->per_dir_config, &chxj_module);
/* Early "none encoding" exit; the guarding condition is not shown here. */
46 DBG(r,"REQ[%X] none encoding.",TO_ADDR(r));
47 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Size-check failure: log the offending length and hand back "" from r->pool. */
51 ERR(r, "REQ[%X] runtime exception: chxj_encoding(): invalid string size.[%d]", TO_ADDR(r),(int)*len);
52 DBG(r, "REQ[%X] end %s()",TO_ADDR(r),__func__);
53 return (char *)apr_pstrdup(r->pool, "");
/* Select the convert rule that applies to this request. */
56 entryp = chxj_apply_convrule(r, dconf->convrules);
/* A rule without an encoding is treated as "no conversion". */
57 if (entryp->encoding == NULL) {
58 DBG(r,"REQ[%X] none encoding.",TO_ADDR(r));
59 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* So is an encoding literally named "none" (STRCASEEQ is defined elsewhere;
 * presumably a case-insensitive compare - confirm against its definition). */
63 if (STRCASEEQ('n','N',"none", entryp->encoding)) {
64 DBG(r,"REQ[%X] none encoding.",TO_ADDR(r));
65 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Working pool whose parent is r->pool; both buffers below come from it. */
69 apr_pool_create(&pool, r->pool);
/* One byte more than ilen so that, after the memset/memcpy below, the input
 * copy is NUL-terminated (the error paths print it with %s). */
71 ibuf = apr_palloc(pool, ilen+1);
73 ERR(r, "runtime exception: chxj_encoding(): Out of memory.");
76 memset(ibuf, 0, ilen+1);
77 memcpy(ibuf, src, ilen);
/* spos keeps the start of the output buffer: obuf itself is passed by
 * address to iconv() below, and spos is what gets logged at the end. */
80 spos = obuf = apr_palloc(pool, olen);
82 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
85 DBG(r,"REQ[%X] encode convert [%s] -> [%s]", TO_ADDR(r),entryp->encoding, "CP932");
87 memset(obuf, 0, olen);
/* iconv_open(tocode, fromcode): target is CP932, source is the rule's encoding. */
88 cd = iconv_open("CP932", entryp->encoding);
89 if (cd == (iconv_t)-1) {
/* EINVAL from iconv_open means the conversion pair is unsupported. */
90 if (EINVAL == errno) {
91 ERR(r, "REQ[%X] The conversion from %s to %s is not supported by the implementation.", TO_ADDR(r),entryp->encoding, "CP932");
94 ERR(r, "REQ[%X] iconv open failed. from:[%s] to:[%s] errno:[%d]", TO_ADDR(r),entryp->encoding, "CP932", errno);
96 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Run the conversion; on failure errno selects the handling below. */
100 result = iconv(cd, &ibuf, &ilen, &obuf, &olen);
101 if (result == (size_t)(-1)) {
/* Output buffer too small. */
102 if (E2BIG == errno) {
103 ERR(r, "REQ[%X] There is not sufficient room at *outbuf.",TO_ADDR(r));
/* Invalid input sequence: log it and delegate to the illegal-sequence helper,
 * which receives the live iconv cursors by address. */
106 else if (EILSEQ == errno) {
107 ERR(r, "REQ[%X] %s:%d An invalid multibyte sequence has been encountered in the input. input:[%s]", TO_ADDR(r),__FILE__,__LINE__,ibuf);
108 chxj_convert_illegal_charactor_sequence(r, entryp, &ibuf, &ilen, &obuf, &olen);
/* Input ended in the middle of a multibyte sequence. */
110 else if (EINVAL == errno) {
111 ERR(r, "REQ[%X] An incomplete multibyte sequence has been encountered in the input. input:[%s]", TO_ADDR(r),ibuf);
/* Log the resulting length and the converted bytes from the buffer start. */
119 DBG(r,"REQ[%X] len=[%d] obuf=[%.*s]", TO_ADDR(r),(int)*len, (int)*len, spos);
120 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/*
 * chxj_convert_illegal_charactor_sequence: called after iconv() reported an
 * invalid sequence.  It classifies the byte at **ibuf according to the
 * encoding family named by entryp->encoding, logs how many bytes were
 * "passed", and finally resets *ilen to strlen(*ibuf).
 *
 * r           - request used for logging.
 * entryp      - convert rule; only entryp->encoding is read in the visible lines.
 * ibuf / ilen - iconv input cursor and remaining length (passed by address).
 * obuf / olen - iconv output cursor and remaining space (passed by address).
 *
 * NOTE(review): the statements inside each branch (between the branch comment
 * and its DBG call) are not visible in this listing; presumably they emit a
 * substitute and advance the cursors by the logged width - confirm against
 * the full source.
 */
126 chxj_convert_illegal_charactor_sequence(request_rec *r, chxjconvrule_entry *entryp, char **ibuf, apr_size_t *ilen, char **obuf, apr_size_t *olen)
/* --- UTF-8 family: width is decided from the lead-byte bit pattern. --- */
128 if (STRCASEEQ('u','U',"UTF-8", entryp->encoding) || STRCASEEQ('u','U',"UTF8", entryp->encoding)) {
/* 110xxxxx */
129 if ((0xe0 & **ibuf) == 0xc0) {
130 /* 2-byte character */
135 DBG(r, "REQ[%X] passed 2byte.",TO_ADDR(r));
/* 1110xxxx */
137 else if ((0xf0 & **ibuf) == 0xe0) {
138 /* 3-byte character */
143 DBG(r, "REQ[%X] passed 3byte.",TO_ADDR(r));
/* 11110xxx */
145 else if ((0xf8 & **ibuf) == 0xf0) {
146 /* 4-byte character */
151 DBG(r, "REQ[%X] passed 4byte.",TO_ADDR(r));
/* 10xxxxxx is the UTF-8 continuation-byte pattern, i.e. a stray trail byte;
 * it is handled as a single byte. */
153 else if ((0xc0 & **ibuf) == 0x80) {
154 /* 1-byte character */
159 DBG(r, "REQ[%X] passed 1byte.",TO_ADDR(r));
162 /* unknown character */
167 DBG(r, "REQ[%X] passed 1byte.",TO_ADDR(r));
/* --- EUC-JP family.
 * NOTE(review): "EUCJP" is tested both first and last in this chain; the
 * second test is redundant. --- */
170 else if (STRCASEEQ('e','E', "EUCJP", entryp->encoding)
171 || STRCASEEQ('c','C', "CSEUCPKDFMTJAPANESE", entryp->encoding)
172 || STRCASEEQ('e','E', "EUC-JISX0213", entryp->encoding)
173 || STRCASEEQ('e','E', "EUC-JP-MS", entryp->encoding)
174 || STRCASEEQ('e','E', "EUC-JP", entryp->encoding)
175 || STRCASEEQ('e','E', "EUCJP-MS", entryp->encoding)
176 || STRCASEEQ('e','E', "EUCJP-OPEN", entryp->encoding)
177 || STRCASEEQ('e','E', "EUCJP-WIN", entryp->encoding)
178 || STRCASEEQ('e','E', "EUCJP", entryp->encoding)) {
/* A 0x8F lead byte is treated as the start of a 3-byte sequence; any other
 * byte is treated as the start of a 2-byte sequence. */
179 if ((unsigned char)**ibuf == 0x8F) {
180 /* 3-byte character */
185 DBG(r, "REQ[%X] passed 3byte.",TO_ADDR(r));
188 /* 2-byte character */
193 DBG(r, "REQ[%X] passed 2byte.",TO_ADDR(r));
/* --- CP932 / Shift_JIS family. --- */
196 else if (STRCASEEQ('c', 'C', "CP932", entryp->encoding)
197 || STRCASEEQ('c', 'C', "CSIBM932", entryp->encoding)
198 || STRCASEEQ('i', 'I', "IBM-932", entryp->encoding)
199 || STRCASEEQ('i', 'I', "IBM932", entryp->encoding)
200 || STRCASEEQ('m', 'M', "MS932", entryp->encoding)
201 || STRCASEEQ('m', 'M', "MS_KANJI", entryp->encoding)
202 || STRCASEEQ('s', 'S', "SJIS-OPEN", entryp->encoding)
203 || STRCASEEQ('s', 'S', "SJIS-WIN", entryp->encoding)
204 || STRCASEEQ('s', 'S', "SJIS", entryp->encoding)) {
/* First test: lead byte in 0x81-0x9f or 0xe0-0xfc.  Second test: following
 * byte in 0x40-0x7e or 0x80-0xfc.  The operator joining the two tests is on
 * a line not shown here (presumably &&). */
205 if ( ( ((0x81 <= (unsigned char)**ibuf) && (0x9f >= (unsigned char)**ibuf))
206 || ((0xe0 <= (unsigned char)**ibuf) && (0xfc >= (unsigned char)**ibuf)))
208 ( ((0x40 <= (unsigned char)*((*ibuf)+1)) && (0x7e >= (unsigned char)*((*ibuf)+1)))
209 ||((0x80 <= (unsigned char)*((*ibuf)+1)) && (0xfc >= (unsigned char)*((*ibuf)+1))))) {
210 /* 2-byte character */
215 DBG(r, "REQ[%X] passed 2byte.", TO_ADDR(r));
218 /* 1-byte character */
223 DBG(r, "REQ[%X] passed 1byte.",TO_ADDR(r));
/* --- Any other encoding name: fall back to single-byte handling. --- */
227 /* unknown 1-byte character */
232 DBG(r, "REQ[%X] passed 1byte.", TO_ADDR(r));
/* The remaining input length is recomputed as a C-string length, so this
 * relies on *ibuf being NUL-terminated.
 * NOTE(review): an embedded NUL byte in the input would shorten the
 * remaining length here - confirm that is acceptable. */
235 *ilen = strlen(*ibuf);
236 DBG(r, "REQ[%X] new len = [%" APR_SIZE_T_FMT "].", TO_ADDR(r),(apr_size_t)*ilen);
/*
 * chxj_rencoding: reverse of the CP932 conversion - convert `src` from CP932
 * into the encoding named by the request's convert rule (entryp->encoding)
 * using iconv.
 *
 * r   - current request; in the visible lines both buffers are allocated
 *       directly from r->pool.
 * src - input bytes (CP932, per the iconv_open call below).
 * len - in/out length pointer; its value is logged on the size-error path
 *       and again after the conversion.
 *
 * NOTE(review): this listing omits lines of the original file (the embedded
 * line numbers jump), so declarations, branch conditions and most return
 * statements are not visible.  Comments describe only the visible lines.
 */
242 chxj_rencoding(request_rec *r, const char *src, apr_size_t *len)
252 mod_chxj_config *dconf;
253 chxjconvrule_entry *entryp;
255 DBG(r,"REQ[%X] start %s()",TO_ADDR(r),__func__);
/* Size-check failure (condition not shown): log and return "" from r->pool. */
258 ERR(r, "REQ[%X] runtime exception: chxj_rencoding(): invalid string size.[%d]", TO_ADDR(r),(int)*len);
259 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
260 return (char *)apr_pstrdup(r->pool, "");
/* Fetch this module's per-directory configuration for the request. */
263 dconf = chxj_get_module_config(r->per_dir_config, &chxj_module);
/* Early "none encoding" exit; the guarding condition is not shown here. */
265 DBG(r,"REQ[%X] none encoding.",TO_ADDR(r));
266 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Select the convert rule that applies to this request. */
270 entryp = chxj_apply_convrule(r, dconf->convrules);
/* No encoding configured on the rule: nothing to convert. */
271 if (! entryp->encoding) {
272 DBG(r,"REQ[%X] none encoding.",TO_ADDR(r));
273 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Encoding explicitly named "none": nothing to convert. */
277 if (STRCASEEQ('n','N',"none", entryp->encoding)) {
278 DBG(r,"REQ[%X] none encoding.", TO_ADDR(r));
279 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* ilen+1 bytes, zeroed, then ilen bytes copied: the input copy ends up
 * NUL-terminated (the error paths print it with %s). */
284 ibuf = apr_palloc(r->pool, ilen+1);
286 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
290 memset(ibuf, 0, ilen+1);
291 memcpy(ibuf, src, ilen+0);
/* spos keeps the start of the output buffer: obuf itself is passed by
 * address to iconv() below, and spos is what gets logged at the end. */
294 spos = obuf = apr_palloc(r->pool, olen);
296 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
299 DBG(r,"REQ[%X] encode convert [%s] -> [%s]", TO_ADDR(r),"CP932", entryp->encoding);
301 memset(obuf, 0, olen);
/* iconv_open(tocode, fromcode): target is the rule's encoding, source is CP932. */
303 cd = iconv_open(entryp->encoding, "CP932");
304 if (cd == (iconv_t)-1) {
/* EINVAL from iconv_open means the conversion pair is unsupported. */
305 if (EINVAL == errno) {
306 ERR(r, "REQ[%X] The conversion from %s to %s is not supported by the implementation.", TO_ADDR(r),"CP932", entryp->encoding);
308 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Run the conversion; on failure errno selects the handling below. */
313 result = iconv(cd, &ibuf, &ilen, &obuf, &olen);
314 if (result == (size_t)(-1)) {
/* Output buffer too small. */
315 if (E2BIG == errno) {
316 ERR(r, "REQ[%X] There is not sufficient room at *outbuf",TO_ADDR(r));
/* Invalid input sequence: delegate to the illegal-sequence helper.
 * NOTE(review): the helper chooses byte widths from entryp->encoding, but in
 * this function the input buffer is CP932 and entryp->encoding is the OUTPUT
 * encoding - the widths may be wrong for rules whose encoding is not in the
 * CP932 family.  Confirm against the full source. */
319 else if (EILSEQ == errno) {
320 ERR(r, "REQ[%X] An invalid multibyte sequence has been encountered in the input. input:[%s]", TO_ADDR(r),ibuf);
321 chxj_convert_illegal_charactor_sequence(r, entryp, &ibuf, &ilen, &obuf, &olen);
/* Input ended in the middle of a multibyte sequence. */
323 else if (EINVAL == errno) {
324 ERR(r, "REQ[%X] An incomplete multibyte sequence has been encountered in the input. input:[%s]", TO_ADDR(r),ibuf);
/* Log the resulting length and the converted bytes from the buffer start. */
333 DBG(r,"REQ[%X] len=[%d] obuf=[%.*s]", TO_ADDR(r),(int)*len, (int)*len, spos);
334 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/*
 * chxj_encoding_parameter: re-encode the query-string part of a URL.
 *
 * The URL in `value` is copied, an optional '#' fragment and the '?' query
 * are located, and each '&'-separated pair is split at '='.  Keys and values
 * are URL-decoded, passed through chxj_encoding(), and URL-encoded again,
 * then the URL is rebuilt as  <path>?<params>[#<anchor>].
 *
 * r     - current request; all strings are allocated from r->pool.
 * value - URL to process; it is duplicated before being tokenized.
 *
 * NOTE(review): this listing omits lines of the original file (the embedded
 * line numbers jump), so the loop, several conditions and pointer
 * adjustments are not visible.  Comments describe only the visible lines.
 */
340 chxj_encoding_parameter(request_rec *r, const char *value)
356 DBG(r,"REQ[%X] start %s()",TO_ADDR(r),__func__);
/* Work on a private copy: apr_strtok below writes into its input. */
358 src = apr_pstrdup(r->pool, value);
/* Locate and save the fragment, if any (the handling of the '#' itself is on
 * lines not shown). */
360 anchor_pos = strchr(src, '#');
363 anchor = apr_pstrdup(r->pool, anchor_pos);
/* Locate the query string. */
368 spos = strchr(src, '?');
/* Visible exit that re-attaches the anchor to an otherwise unchanged src;
 * presumably the "no query string" path - condition not shown. */
370 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
372 return apr_pstrcat(r->pool, src, "#", anchor, NULL);
/* src_sv is the copy used as the URL prefix in the final result; param
 * accumulates the rebuilt query string (its initialisation is not shown). */
379 src_sv = apr_pstrdup(r->pool, src);
380 param = apr_palloc(r->pool, 1);
/* Split the query into '&'-separated pairs. */
389 pair = apr_strtok(spos, "&", &pstat);
/* A pair starting with "amp;" (case-insensitive) is what remains of an
 * HTML-escaped "&amp;" separator; the adjustment made is not shown. */
392 if (strncasecmp(pair, "amp;", 4) == 0) {
396 sep_pos = strchr(pair, '=');
/* '=' is the very first character: the key is empty. */
397 if (pair == sep_pos) {
398 key = apr_pstrdup(r->pool, "");
401 key = apr_strtok(pair, "=", &vstat);
/* Key: URL-decode, convert the encoding, URL-encode again. */
405 apr_size_t klen = (apr_size_t)strlen(key);
406 key = chxj_url_decode(r->pool, key);
407 len = (apr_size_t)strlen(key);
409 key = chxj_encoding(r, key, &len);
410 key = chxj_url_encode(r->pool, key);
/* Disabled alternative, dated 2009/4/10 in the original marker. */
412 #if 0 /* XXX:2009/4/10 */
413 key = chxj_url_encode(r->pool, key);
416 val = apr_strtok(pair, "=", &vstat);
/* "key=" with nothing after the '=': use an empty value. */
417 if (! val && sep_pos) {
418 val = apr_pstrdup(r->pool, "");
/* Value: URL-decode, convert the encoding, URL-encode again. */
421 apr_size_t vlen = (apr_size_t)strlen(val);
422 val = chxj_url_decode(r->pool, val);
423 len = (apr_size_t)strlen(val);
425 val = chxj_encoding(r, val, &len);
426 val = chxj_url_encode(r->pool, val);
/* Disabled alternative, dated 2009/4/10 in the original marker. */
428 #if 0 /* XXX:2009/4/10 */
429 val = chxj_url_encode(r->pool, val);
/* Append "key=val": no leading '&' while param is still empty. */
431 if (strlen(param) == 0) {
432 param = apr_pstrcat(r->pool, param, key, "=", val, NULL);
436 param = apr_pstrcat(r->pool, param, "&", key, "=", val, NULL);
439 param = apr_pstrcat(r->pool, param, "&", key, "=", val, NULL);
/* Append a bare key (no value): same '&' rule as above. */
444 if (strlen(param) == 0) {
445 param = apr_pstrcat(r->pool, param, key, NULL);
449 param = apr_pstrcat(r->pool, param, "&", key, NULL);
452 param = apr_pstrcat(r->pool, param, "&", key, NULL);
457 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Rebuild the URL, with or without the saved fragment. */
460 return apr_pstrcat(r->pool, src_sv, "?", param, "#", anchor, NULL);
462 return apr_pstrcat(r->pool, src_sv, "?", param, NULL);