2 * Copyright (C) 2005-2009 Atsushi Konno All rights reserved.
3 * Copyright (C) 2005 QSDN,Inc. All rights reserved.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 #include "chxj_encoding.h"
19 #include "chxj_apply_convrule.h"
20 #include "chxj_url_encode.h"
/*
 * chxj_encoding(): convert a buffer from the encoding named by the selected
 * convert rule (entryp->encoding) to CP932 using iconv.
 *
 * NOTE(review): this listing is not contiguous. Local declarations, several
 * conditions, closing braces and the return statements are not visible, so
 * the comments below describe only the statements that can be seen here.
 *
 * r   : request; supplies the pool, the per-directory configuration and the
 *       logging context used by DBG() and ERR().
 * src : input bytes; ilen bytes of it are copied into a scratch buffer.
 * len : pointer to a length; it is read for the "invalid string size" error
 *       and for the final debug line. Presumably it is also updated with
 *       the converted length -- TODO confirm.
 *
 * Returns a pool-allocated empty string on the invalid-size path; the other
 * return statements are not visible in this listing.
 */
26 chxj_encoding(request_rec *r, const char *src, apr_size_t *len)
36 mod_chxj_config *dconf;
37 chxjconvrule_entry *entryp;
41 DBG(r,"start chxj_encoding()");
/* Per-directory module configuration; its convrules member is used below. */
43 dconf = chxj_get_module_config(r->per_dir_config, &chxj_module);
/* Early-exit log; the guarding condition is not visible here
 * (presumably a missing dconf -- TODO confirm). */
46 DBG(r,"none encoding.");
/* Invalid *len (the test itself is not visible here): log the value and
 * hand back an empty string allocated from the request pool. */
50 ERR(r, "runtime exception: chxj_encoding(): invalid string size.[%d]", (int)*len);
51 return (char *)apr_pstrdup(r->pool, "");
/* Obtain the convert rule entry for this request from the configured rules. */
54 entryp = chxj_apply_convrule(r, dconf->convrules);
/* The rule carries no encoding name. */
55 if (entryp->encoding == NULL) {
56 DBG(r,"none encoding.");
/* The rule's encoding is the literal "none". */
60 if (STRCASEEQ('n','N',"none", entryp->encoding)) {
61 DBG(r,"none encoding.");
/* Scratch sub-pool of the request pool; both buffers below come from it.
 * NOTE(review): the status returned by apr_pool_create() is not checked
 * on this line. */
65 apr_pool_create(&pool, r->pool);
/* One extra byte: together with the memset below, the copied input stays
 * NUL-terminated. */
67 ibuf = apr_palloc(pool, ilen+1);
69 ERR(r, "runtime exception: chxj_encoding(): Out of memory.");
72 memset(ibuf, 0, ilen+1);
73 memcpy(ibuf, src, ilen);
/* spos keeps the start of the output buffer; obuf itself is handed to
 * iconv() by address further down. */
76 spos = obuf = apr_palloc(pool, olen);
78 DBG(r,"end chxj_encoding()");
81 DBG(r,"encode convert [%s] -> [%s]", entryp->encoding, "CP932");
83 memset(obuf, 0, olen);
/* iconv_open() takes the target encoding first, then the source encoding. */
84 cd = iconv_open("CP932", entryp->encoding);
85 if (cd == (iconv_t)-1) {
/* EINVAL gets a dedicated "not supported" message. */
86 if (EINVAL == errno) {
87 ERR(r, "The conversion from %s to %s is not supported by the implementation.", entryp->encoding, "CP932");
/* Generic open-failure message that includes errno. */
90 ERR(r, "iconv open failed. from:[%s] to:[%s] errno:[%d]", entryp->encoding, "CP932", errno);
92 DBG(r,"end chxj_encoding()");
/* Conversion call; input and output cursors and remaining sizes are passed
 * by address. */
96 result = iconv(cd, &ibuf, &ilen, &obuf, &olen);
97 if (result == (size_t)(-1)) {
/* Message for an exhausted output buffer; the errno test that selects it
 * is not visible here (presumably E2BIG -- TODO confirm). */
99 ERR(r, "There is not sufficient room at *outbuf.");
/* Invalid input sequence: log it, then pass the cursors to the helper that
 * inspects the offending bytes. */
102 else if (EILSEQ == errno) {
103 ERR(r, "%s:%d An invalid multibyte sequence has been encountered in the input. input:[%s]", __FILE__,__LINE__,ibuf);
104 chxj_convert_illegal_charactor_sequence(r, entryp, &ibuf, &ilen, &obuf, &olen);
/* Input ended in the middle of a multibyte sequence. */
106 else if (EINVAL == errno) {
107 ERR(r, "An incomplete multibyte sequence has been encountered in the input. input:[%s]", ibuf);
/* Final trace: prints *len bytes starting at the saved output start. */
115 DBG(r,"end chxj_encoding() len=[%d] obuf=[%.*s]", (int)*len, (int)*len, spos);
/*
 * chxj_convert_illegal_charactor_sequence(): called by chxj_encoding() and
 * chxj_rencoding() when iconv() reports EILSEQ. It looks at the byte(s) at
 * *ibuf, classifies them by the encoding family named in entryp->encoding,
 * and logs how many bytes were "passed".
 *
 * NOTE(review): the statements between each classification and its DBG()
 * line are not visible in this listing; presumably they advance the input
 * and output cursors past the offending character -- TODO confirm.
 *
 * r      : request, used for logging.
 * entryp : convert rule entry; only its encoding name is read here.
 * ibuf   : address of the input cursor.
 * ilen   : address of the remaining input length; overwritten at the end
 *          with strlen(*ibuf).
 * obuf   : address of the output cursor (not referenced in the visible lines).
 * olen   : address of the remaining output size (not referenced in the
 *          visible lines).
 */
121 chxj_convert_illegal_charactor_sequence(request_rec *r, chxjconvrule_entry *entryp, char **ibuf, apr_size_t *ilen, char **obuf, apr_size_t *olen)
/* UTF-8 family: classify by the bit pattern of the first byte. */
123 if (STRCASEEQ('u','U',"UTF-8", entryp->encoding) || STRCASEEQ('u','U',"UTF8", entryp->encoding)) {
/* First byte matches 110xxxxx. */
124 if ((0xe0 & **ibuf) == 0xc0) {
125 /* 2-byte character */
130 DBG(r, "passed 2byte.");
/* First byte matches 1110xxxx. */
132 else if ((0xf0 & **ibuf) == 0xe0) {
133 /* 3-byte character */
138 DBG(r, "passed 3byte.");
/* First byte matches 11110xxx. */
140 else if ((0xf8 & **ibuf) == 0xf0) {
141 /* 4-byte character */
146 DBG(r, "passed 4byte.");
/* First byte matches 10xxxxxx; it is handled as a single byte. */
148 else if ((0xc0 & **ibuf) == 0x80) {
149 /* 1-byte character */
154 DBG(r, "passed 1byte.");
157 /* unknown character */
162 DBG(r, "passed 1byte.");
/* EUC-JP family.
 * NOTE(review): "EUCJP" is tested both first and last in this chain. */
165 else if (STRCASEEQ('e','E', "EUCJP", entryp->encoding)
166 || STRCASEEQ('c','C', "CSEUCPKDFMTJAPANESE", entryp->encoding)
167 || STRCASEEQ('e','E', "EUC-JISX0213", entryp->encoding)
168 || STRCASEEQ('e','E', "EUC-JP-MS", entryp->encoding)
169 || STRCASEEQ('e','E', "EUC-JP", entryp->encoding)
170 || STRCASEEQ('e','E', "EUCJP-MS", entryp->encoding)
171 || STRCASEEQ('e','E', "EUCJP-OPEN", entryp->encoding)
172 || STRCASEEQ('e','E', "EUCJP-WIN", entryp->encoding)
173 || STRCASEEQ('e','E', "EUCJP", entryp->encoding)) {
/* A first byte of 0x8F is treated as the start of a 3-byte character;
 * anything else in this family is treated as 2 bytes. */
174 if ((unsigned char)**ibuf == 0x8F) {
175 /* 3-byte character */
180 DBG(r, "passed 3byte.");
183 /* 2-byte character */
188 DBG(r, "passed 2byte.");
/* CP932 / Shift_JIS family. */
191 else if (STRCASEEQ('c', 'C', "CP932", entryp->encoding)
192 || STRCASEEQ('c', 'C', "CSIBM932", entryp->encoding)
193 || STRCASEEQ('i', 'I', "IBM-932", entryp->encoding)
194 || STRCASEEQ('i', 'I', "IBM932", entryp->encoding)
195 || STRCASEEQ('m', 'M', "MS932", entryp->encoding)
196 || STRCASEEQ('m', 'M', "MS_KANJI", entryp->encoding)
197 || STRCASEEQ('s', 'S', "SJIS-OPEN", entryp->encoding)
198 || STRCASEEQ('s', 'S', "SJIS-WIN", entryp->encoding)
199 || STRCASEEQ('s', 'S', "SJIS", entryp->encoding)) {
/* First byte in 0x81-0x9f or 0xe0-0xfc, combined with a second byte in
 * 0x40-0x7e or 0x80-0xfc. The operator joining the two halves is on a line
 * that is not visible here (presumably a logical AND -- TODO confirm). */
200 if ( ( ((0x81 <= (unsigned char)**ibuf) && (0x9f >= (unsigned char)**ibuf))
201 || ((0xe0 <= (unsigned char)**ibuf) && (0xfc >= (unsigned char)**ibuf)))
203 ( ((0x40 <= (unsigned char)*((*ibuf)+1)) && (0x7e >= (unsigned char)*((*ibuf)+1)))
204 ||((0x80 <= (unsigned char)*((*ibuf)+1)) && (0xfc >= (unsigned char)*((*ibuf)+1))))) {
205 /* 2-byte character */
210 DBG(r, "passed 2byte.");
213 /* 1-byte character */
218 DBG(r, "passed 1byte.");
/* Any other encoding name: handled one byte at a time. */
222 /* unknown 1-byte character */
227 DBG(r, "passed 1byte.");
/* Recompute the remaining input length from the cursor. This relies on the
 * input being NUL-terminated; both visible callers zero-fill an ilen+1 byte
 * copy of the source before converting. */
230 *ilen = strlen(*ibuf);
231 DBG(r, "new len = [%" APR_SIZE_T_FMT "].", (apr_size_t)*ilen);
/*
 * chxj_rencoding(): reverse direction of chxj_encoding(); converts a buffer
 * from CP932 to the encoding named by the selected convert rule
 * (entryp->encoding) using iconv.
 *
 * NOTE(review): this listing is not contiguous. Local declarations, several
 * conditions, closing braces and the return statements are not visible, so
 * the comments below describe only the statements that can be seen here.
 *
 * r   : request; supplies the pool, the per-directory configuration and the
 *       logging context used by DBG() and ERR().
 * src : input bytes; ilen bytes of it are copied into a working buffer.
 * len : pointer to a length; it is read for the "invalid string size" error
 *       and for the final debug line. Presumably it is also updated with
 *       the converted length -- TODO confirm.
 *
 * Returns a pool-allocated empty string on the invalid-size path; the other
 * return statements are not visible in this listing.
 */
237 chxj_rencoding(request_rec *r, const char *src, apr_size_t *len)
247 mod_chxj_config *dconf;
248 chxjconvrule_entry *entryp;
250 DBG(r,"start chxj_rencoding()");
/* Invalid *len (the test itself is not visible here): log the value and
 * hand back an empty string allocated from the request pool. */
253 ERR(r, "runtime exception: chxj_rencoding(): invalid string size.[%d]", (int)*len);
254 return (char *)apr_pstrdup(r->pool, "");
/* Per-directory module configuration; its convrules member is used below. */
257 dconf = chxj_get_module_config(r->per_dir_config, &chxj_module);
/* Early-exit log; the guarding condition is not visible here
 * (presumably a missing dconf -- TODO confirm). */
259 DBG(r,"none encoding.");
260 DBG(r,"end chxj_rencoding()");
/* Obtain the convert rule entry for this request from the configured rules. */
264 entryp = chxj_apply_convrule(r, dconf->convrules);
/* The rule carries no encoding name. */
265 if (! entryp->encoding) {
266 DBG(r,"none encoding.");
267 DBG(r,"end chxj_rencoding()");
/* The rule's encoding is the literal "none". */
271 if (STRCASEEQ('n','N',"none", entryp->encoding)) {
272 DBG(r,"none encoding.");
273 DBG(r,"end chxj_rencoding()");
/* Unlike chxj_encoding(), the buffers here are allocated directly from the
 * request pool. The extra byte plus the memset below keeps the copied input
 * NUL-terminated. */
278 ibuf = apr_palloc(r->pool, ilen+1);
280 DBG(r,"end chxj_rencoding()");
284 memset(ibuf, 0, ilen+1);
285 memcpy(ibuf, src, ilen+0);
/* spos keeps the start of the output buffer; obuf itself is handed to
 * iconv() by address further down. */
288 spos = obuf = apr_palloc(r->pool, olen);
290 DBG(r,"end chxj_rencoding()");
293 DBG(r,"encode convert [%s] -> [%s]", "CP932", entryp->encoding);
295 memset(obuf, 0, olen);
/* iconv_open() takes the target encoding first, then the source encoding. */
297 cd = iconv_open(entryp->encoding, "CP932");
298 if (cd == (iconv_t)-1) {
/* Only the EINVAL case produces an error message in the visible lines. */
299 if (EINVAL == errno) {
300 ERR(r, "The conversion from %s to %s is not supported by the implementation.", "CP932", entryp->encoding);
302 DBG(r,"end chxj_rencoding()");
/* Conversion call; input and output cursors and remaining sizes are passed
 * by address. */
307 result = iconv(cd, &ibuf, &ilen, &obuf, &olen);
308 if (result == (size_t)(-1)) {
/* Output buffer exhausted. */
309 if (E2BIG == errno) {
310 ERR(r, "There is not sufficient room at *outbuf");
/* Invalid input sequence: log it, then pass the cursors to the helper that
 * inspects the offending bytes. */
313 else if (EILSEQ == errno) {
314 ERR(r, "An invalid multibyte sequence has been encountered in the input. input:[%s]", ibuf);
315 chxj_convert_illegal_charactor_sequence(r, entryp, &ibuf, &ilen, &obuf, &olen);
/* Input ended in the middle of a multibyte sequence. */
317 else if (EINVAL == errno) {
318 ERR(r, "An incomplete multibyte sequence has been encountered in the input. input:[%s]", ibuf);
/* Final trace: prints *len bytes starting at the saved output start. */
327 DBG(r,"end chxj_rencoding() len=[%d] obuf=[%.*s]", (int)*len, (int)*len, spos);
/*
 * chxj_encoding_parameter(): rebuild the query string of a URL-like value so
 * that every key and value is URL-decoded, passed through chxj_encoding(),
 * and URL-encoded again. A "#anchor" suffix, when present, is appended to
 * the result.
 *
 * NOTE(review): this listing is not contiguous. Local declarations, the loop
 * construct, several conditions and closing braces are not visible, so the
 * comments below describe only the statements that can be seen here.
 *
 * r     : request; supplies the pool for every allocation and the logging
 *         context.
 * value : the string to process; it is duplicated first and the copy is the
 *         one that gets tokenized.
 *
 * Returns a string allocated from r->pool.
 */
334 chxj_encoding_parameter(request_rec *r, const char *value)
350 DBG(r, "start chxj_encoding_parameter()");
/* Work on a private copy; apr_strtok() below modifies the string it scans. */
352 src = apr_pstrdup(r->pool, value);
/* Locate and save the fragment part. The lines that adjust anchor_pos and
 * src around this copy are not visible here. */
354 anchor_pos = strchr(src, '#');
357 anchor = apr_pstrdup(r->pool, anchor_pos);
/* No '?' presumably means there is no query string to re-encode; the visible
 * return re-attaches the anchor (its guarding conditions are not visible --
 * TODO confirm). */
362 spos = strchr(src, '?');
364 DBG(r, "end chxj_encoding_parameter()");
366 return apr_pstrcat(r->pool, src, "#", anchor, NULL);
/* src_sv is the prefix placed before '?' in the final result. param
 * accumulates the rebuilt query string; it is measured with strlen() below,
 * so it is presumably set to an empty string on a line not visible here. */
373 src_sv = apr_pstrdup(r->pool, src);
374 param = apr_palloc(r->pool, 1);
/* Split the query string into "key=value" pairs on '&'. */
383 pair = apr_strtok(spos, "&", &pstat);
/* A pair starting with "amp;" is what remains of an "&amp;" separator after
 * splitting on '&'; how it is handled is not visible here. */
386 if (strncasecmp(pair, "amp;", 4) == 0) {
/* A pair that begins with '=' has an empty key; otherwise the key is the
 * text before the first '='. */
390 sep_pos = strchr(pair, '=');
391 if (pair == sep_pos) {
392 key = apr_pstrdup(r->pool, "");
395 key = apr_strtok(pair, "=", &vstat);
/* Key: URL-decode, convert with chxj_encoding(), URL-encode again. */
399 apr_size_t klen = (apr_size_t)strlen(key);
400 key = chxj_url_decode(r->pool, key);
401 len = (apr_size_t)strlen(key);
403 key = chxj_encoding(r, key, &len);
405 key = chxj_url_encode(r->pool, key);
/* NOTE(review): pair is passed to apr_strtok() again here; whether it was
 * reassigned on a line not visible in this listing should be confirmed. */
407 val = apr_strtok(pair, "=", &vstat);
/* "key=" with nothing after it: treat the value as an empty string. */
408 if (! val && sep_pos) {
409 val = apr_pstrdup(r->pool, "");
/* Value: URL-decode, convert with chxj_encoding(), URL-encode again. */
412 apr_size_t vlen = (apr_size_t)strlen(val);
413 val = chxj_url_decode(r->pool, val);
414 len = (apr_size_t)strlen(val);
416 val = chxj_encoding(r, val, &len);
418 val = chxj_url_encode(r->pool, val);
/* Append "key=val"; the first pair gets no leading '&'.
 * NOTE(review): the two '&' concatenations below are textually identical;
 * the condition that separates them is not visible here. */
419 if (strlen(param) == 0) {
420 param = apr_pstrcat(r->pool, param, key, "=", val, NULL);
424 param = apr_pstrcat(r->pool, param, "&", key, "=", val, NULL);
427 param = apr_pstrcat(r->pool, param, "&", key, "=", val, NULL);
/* Key without a value: append the key alone, with the same '&' rule.
 * NOTE(review): again the two '&' concatenations are textually identical. */
432 if (strlen(param) == 0) {
433 param = apr_pstrcat(r->pool, param, key, NULL);
437 param = apr_pstrcat(r->pool, param, "&", key, NULL);
440 param = apr_pstrcat(r->pool, param, "&", key, NULL);
445 DBG(r, "end chxj_encoding_parameter()");
/* Two result shapes: with the saved anchor appended, or without it. The
 * condition choosing between them is not visible here. */
448 return apr_pstrcat(r->pool, src_sv, "?", param, "#", anchor, NULL);
450 return apr_pstrcat(r->pool, src_sv, "?", param, NULL);