2 * Copyright (C) 2005-2011 Atsushi Konno All rights reserved.
3 * Copyright (C) 2005 QSDN,Inc. All rights reserved.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
21 #include "chxj_apache.h"
22 #include "qs_parse_string.h"
23 #include "qs_parse_tag.h"
/* Number of newlines after which the encoding pre-read loop in qs_parse_string stops looking for a leading <?xml ...> declaration. */
30 #define NL_COUNT_MAX (10)
/* One entry of a tag stack, linked to its neighbours through next/ref. */
/* NOTE(review): this listing elides the member declared on original line 33 and the closing line of the typedef; qs_dump_node_stack reads a member called node, so that is presumably the elided one -- confirm. */
32 typedef struct node_stack_element {
/* Element that follows this one in the list. */
34 struct node_stack_element *next;
/* Back-reference: list_insert stores the address of a next pointer here (point->ref = &node->next). */
35 struct node_stack_element **ref;
/* Stack of open tags. qs_push_node creates head on first use as a self-linked element; qs_pop_node reads and then recomputes tail. */
38 typedef struct node_stack {
/* Self-linked anchor element allocated by qs_push_node when it is still NULL. */
39 NodeStackElement head;
/* Element examined next by qs_pop_node; NULL or equal to head means the stack is empty. */
40 NodeStackElement tail;
/* Forward declarations of the file-local helpers defined further down in this file. */
/* Length scanners used by qs_parse_string to find the extent of a tag / of a text run. */
43 static int s_cut_tag (const char *s, int len);
44 static int s_cut_text(const char *s, int len, int script);
/* Debug dump of a node subtree. */
45 static void qs_dump_node(Doc *doc, Node *node, int indent);
/* Tag-stack primitives. */
46 static void qs_push_node(Doc *doc, Node *node, NodeStack stack);
47 static Node *qs_pop_node(Doc *doc, NodeStack stack);
/* Stack dump helper, declared only when DUMP_NODE_STACK is defined (the matching #endif line is elided in this listing). */
48 #ifdef DUMP_NODE_STACK
49 static void qs_dump_node_stack(Doc *doc, NodeStack stack);
/* Stack teardown, open/close tag mismatch reporting, and creation of the newline marker node. */
51 static void qs_free_node_stack(Doc *doc, NodeStack stack);
52 static void s_error_check(Doc *doc, const char *name, int line, NodeStack node_stack, NodeStack err_stack);
53 static Node *qs_new_nl_node(Doc *doc);
/* qs_parse_string: build the node tree for the srclen bytes of markup at src and return doc->root_node. */
/* NOTE(review): this listing elides lines (braces, declarations, else/case labels, some branch bodies); the comments below describe only the statements that are visible. */
57 qs_parse_string(Doc *doc, const char *src, int srclen)
/* Clear 256 bytes of the encoding-name buffer; it stays empty until an encoding attribute of <?xml ...> is recognised below. */
74 memset(encoding, 0, 256);
/* Create the root node and make it the current parent used by qs_add_child_node. */
76 doc->now_parent_node = qs_init_root_node(doc);
/* Missing or empty input: return the bare root. */
77 if (! src || srclen <= 0) {
78 return doc->root_node;
/* Both tag stacks, zero-filled, taken from the request pool. */
81 node_stack = apr_palloc(doc->r->pool, sizeof(struct node_stack));
82 memset(node_stack, 0, sizeof(struct node_stack));
83 err_stack = apr_palloc(doc->r->pool, sizeof(struct node_stack));
84 memset(err_stack, 0, sizeof(struct node_stack));
/* The same two stacks taken from the C heap instead. */
/* NOTE(review): presumably the doc->r == NULL branch (qs_free_node_stack only acts in that case) -- confirm. No visible line checks the calloc results, and the arguments are written in (size, count) order. */
87 node_stack = calloc(sizeof(struct node_stack), 1);
88 err_stack = calloc(sizeof(struct node_stack), 1);
92 * It is the pre reading.
93 * Because I want to specify encoding.
/* Pre-read pass: walk the input until NL_COUNT_MAX newlines have been counted, looking for a leading <?xml ...> declaration. */
95 for (ii=0; ii<srclen; ii++) {
96 if (src[ii] == '\n') nl_cnt++;
97 if (nl_cnt >= NL_COUNT_MAX) break; /* not found <?xml ...> */
/* Whitespace test; the statement it guards is elided in this listing. */
99 if (is_white_space(src[ii]))
/* A tag starts here: measure it with s_cut_tag and parse it. */
102 if ((unsigned char)'<' == src[ii]) {
103 int endpoint = s_cut_tag(&src[ii], srclen - ii);
104 Node *node = qs_parse_tag(doc, &src[ii], endpoint);
/* Failure path (its condition is elided): log and return whatever tree exists. */
106 QX_LOGGER_FATAL("runtime exception: qs_parse_string(): Out of memory.");
107 return doc->root_node;
/* The first tag whose name does not start with '?' ends the pre-read. */
111 if (node->name[0] != '?') break;
113 if (strcasecmp(node->name, "?xml") == 0) {
/* Walk the attributes until an encoding name has been stored. */
115 for(parse_attr = node->attr;
116 parse_attr && *encoding == '\0';
117 parse_attr = parse_attr->next) {
118 if (STRCASEEQ('e','E',"encoding",parse_attr->name)) {
/* Dispatch on the first character of the attribute value; the case labels are elided in this listing. */
119 switch (*parse_attr->value) {
/* Shift_JIS spellings map to "NONE", which the check after this loop treats as "do not convert". */
122 if (strcasecmp(parse_attr->value, "x-sjis" ) == 0) {
123 strcpy((char *)encoding, (char *)"NONE");
129 if ((strcasecmp(parse_attr->value, "Shift_JIS") == 0)
130 || (strcasecmp(parse_attr->value, "SJIS" ) == 0)
131 || (strcasecmp(parse_attr->value, "Shift-JIS") == 0)) {
132 strcpy((char *)encoding, (char *)"NONE");
/* EUC-JP spellings are normalised to "EUC-JP". */
138 if ((strcasecmp(parse_attr->value, "EUC_JP") == 0)
139 || (strcasecmp(parse_attr->value, "EUC-JP") == 0)
140 || (strcasecmp(parse_attr->value, "EUCJP" ) == 0)) {
141 strcpy((char *)encoding, "EUC-JP");
/* UTF-8 spellings are normalised to "UTF-8". */
147 if ((strcasecmp(parse_attr->value, "UTF-8") == 0)
148 || (strcasecmp(parse_attr->value, "UTF8") == 0)) {
149 strcpy((char *)encoding, "UTF-8");
/* NOTE(review): presumably the default case for any other first character -- confirm. */
154 strcpy((char *)encoding, "NONE");
/* Convert the input only when a non-empty encoding other than "NONE" was stored. */
165 if (strcasecmp(encoding, "NONE") != 0 && strlen(encoding) != 0) {
/* Output buffer of srclen * 4 + 1 bytes from doc->pool, zero-filled; sv_osrc keeps the start because iconv is handed &osrc. */
167 olen = srclen * 4 + 1;
168 sv_osrc = osrc =(char *)apr_palloc(doc->pool, olen);
169 memset((char *)osrc, 0, olen);
/* Open a converter from the detected encoding to CP932. */
170 if ((cd = iconv_open("CP932", encoding)) != (iconv_t) -1) {
/* Copy the input into a zero-filled buffer of ilen + 1 bytes (the assignment of ilen is elided). */
172 ibuf = apr_palloc(doc->pool, ilen+1);
173 memset(ibuf, 0, ilen+1);
174 memcpy(ibuf, src, ilen);
/* Run the conversion; (size_t)-1 is the failure value tested below (the handling is elided). */
176 size_t result = iconv(cd, &ibuf, &ilen, &osrc, &olen);
177 if (result == (size_t)(-1)) {
188 * Now, true parsing is done here.
/* Main pass over the input bytes. */
191 for (ii=0; ii<srclen; ii++) {
192 if (src[ii] == '\n') {
/* When there is a current parent, append a newline marker node to it. */
194 if (doc->now_parent_node != NULL) {
195 Node *node = qs_new_nl_node(doc);
196 qs_add_child_node(doc,node);
/* Whitespace outside NO_PARSE mode and outside a "pre" parent; the action taken is elided in this listing. */
199 if (doc->parse_mode != PARSE_MODE_NO_PARSE
200 && is_white_space(src[ii])
201 && (doc->now_parent_node == NULL || !STRCASEEQ('p','P',"pre",doc->now_parent_node->name))) {
/* '<' that does not begin a CDATA section: measure and parse it as a tag. */
204 if ((unsigned char)'<' == src[ii] && strncasecmp("<![CDATA[",&src[ii], sizeof("<![CDATA[")-1) != 0) {
205 int endpoint = s_cut_tag(&src[ii], srclen - ii);
206 Node *node = qs_parse_tag(doc, &src[ii], endpoint);
/* Failure path (its condition is elided): log and return the tree built so far. */
208 QX_LOGGER_FATAL("runtime exception: qs_parse_string(): Out of memory.");
209 return doc->root_node;
/* Closing tag: its name starts with '/'. */
214 if (node->name[0] == '/' ) {
215 if (doc->parse_mode == PARSE_MODE_CHTML) {
/* For a tag that can have children, move the current parent up one level unless its parent is NULL. */
216 if (has_child(&(node->name[1]))) {
217 if (doc->now_parent_node->parent != NULL) {
218 doc->now_parent_node = doc->now_parent_node->parent;
219 doc->parse_mode = PARSE_MODE_CHTML;
/* Closing script tag; the statement it guards is elided. */
221 if (STRCASEEQ('s','S',"script",&node->name[1])) {
/* Check the closing name against the stack of open tags. */
224 s_error_check(doc, &node->name[1], node->line, node_stack, err_stack);
/* In NO_PARSE mode, a closing chxj:if, plaintext or chxj:raw tag switches back to CHTML mode and moves the parent up one level. */
232 if (doc->parse_mode == PARSE_MODE_NO_PARSE) {
233 if (STRCASEEQ('c','C',"chxj:if",&node->name[1]) || STRCASEEQ('p','P',"plaintext",&node->name[1]) || STRCASEEQ('c','C',"chxj:raw",&node->name[1])) {
234 if (doc->now_parent_node->parent != NULL) {
235 doc->now_parent_node = doc->now_parent_node->parent;
236 doc->parse_mode = PARSE_MODE_CHTML;
237 s_error_check(doc, &node->name[1], node->line, node_stack, err_stack);
242 if (doc->parse_mode != PARSE_MODE_NO_PARSE) {
/* Comment tags (name begins with "!--"); the action taken is elided in this listing. */
246 if (*node->name == '!' && strncmp(node->name, "!--", 3) == 0) {
/* Attach the tag to the current parent; push it on node_stack when it can have children (outside NO_PARSE mode) or when it is plaintext. */
250 qs_add_child_node(doc,node);
251 if ((has_child(node->name) && doc->parse_mode != PARSE_MODE_NO_PARSE) || STRCASEEQ('p','P',"plaintext",node->name)) {
252 qs_push_node(doc, node, node_stack);
255 if (doc->parse_mode == PARSE_MODE_NO_PARSE) {
256 if (node->name[0] == '/')
/* Opening chxj:if / chxj:raw: switch to NO_PARSE mode with this tag as the parent, unless a parse attribute with value "true" restores CHTML mode. */
260 if (doc->parse_mode == PARSE_MODE_CHTML && (STRCASEEQ('c','C',"chxj:if", node->name) || STRCASEEQ('c','C',"chxj:raw",node->name))) {
262 doc->parse_mode = PARSE_MODE_NO_PARSE;
263 doc->now_parent_node = node;
264 for(parse_attr = node->attr;
266 parse_attr = parse_attr->next) {
267 if (STRCASEEQ('p','P',"parse",parse_attr->name)) {
268 if (STRCASEEQ('t','T',"true",parse_attr->value)) {
269 doc->parse_mode = PARSE_MODE_CHTML;
/* Opening plaintext: switch to NO_PARSE mode with this tag as the parent. */
274 else if (doc->parse_mode == PARSE_MODE_CHTML && STRCASEEQ('p','P',"plaintext",node->name)) {
275 doc->parse_mode = PARSE_MODE_NO_PARSE;
276 doc->now_parent_node = node;
/* In CHTML mode, a tag that can have children becomes the current parent. */
279 if (doc->parse_mode == PARSE_MODE_CHTML && has_child(node->name)) {
280 doc->now_parent_node = node;
/* Opening script tag; the statement it guards is elided. */
282 if (STRCASEEQ('s','S',"script", node->name)) {
/* Self-closed tag in CHTML mode: step back to the parent and run the same stack check as for an explicit closing tag. */
285 if (doc->parse_mode == PARSE_MODE_CHTML && node->closed_by_itself) {
286 if (has_child(node->name)) {
287 if (doc->now_parent_node->parent != NULL) {
288 doc->now_parent_node = doc->now_parent_node->parent;
289 doc->parse_mode = PARSE_MODE_CHTML;
291 if (STRCASEEQ('s','S',"script",node->name)) {
294 s_error_check(doc, node->name, node->line, node_stack, err_stack);
/* CDATA section: store its payload as a node named "text". */
302 else if (strncasecmp("<![CDATA[", &src[ii], sizeof("<![CDATA[") - 1) == 0) {
304 int endpoint = s_cut_tag(&src[ii], srclen - ii);
305 Node *node = qs_new_tag(doc);
/* Failure path (its condition is elided): log and return the tree built so far. */
307 QX_LOGGER_DEBUG("runtime exception: qs_parse_string(): Out of memory");
308 return doc->root_node;
/* value and otext get endpoint + 1 bytes each, name gets 5 bytes; all are zero-filled below. */
310 node->value = (char *)apr_palloc(doc->pool,endpoint+1);
311 node->name = (char *)apr_palloc(doc->pool,4+1);
312 node->otext = (char *)apr_palloc(doc->pool,endpoint+1);
313 node->size = endpoint;
315 memset(node->value, 0, endpoint+1);
316 memset(node->otext, 0, endpoint+1);
317 memset(node->name, 0, 4+1 );
/* Copy the bytes after "<![CDATA[", leaving out the opener length and the length of "]]". */
/* NOTE(review): s_cut_tag searches for the three-byte "]]>" terminator; whether subtracting only sizeof("]]")-1 is exact depends on the value s_cut_tag returns, which is not visible here -- confirm. */
318 memcpy(node->value, &src[ii+sizeof("<![CDATA[")-1], endpoint - (sizeof("<![CDATA[")-1) - (sizeof("]]")-1));
319 memcpy(node->name, "text", 4);
320 memcpy(node->otext,node->value, endpoint);
/* Attach the node and advance ii by endpoint - 1; the loop's own ii++ adds the remaining step. */
322 qs_add_child_node(doc,node);
323 ii += (endpoint - 1);
/* Plain text run: measure it with s_cut_text and store it as a node named "text". */
327 int endpoint = s_cut_text(&src[ii], srclen - ii, script_flag);
328 Node *node = qs_new_tag(doc);
/* Failure path (its condition is elided): log and return the tree built so far. */
330 QX_LOGGER_DEBUG("runtime exception: qs_parse_string(): Out of memory");
331 return doc->root_node;
333 node->value = (char *)apr_palloc(doc->pool,endpoint+1);
334 node->name = (char *)apr_palloc(doc->pool,4+1);
335 node->otext = (char *)apr_palloc(doc->pool,endpoint+1);
336 node->size = endpoint;
338 memset(node->value, 0, endpoint+1);
339 memset(node->otext, 0, endpoint+1);
340 memset(node->name, 0, 4+1 );
/* value and otext both receive the endpoint raw bytes of the run. */
341 memcpy(node->value, &src[ii], endpoint);
342 memcpy(node->name, "text", 4);
343 memcpy(node->otext,node->value, endpoint);
345 qs_add_child_node(doc,node);
346 ii += (endpoint - 1);
350 QX_LOGGER_DEBUG("parse_string end");
/* Dump the finished tree when a request record is attached. */
353 if (doc->r != NULL) {
354 qs_dump_node(doc, doc->root_node, 0);
357 #ifdef DUMP_NODE_STACK
358 qs_dump_node_stack(doc, node_stack);
/* Drain node_stack: every remaining entry that can have children is reported as a tag that was probably not closed. */
362 for (prevNode = qs_pop_node(doc,node_stack);
364 prevNode = qs_pop_node(doc, node_stack)) {
365 if (has_child(prevNode->name)) {
367 ERR(doc->r, "tag parse error (perhaps, not close). tag_name:[%s] line:[%d]", prevNode->name, prevNode->line);
369 fprintf(stderr, "error :tag parse error (perhaps, not close). tag_name:[%s] line:[%d]\n", prevNode->name, prevNode->line);
/* Release both stacks and clear the local pointers. */
373 qs_free_node_stack(doc, node_stack); node_stack = NULL;
374 qs_free_node_stack(doc, err_stack); err_stack = NULL;
375 return doc->root_node;
/* s_error_check: called by qs_parse_string with the name and line of a tag being closed; matches it against node_stack and reports mismatches. */
/* NOTE(review): this listing elides lines (declarations, loop conditions, else lines, braces); the comments describe only what is visible. */
380 s_error_check(Doc *doc, const char *name, int line, NodeStack node_stack, NodeStack err_stack)
/* Pop open tags; every entry whose name differs from name (case-insensitively) is parked on err_stack. */
384 for (prevNode = qs_pop_node(doc,node_stack);
386 prevNode = qs_pop_node(doc, node_stack)) {
387 if (prevNode && strcasecmp(prevNode->name, name) != 0) {
388 qs_push_node(doc, prevNode, err_stack);
/* Pop one more entry from node_stack to decide which report to make. */
395 Node *tmpNode = qs_pop_node(doc,node_stack);
/* Nothing left on node_stack (and err is not 1): report a probably misspelled tag name. */
396 if (tmpNode == NULL && err != 1) {
398 ERR(doc->r, "tag parse error (perhaps, miss spell). tag_name:[%s] line:[%d]", name, line);
400 fprintf(stderr, "error :tag parse error (perhaps, miss spell). tag_name:[%s] line:[%d]\n", name, line);
/* Move every parked entry from err_stack back onto node_stack. */
401 for (prevNode = qs_pop_node(doc,err_stack);
403 prevNode = qs_pop_node(doc, err_stack)) {
404 qs_push_node(doc, prevNode, node_stack);
/* NOTE(review): presumably the else branch -- confirm. Each parked entry is reported as probably not closed. */
408 for (prevNode = qs_pop_node(doc,err_stack);
410 prevNode = qs_pop_node(doc, err_stack)) {
412 ERR(doc->r, "tag parse error (perhaps, not close). tag_name:[%s] line:[%d]", prevNode->name, prevNode->line);
414 fprintf(stderr, "error :tag parse error (perhaps, not close). tag_name:[%s] line:[%d]\n", prevNode->name, prevNode->line);
/* Put the extra entry popped above back on node_stack. */
416 qs_push_node(doc, tmpNode, node_stack);
/* qs_dump_node: write the children of node, and recursively their descendants, to the debug log; indent is used as both the width and the precision of the leading pad. */
424 qs_dump_node(Doc *doc, Node *node, int indent)
/* Start at the first child and follow the sibling chain. */
426 Node *p = (Node *)qs_get_child_node(doc,node);
428 for (;p;p = (Node *)qs_get_next_node(doc,p)) {
/* A node with a value is logged as name and value; the line below it logs the name only. */
430 if ((char *)qs_get_node_value(doc,p) != NULL) {
431 DBG(doc->r,"%*.*sNode:[%s][%s]\n", indent,indent," ",
432 (char *)qs_get_node_name(doc,p),
433 (char *)qs_get_node_value(doc,p));
436 DBG(doc->r,"%*.*sNode:[%s]\n", indent,indent," ", qs_get_node_name(doc,p));
/* Log each attribute's name and value. */
438 for (attr = (Attr *)qs_get_attr(doc,p); attr; attr = (Attr *)qs_get_next_attr(doc,attr)) {
439 DBG(doc->r,"%*.*s ATTR:[%s]\n", indent,indent," ", (char *)qs_get_attr_name(doc,attr));
440 DBG(doc->r,"%*.*s VAL :[%s]\n", indent,indent," ", (char *)qs_get_attr_value(doc,attr));
/* Recurse into this child with the indent increased by 4. */
442 qs_dump_node(doc,p, indent+4);
/* chxj_cut_tag: non-static wrapper that forwards s and len unchanged to the file-local s_cut_tag and returns its result. */
449 chxj_cut_tag(const char *s, int len)
451 return s_cut_tag(s, len);
/* s_cut_tag: scan up to len bytes of s, which starts at a tag, looking for the end of that tag. */
/* NOTE(review): the bodies of every branch and the return statement are elided in this listing, so the exact return value cannot be stated from here. */
456 s_cut_tag(const char *s, int len)
/* Classify the tag: "<!--" comment, "<![CDATA[" section, or neither. */
463 if (strncmp("<!--", s, 4) == 0) {
466 else if (strncasecmp("<![CDATA[", s, sizeof("<![CDATA[")-1) == 0) {
470 for (ii=0;ii<len; ii++) {
/* Shift_JIS kanji, Shift_JIS kana and whitespace bytes are tested before the terminator checks; what each branch does is elided. */
471 if (is_sjis_kanji(s[ii])) {
475 if (is_sjis_kana(s[ii]))
478 if (is_white_space(s[ii]))
/* Inside a comment, look for "-->". */
485 if (comment && s[ii] == '-') {
486 if (strncmp(&s[ii], "-->", 3) == 0) {
/* Inside a CDATA section, look for "]]>". */
491 if (cdata && s[ii] == ']') {
492 if (strncmp(&s[ii], "]]>", 3) == 0) {
/* Otherwise look for '>'. */
498 if (!cdata && !comment && s[ii] == '>') {
/* s_cut_text: scan up to len bytes of s, a text run, for the '<' that ends it; qs_parse_string passes its script_flag as script. */
/* NOTE(review): the bodies of every branch and the return statement are elided in this listing, including any use of the script parameter. */
509 s_cut_text(const char *s, int len, int script)
515 for (ii=0;ii<len; ii++) {
/* Shift_JIS kanji, Shift_JIS kana and whitespace bytes are tested first; what each branch does is elided. */
516 if (is_sjis_kanji(s[ii])) {
520 if (is_sjis_kana(s[ii]))
523 if (is_white_space(s[ii]))
/* A single quote is considered only when dq is clear, and a double quote only when sq is clear. */
532 if (s[ii] == '\'' && !dq) {
537 if (s[ii] == '"' && !sq) {
/* A '<' seen while neither quote flag is set is tested here. */
543 if (!sq && !dq && s[ii] == '<')
/* qs_init_root_node: allocate doc->root_node from doc->pool, clear its links, name it "ROOT" and return it. */
553 qs_init_root_node(Doc *doc)
555 doc->root_node = (Node *)apr_palloc(doc->pool,sizeof(struct Node));
/* Allocation failure is logged; the statement that follows the log call is elided in this listing. */
556 if (doc->root_node == NULL) {
557 QX_LOGGER_FATAL("Out Of Memory");
/* The root starts with no sibling, parent, child or attribute. */
560 doc->root_node->next = NULL;
561 doc->root_node->parent = NULL;
562 doc->root_node->child = NULL;
563 doc->root_node->attr = NULL;
/* Five bytes hold "ROOT" plus its terminating NUL. */
565 doc->root_node->name = (char *)apr_palloc(doc->pool, 5);
566 if (doc->root_node->name == NULL) {
567 QX_LOGGER_FATAL("Out Of Memory");
569 memset(doc->root_node->name, 0, 5);
570 strcpy(doc->root_node->name, "ROOT");
572 return doc->root_node;
/* qs_add_child_node: append node as the last child of doc->now_parent_node. */
/* NOTE(review): doc->now_parent_node is dereferenced without a NULL test on any visible line -- confirm callers guarantee it is set. */
578 qs_add_child_node(Doc *doc,Node *node)
/* The new node has no child tail yet and its parent is the current parent. */
582 node->child_tail = NULL;
583 node->parent = doc->now_parent_node;
/* First child: it is both the head and the tail of the parent's child list. */
584 if (doc->now_parent_node->child == NULL) {
585 doc->now_parent_node->child = node;
586 doc->now_parent_node->child_tail = node;
590 QX_LOGGER_DEBUG("search child free node");
/* Otherwise link the node after the recorded tail and make it the new tail. */
592 doc->now_parent_node->child_tail->next = node;
593 doc->now_parent_node->child_tail = node;
/* qs_get_root: return the root node stored in doc. */
601 qs_get_root(Doc *doc) {
602 return doc->root_node;
/* qs_get_node_value: node value accessor; the body is elided in this listing. qs_dump_node casts the result to char *, tests it against NULL and prints it with %s. The doc parameter is wrapped in UNUSED(). */
609 qs_get_node_value(Doc *UNUSED(doc), Node *node) {
/* qs_get_node_name: node name accessor; the body is elided in this listing. qs_dump_node prints the result with %s. The doc parameter is wrapped in UNUSED(). */
617 qs_get_node_name(Doc *UNUSED(doc), Node *node) {
/* qs_get_child_node: child accessor; the body is elided in this listing. qs_dump_node casts the result to Node * and uses it as the start of its child loop. The doc parameter is wrapped in UNUSED(). */
624 qs_get_child_node(Doc *UNUSED(doc), Node *node) {
/* qs_get_next_node: sibling accessor; the body is elided in this listing. qs_dump_node casts the result to Node * and uses it to step its child loop until it is NULL. The doc parameter is wrapped in UNUSED(). */
632 qs_get_next_node(Doc *UNUSED(doc), Node *node) {
/* qs_get_attr: attribute-list accessor; the body is elided in this listing. qs_dump_node casts the result to Attr * and uses it as the start of its attribute loop. The doc parameter is wrapped in UNUSED(). */
639 qs_get_attr(Doc *UNUSED(doc), Node *node) {
/* qs_get_next_attr: next-attribute accessor; the body is elided in this listing. qs_dump_node casts the result to Attr * and uses it to step its attribute loop until it is NULL. The doc parameter is wrapped in UNUSED(). */
647 qs_get_next_attr(Doc *UNUSED(doc), Attr *attr) {
/* qs_get_attr_name: attribute name accessor; the body is elided in this listing. qs_dump_node casts the result to char * and prints it with %s. The doc parameter is wrapped in UNUSED(). */
654 qs_get_attr_name(Doc *UNUSED(doc), Attr *attr) {
/* qs_get_attr_value: attribute value accessor; the body is elided in this listing. qs_dump_node casts the result to char * and prints it with %s. The doc parameter is wrapped in UNUSED(). */
661 qs_get_attr_value(Doc *UNUSED(doc), Attr *attr) {
/* qs_get_node_size: node size accessor; the body is elided and no caller is visible in this listing. The doc parameter is wrapped in UNUSED(). */
/* NOTE(review): presumably returns the size member that qs_parse_string sets on text nodes -- confirm. */
666 qs_get_node_size(Doc *UNUSED(doc), Node *node) {
/* list_insert(node, point): link node in front of point. node inherits point's back-reference, node->next is set to point, and point's back-reference becomes the address of node->next. */
/* NOTE(review): original line 673 and the closing lines of the macro are elided in this listing; line 673 presumably stores node through the inherited back-reference -- confirm. The arguments are expanded without parentheses, so pass only plain identifiers or member expressions. */
671 #define list_insert(node, point) do { \
672 node->ref = point->ref; \
674 node->next = point; \
675 point->ref = &node->next; \
/* list_remove(node): unlink node. The pointer that referenced node is redirected to node->next, and node->next inherits node's back-reference. */
/* NOTE(review): the closing lines of the macro are elided in this listing. node->next is dereferenced unconditionally, and the argument is expanded without parentheses. */
678 #define list_remove(node) do { \
679 *node->ref = node->next; \
680 node->next->ref = node->ref; \
/* qs_push_node: create a stack element for node and link it into stack, creating the stack's head element on first use. */
/* NOTE(review): this listing elides lines (else lines, braces, and the statements after list_insert), so how node is stored in elem and how stack->tail is updated is not visible here. */
685 qs_push_node(Doc *doc, Node *node, NodeStack stack)
687 NodeStackElement elem;
/* With a request record, the zero-filled element comes from the request pool. */
688 if (doc->r != NULL) {
689 elem = apr_palloc(doc->r->pool, sizeof(struct node_stack_element));
690 memset(elem, 0, sizeof(struct node_stack_element));
/* Heap variant. NOTE(review): memset follows malloc on consecutive original lines (693/694), so a NULL result from malloc is not checked before it is written to. */
693 elem = malloc(sizeof(struct node_stack_element));
694 memset(elem, 0, sizeof(struct node_stack_element));
/* First push: allocate the head element the same way and link it to itself. */
697 if (stack->head == NULL) {
699 if (doc->r != NULL) {
700 stack->head = apr_palloc(doc->r->pool, sizeof(struct node_stack_element));
701 memset(stack->head, 0, sizeof(struct node_stack_element));
/* NOTE(review): same unchecked malloc result as above (original lines 704/705). */
704 stack->head = malloc(sizeof(struct node_stack_element));
705 memset(stack->head, 0, sizeof(struct node_stack_element));
/* An empty list is the head pointing at itself, with its back-reference aimed at its own next pointer. */
707 stack->head->next = stack->head;
708 stack->head->ref = &stack->head->next;
/* Link the new element in front of head. */
710 list_insert(elem, stack->head);
714 #include "apr_ring.h"
/* qs_pop_node: take the element recorded in stack->tail off the stack; returns NULL when the stack is empty. */
/* NOTE(review): the lines between the empty checks and the tail update (original lines 724-727) and the return statement are elided in this listing; presumably they fetch the element's node and unlink it -- confirm. */
717 qs_pop_node(Doc *doc, NodeStack stack)
719 NodeStackElement tail = stack->tail;
/* Empty: the tail was never set, or it is the head element itself. */
722 if (tail == NULL) return NULL;
723 if (tail == stack->head) return NULL;
/* Recompute the tail from head's back-reference: subtract the offset of the next member from the address stored in head->ref to get the address of the element that contains that next pointer. */
728 stack->tail = (NodeStackElement)((apr_size_t)stack->head->ref - (apr_size_t)APR_OFFSETOF(struct node_stack_element, next));
/* qs_dump_node_stack: compiled only when DUMP_NODE_STACK is defined; prints the name of every node on stack, starting at head->next and stopping when the walk returns to head. */
735 #ifdef DUMP_NODE_STACK
737 qs_dump_node_stack(Doc *doc, NodeStack stack)
739 NodeStackElement elm;
740 for (elm = stack->head->next;elm != stack->head; elm = elm->next) {
/* With a request record, log through DBG; otherwise print to stderr together with the element and next addresses. */
741 if (doc->r) DBG(doc->r, "name:[%s]", elm->node->name);
/* NOTE(review): the %x conversions receive apr_size_t arguments; where apr_size_t is wider than unsigned int this is a format/argument mismatch -- confirm and consider a pointer or size conversion. */
742 else fprintf(stderr, "[%x] name:[%s] next:[%x]\n", (apr_size_t)elm, elm->node->name, (apr_size_t)elm->next);
/* qs_free_node_stack: tear down stack; it acts only when doc->r is NULL and stack is non-NULL. */
/* NOTE(review): the loop body and the statements after the loop are elided in this listing, so what is released is not visible here. */
748 qs_free_node_stack(Doc *doc, NodeStack stack)
750 if (doc->r == NULL && stack != NULL) {
/* Pop until qs_pop_node returns NULL. */
752 for (elm = qs_pop_node(doc, stack);elm; elm = qs_pop_node(doc,stack))
/* qs_new_nl_node: create the node that qs_parse_string appends for each newline it encounters. */
/* NOTE(review): the failure test, the return statement and possibly further lines are elided or lie beyond this listing. */
762 qs_new_nl_node(Doc *doc)
764 Node *node = (Node *)qs_new_tag(doc);
/* Logged on the failure path (its condition is elided); the message text names qs_parse_tag() although it is emitted from this function. */
766 QX_LOGGER_DEBUG("runtime exception: qs_parse_tag(): Out of memory.");
/* The node's name is a copy of QS_PARSE_NL_MARK taken from doc->pool. */
769 node->name = apr_pstrdup(doc->pool, QS_PARSE_NL_MARK);