1 /*--------------------------------------------------------------------------
2 * Copyright 2009 Taro L. Saito
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *--------------------------------------------------------------------------*/
16 //--------------------------------------
19 // XMLStreamReader.java
20 // Since: Mar 30, 2009 5:27:15 PM
24 //--------------------------------------
25 package org.xerial.util.xml;
27 import static org.xmlpull.v1.XmlPullParser.*;
29 import java.io.IOException;
30 import java.io.Reader;
32 import org.xerial.core.XerialErrorCode;
33 import org.xerial.core.XerialException;
34 import org.xerial.util.ArrayDeque;
35 import org.xerial.util.Deque;
36 import org.xerial.util.tree.TreeEvent;
37 import org.xerial.util.tree.TreeStreamReader;
38 import org.xerial.util.tree.TreeEvent.EventType;
39 import org.xerial.util.xml.impl.TreeEventQueue;
40 import org.xerial.util.xml.pullparser.PullParserUtil;
41 import org.xmlpull.v1.XmlPullParser;
42 import org.xmlpull.v1.XmlPullParserException;
45 * A {@link TreeStreamReader} implementation that reads XML data as a stream of tree events
50 public class XMLStreamReader implements TreeStreamReader
// Pull parser that supplies the XML parse events; created in the constructor from the given Reader.
52 private final XmlPullParser pullParser;
// Stack of text buffers. readNext() adds an entry before queuing a start-tag visit event and
// removes the last entry after queuing the corresponding leave event.
53 private final Deque<StringBuilder> textStack = new ArrayDeque<StringBuilder>();
// Shared sentinel buffer, compared by identity (==), marking that no text has been buffered yet.
54 private final static StringBuilder EMPTY_STRING = new StringBuilder(0);
// Threshold on buffered text length: when appending new text would exceed it, the buffered text is
// reported as a text event and a new buffer is started.
// NOTE(review): named like a constant but declared as a mutable instance field.
55 private int TEXT_BUFFER_MAX = 8192;
// Most recent event type returned by pullParser.next(); starts as START_DOCUMENT.
57 private int parseState = START_DOCUMENT;
// When true, the value of an attribute named "value" becomes the node value of the start tag's visit event.
58 private boolean convertValueAttribute = false;
// Queue of tree events filled by readNext() and read by peekNext()/next().
60 private final TreeEventQueue eventQueue = new TreeEventQueue();
/**
 * Creates a reader that pulls XML events from the given character stream.
 *
 * @param reader source of the XML text
 * @throws IllegalArgumentException with message "XML reader is null"
 *         (NOTE(review): the guarding condition is not visible in this excerpt; presumably a null check on reader)
 */
62 public XMLStreamReader(Reader reader)
65 throw new IllegalArgumentException("XML reader is null");
// build the pull parser over the reader
67 pullParser = PullParserUtil.newParser(reader);
/**
 * Enables or disables use of an attribute named "value" as the node value of the
 * start tag's visit event (see the attribute loop in readNext()).
 *
 * @param enable true to turn the conversion on, false to turn it off
 */
71 public void useValueAttributeAsNodeValue(boolean enable)
73 this.convertValueAttribute = enable;
/**
 * Looks at the next tree event. When eventQueue is non-empty, the event at its head is
 * returned via eventQueue.peekFirst().
 *
 * NOTE(review): the handling of an empty queue, including the END_DOCUMENT case, is not
 * visible in this excerpt; confirm what is returned at the end of the document.
 *
 * @return the event at the head of the queue when one is queued
 * @throws XerialException declared by the signature; presumably propagated from readNext() (confirm)
 */
76 public TreeEvent peekNext() throws XerialException
// serve already-queued events first
78 if (!eventQueue.isEmpty())
80 return eventQueue.peekFirst();
// nothing queued: check whether the parser has already reached the end of the document
83 if (parseState == END_DOCUMENT)
/**
 * Returns the next tree event. When eventQueue is non-empty, the result of
 * eventQueue.pop() is returned.
 *
 * NOTE(review): the handling of an empty queue, including the END_DOCUMENT case, is not
 * visible in this excerpt; confirm what is returned at the end of the document.
 *
 * @return the event popped from the queue when one is queued
 * @throws XerialException declared by the signature; presumably propagated from readNext() (confirm)
 */
91 public TreeEvent next() throws XerialException
// serve already-queued events first
93 if (!eventQueue.isEmpty())
95 return eventQueue.pop();
// nothing queued: check whether the parser has already reached the end of the document
98 if (parseState == END_DOCUMENT)
/**
 * Advances the pull parser by one event (pullParser.next()) and translates it into
 * tree events pushed onto eventQueue.
 *
 * NOTE(review): the dispatch on the parser event type, the braces and any return
 * statements are not visible in this excerpt. The section comments below describe the
 * visible statements only and mark the assumed event type for each section.
 *
 * @throws XerialException with XMLErrorCode.PARSE_ERROR when the parser throws
 *         XmlPullParserException, or with XerialErrorCode.IO_EXCEPTION when it throws IOException
 */
106 public void readNext() throws XerialException
// NOTE(review): the action taken once END_DOCUMENT has been reached is not visible here
108 if (parseState == END_DOCUMENT)
// remember the type of the event just read
113 parseState = pullParser.next();
// --- start-tag handling (presumably the START_TAG case; confirm) ---
// push the shared sentinel as this element's text buffer: no text seen yet
119 textStack.addLast(EMPTY_STRING);
120 String tagName = pullParser.getName();
121 String immediateNodeValue = null;
// local queue for the start-tag event and the attribute events (initial capacity: attribute count + 1)
123 Deque<TreeEvent> startEventQueue = new ArrayDeque<TreeEvent>(pullParser.getAttributeCount() + 1);
125 for (int i = 0; i < pullParser.getAttributeCount(); i++)
127 String attributeName = pullParser.getAttributeName(i);
128 String attributeValue = pullParser.getAttributeValue(i);
130 // assign the value attribute as a node value of the start tag
131 if (convertValueAttribute && attributeName.equals("value"))
133 immediateNodeValue = attributeValue;
// report the attribute as a visit/leave event pair
// NOTE(review): whether this pair is skipped for the "value" attribute (an else branch) is not visible here
137 startEventQueue.addLast(TreeEvent.newVisitEvent(attributeName, attributeValue));
138 startEventQueue.addLast(TreeEvent.newLeaveEvent(attributeName));
141 // push a new start tag event to the front of the queue
142 startEventQueue.addFirst(TreeEvent.newVisitEvent(tagName, immediateNodeValue));
143 eventQueue.push(startEventQueue);
145 // pre-fetch the next event
// --- end-tag handling (presumably the END_TAG case; confirm) ---
// sentinel still on top: no text was buffered for this element, so only a leave event is queued
151 if (textStack.getLast() == EMPTY_STRING)
153 eventQueue.push(TreeEvent.newLeaveEvent(pullParser.getName()));
// otherwise (presumably the else branch) text was buffered for this element
157 StringBuilder textBuffer = textStack.getLast();
// the last queued event is a VISIT event: fold the text into that visit event
158 if (!eventQueue.isEmpty() && eventQueue.peekLast().event == EventType.VISIT)
160 // attach the text value to the previous visit event
161 eventQueue.replaceLast(TreeEvent.newVisitEvent(pullParser.getName(), sanitize(textBuffer
// presumably the alternative branch: report the buffered text as its own text event
165 reportTextEvent(textBuffer);
167 eventQueue.push(TreeEvent.newLeaveEvent(pullParser.getName()));
// drop this element's text buffer from the stack
169 textStack.removeLast();
// --- text handling (presumably the TEXT case; confirm) ---
174 String textData = pullParser.getText();
175 StringBuilder textBuffer = textStack.getLast();
// NOTE(review): the action taken for empty text is not visible here
177 if (textData.length() <= 0)
// first text for this element: swap the shared sentinel for a fresh buffer before appending
180 if (textBuffer == EMPTY_STRING)
182 textBuffer = replaceLastTextBuffer();
// appending would exceed TEXT_BUFFER_MAX: report what is buffered and start a new buffer
184 else if (textBuffer.length() + textData.length() > TEXT_BUFFER_MAX)
186 // add the previous text data to the event queue
187 reportTextEvent(textBuffer);
189 // replace the text buffer
190 textBuffer = replaceLastTextBuffer();
192 textBuffer.append(textData);
// true when the last queued event is a VISIT event
// NOTE(review): the use of needPrefetch is not visible in this excerpt
194 boolean needPrefetch = eventQueue.isEmpty() ? false : eventQueue.peekLast().event == EventType.VISIT;
// wrap parser failures in XerialException, keeping the original exception as the cause
205 catch (XmlPullParserException e)
207 throw new XerialException(XMLErrorCode.PARSE_ERROR, e);
209 catch (IOException e)
211 throw new XerialException(XerialErrorCode.IO_EXCEPTION, e);
/**
 * Replaces the last entry of textStack with a newly created, empty StringBuilder.
 *
 * NOTE(review): the return statement is not visible in this excerpt; callers assign the
 * result to their text buffer, so presumably the new buffer is returned.
 *
 * @return a StringBuilder (presumably the new buffer now on top of textStack)
 */
216 private StringBuilder replaceLastTextBuffer()
// discard the current top entry
218 textStack.removeLast();
219 StringBuilder textBuffer = new StringBuilder();
// install the fresh buffer in its place
220 textStack.addLast(textBuffer);
/**
 * Processes text before it is placed into an event; called by reportTextEvent(String)
 * and when text is attached to a visit event in readNext().
 *
 * NOTE(review): the body is not visible in this excerpt, so the exact transformation
 * cannot be stated here; confirm against the full source.
 *
 * @param s the raw text
 * @return the processed text
 */
224 private String sanitize(String s)
/**
 * Convenience overload: converts the buffer to a String and delegates to
 * reportTextEvent(String).
 *
 * @param buffer the buffered text to report
 */
229 private void reportTextEvent(StringBuilder buffer)
231 reportTextEvent(buffer.toString());
/**
 * Sanitizes the given text and, when the result is non-empty, pushes a text event for it
 * onto eventQueue, using eventQueue.getContextNodeName() as the node name.
 *
 * @param textData the raw text to report
 */
234 private void reportTextEvent(String textData)
236 textData = sanitize(textData);
// text that is empty after sanitizing produces no event
238 if (textData.length() > 0)
239 eventQueue.push(TreeEvent.newTextEvent(eventQueue.getContextNodeName(), textData));