OSDN Git Service

5b905d5e330b8ced78c1e7fa819cb91283597f53
[xerial/xerial-core.git] / src / main / java / org / xerial / util / xml / XMLStreamReader.java
1 /*--------------------------------------------------------------------------\r
2  *  Copyright 2009 Taro L. Saito\r
3  *\r
4  *  Licensed under the Apache License, Version 2.0 (the "License");\r
5  *  you may not use this file except in compliance with the License.\r
6  *  You may obtain a copy of the License at\r
7  *\r
8  *     http://www.apache.org/licenses/LICENSE-2.0\r
9  *\r
10  *  Unless required by applicable law or agreed to in writing, software\r
11  *  distributed under the License is distributed on an "AS IS" BASIS,\r
12  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
13  *  See the License for the specific language governing permissions and\r
14  *  limitations under the License.\r
15  *--------------------------------------------------------------------------*/\r
16 //--------------------------------------\r
17 // XerialJ\r
18 //\r
19 // XMLStreamReader.java\r
20 // Since: Mar 30, 2009 5:27:15 PM\r
21 //\r
22 // $URL: http://www.xerial.org/svn/project/XerialJ/trunk/xerial-core/src/main/java/org/xerial/util/xml/XMLStreamReader.java $\r
23 // $Author: leo $\r
24 //--------------------------------------\r
25 package org.xerial.util.xml;\r
26 \r
27 import static org.xmlpull.v1.XmlPullParser.*;\r
28 \r
29 import java.io.IOException;\r
30 import java.io.Reader;\r
31 \r
32 import org.xerial.core.XerialErrorCode;\r
33 import org.xerial.core.XerialException;\r
34 import org.xerial.util.ArrayDeque;\r
35 import org.xerial.util.Deque;\r
36 import org.xerial.util.tree.TreeEvent;\r
37 import org.xerial.util.tree.TreeStreamReader;\r
38 import org.xerial.util.tree.TreeEvent.EventType;\r
39 import org.xerial.util.xml.impl.TreeEventQueue;\r
40 import org.xerial.util.xml.pullparser.PullParserUtil;\r
41 import org.xmlpull.v1.XmlPullParser;\r
42 import org.xmlpull.v1.XmlPullParserException;\r
43 \r
44 /**\r
45  * {@link TreeStreamReader} of XML data\r
46  * \r
47  * @author leo\r
48  * \r
49  */\r
50 public class XMLStreamReader implements TreeStreamReader\r
51 {\r
52     private final XmlPullParser pullParser;\r
53     private final Deque<StringBuilder> textStack = new ArrayDeque<StringBuilder>();\r
54     private final static StringBuilder EMPTY_STRING = new StringBuilder(0);\r
55     private int TEXT_BUFFER_MAX = 8192;\r
56 \r
57     private int parseState = START_DOCUMENT;\r
58     private boolean convertValueAttribute = false;\r
59 \r
60     private final TreeEventQueue eventQueue = new TreeEventQueue();\r
61 \r
62     public XMLStreamReader(Reader reader)\r
63     {\r
64         if (reader == null)\r
65             throw new IllegalArgumentException("XML reader is null");\r
66 \r
67         pullParser = PullParserUtil.newParser(reader);\r
68 \r
69     }\r
70 \r
71     public void useValueAttributeAsNodeValue(boolean enable)\r
72     {\r
73         this.convertValueAttribute = enable;\r
74     }\r
75 \r
76     public TreeEvent peekNext() throws XerialException\r
77     {\r
78         if (!eventQueue.isEmpty())\r
79         {\r
80             return eventQueue.peekFirst();\r
81         }\r
82 \r
83         if (parseState == END_DOCUMENT)\r
84             return null;\r
85 \r
86         readNext();\r
87 \r
88         return peekNext();\r
89     }\r
90 \r
91     public TreeEvent next() throws XerialException\r
92     {\r
93         if (!eventQueue.isEmpty())\r
94         {\r
95             return eventQueue.pop();\r
96         }\r
97 \r
98         if (parseState == END_DOCUMENT)\r
99             return null;\r
100 \r
101         readNext();\r
102 \r
103         return next();\r
104     }\r
105 \r
106     public void readNext() throws XerialException\r
107     {\r
108         if (parseState == END_DOCUMENT)\r
109             return;\r
110 \r
111         try\r
112         {\r
113             parseState = pullParser.next();\r
114 \r
115             switch (parseState)\r
116             {\r
117             case START_TAG:\r
118             {\r
119                 textStack.addLast(EMPTY_STRING);\r
120                 String tagName = pullParser.getName();\r
121                 String immediateNodeValue = null;\r
122 \r
123                 Deque<TreeEvent> startEventQueue = new ArrayDeque<TreeEvent>(pullParser.getAttributeCount() + 1);\r
124                 // read attributes\r
125                 for (int i = 0; i < pullParser.getAttributeCount(); i++)\r
126                 {\r
127                     String attributeName = pullParser.getAttributeName(i);\r
128                     String attributeValue = pullParser.getAttributeValue(i);\r
129 \r
130                     // assign the value attribute as a node value of the start tag \r
131                     if (convertValueAttribute && attributeName.equals("value"))\r
132                     {\r
133                         immediateNodeValue = attributeValue;\r
134                         continue;\r
135                     }\r
136 \r
137                     startEventQueue.addLast(TreeEvent.newVisitEvent(attributeName, attributeValue));\r
138                     startEventQueue.addLast(TreeEvent.newLeaveEvent(attributeName));\r
139                 }\r
140 \r
141                 // push a new start tag event to the front of the queue\r
142                 startEventQueue.addFirst(TreeEvent.newVisitEvent(tagName, immediateNodeValue));\r
143                 eventQueue.push(startEventQueue);\r
144 \r
145                 // pre-fetch the next event\r
146                 readNext();\r
147             }\r
148                 break;\r
149             case END_TAG:\r
150             {\r
151                 if (textStack.getLast() == EMPTY_STRING)\r
152                 {\r
153                     eventQueue.push(TreeEvent.newLeaveEvent(pullParser.getName()));\r
154                 }\r
155                 else\r
156                 {\r
157                     StringBuilder textBuffer = textStack.getLast();\r
158                     if (!eventQueue.isEmpty() && eventQueue.peekLast().event == EventType.VISIT)\r
159                     {\r
160                         // attach the text value to the the previous visit event\r
161                         eventQueue.replaceLast(TreeEvent.newVisitEvent(pullParser.getName(), sanitize(textBuffer\r
162                                 .toString())));\r
163                     }\r
164                     else\r
165                         reportTextEvent(textBuffer);\r
166 \r
167                     eventQueue.push(TreeEvent.newLeaveEvent(pullParser.getName()));\r
168                 }\r
169                 textStack.removeLast();\r
170             }\r
171                 break;\r
172             case TEXT:\r
173             {\r
174                 String textData = pullParser.getText();\r
175                 StringBuilder textBuffer = textStack.getLast();\r
176 \r
177                 if (textData.length() <= 0)\r
178                     break;\r
179 \r
180                 if (textBuffer == EMPTY_STRING)\r
181                 {\r
182                     textBuffer = replaceLastTextBuffer();\r
183                 }\r
184                 else if (textBuffer.length() + textData.length() > TEXT_BUFFER_MAX)\r
185                 {\r
186                     // add the previous text data to the event queue\r
187                     reportTextEvent(textBuffer);\r
188 \r
189                     // replace the text buffer\r
190                     textBuffer = replaceLastTextBuffer();\r
191                 }\r
192                 textBuffer.append(textData);\r
193 \r
194                 boolean needPrefetch = eventQueue.isEmpty() ? false : eventQueue.peekLast().event == EventType.VISIT;\r
195                 if (needPrefetch)\r
196                     readNext();\r
197             }\r
198                 break;\r
199             default:\r
200                 // do nothing\r
201                 break;\r
202             }\r
203 \r
204         }\r
205         catch (XmlPullParserException e)\r
206         {\r
207             throw new XerialException(XMLErrorCode.PARSE_ERROR, e);\r
208         }\r
209         catch (IOException e)\r
210         {\r
211             throw new XerialException(XerialErrorCode.IO_EXCEPTION, e);\r
212         }\r
213 \r
214     }\r
215 \r
216     private StringBuilder replaceLastTextBuffer()\r
217     {\r
218         textStack.removeLast();\r
219         StringBuilder textBuffer = new StringBuilder();\r
220         textStack.addLast(textBuffer);\r
221         return textBuffer;\r
222     }\r
223 \r
224     private String sanitize(String s)\r
225     {\r
226         return s.trim();\r
227     }\r
228 \r
229     private void reportTextEvent(StringBuilder buffer)\r
230     {\r
231         reportTextEvent(buffer.toString());\r
232     }\r
233 \r
234     private void reportTextEvent(String textData)\r
235     {\r
236         textData = sanitize(textData);\r
237 \r
238         if (textData.length() > 0)\r
239             eventQueue.push(TreeEvent.newTextEvent(eventQueue.getContextNodeName(), textData));\r
240     }\r
241 \r
242 }\r