OSDN Git Service

how to mirror to SourceForge.jp
[xerial/xerial-core.git] / src / main / java / org / xerial / util / xml / XMLStreamReader.java
1 /*--------------------------------------------------------------------------
2  *  Copyright 2009 Taro L. Saito
3  *
4  *  Licensed under the Apache License, Version 2.0 (the "License");
5  *  you may not use this file except in compliance with the License.
6  *  You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  *  Unless required by applicable law or agreed to in writing, software
11  *  distributed under the License is distributed on an "AS IS" BASIS,
12  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  *  See the License for the specific language governing permissions and
14  *  limitations under the License.
15  *--------------------------------------------------------------------------*/
16 //--------------------------------------
17 // XerialJ
18 //
19 // XMLStreamReader.java
20 // Since: Mar 30, 2009 5:27:15 PM
21 //
22 // $URL$
23 // $Author$
24 //--------------------------------------
25 package org.xerial.util.xml;
26
27 import static org.xmlpull.v1.XmlPullParser.*;
28
29 import java.io.IOException;
30 import java.io.Reader;
31
32 import org.xerial.core.XerialErrorCode;
33 import org.xerial.core.XerialException;
34 import org.xerial.util.ArrayDeque;
35 import org.xerial.util.Deque;
36 import org.xerial.util.tree.TreeEvent;
37 import org.xerial.util.tree.TreeStreamReader;
38 import org.xerial.util.tree.TreeEvent.EventType;
39 import org.xerial.util.xml.impl.TreeEventQueue;
40 import org.xerial.util.xml.pullparser.PullParserUtil;
41 import org.xmlpull.v1.XmlPullParser;
42 import org.xmlpull.v1.XmlPullParserException;
43
44 /**
45  * {@link TreeStreamReader} of XML data
46  * 
47  * @author leo
48  * 
49  */
50 public class XMLStreamReader implements TreeStreamReader
51 {
52     private final XmlPullParser pullParser;
53     private final Deque<StringBuilder> textStack = new ArrayDeque<StringBuilder>();
54     private final static StringBuilder EMPTY_STRING = new StringBuilder(0);
55     private int TEXT_BUFFER_MAX = 8192;
56
57     private int parseState = START_DOCUMENT;
58     private boolean convertValueAttribute = false;
59
60     private final TreeEventQueue eventQueue = new TreeEventQueue();
61
62     public XMLStreamReader(Reader reader)
63     {
64         if (reader == null)
65             throw new IllegalArgumentException("XML reader is null");
66
67         pullParser = PullParserUtil.newParser(reader);
68
69     }
70
71     public void useValueAttributeAsNodeValue(boolean enable)
72     {
73         this.convertValueAttribute = enable;
74     }
75
76     public TreeEvent peekNext() throws XerialException
77     {
78         if (!eventQueue.isEmpty())
79         {
80             return eventQueue.peekFirst();
81         }
82
83         if (parseState == END_DOCUMENT)
84             return null;
85
86         readNext();
87
88         return peekNext();
89     }
90
91     public TreeEvent next() throws XerialException
92     {
93         if (!eventQueue.isEmpty())
94         {
95             return eventQueue.pop();
96         }
97
98         if (parseState == END_DOCUMENT)
99             return null;
100
101         readNext();
102
103         return next();
104     }
105
106     public void readNext() throws XerialException
107     {
108         if (parseState == END_DOCUMENT)
109             return;
110
111         try
112         {
113             parseState = pullParser.next();
114
115             switch (parseState)
116             {
117             case START_TAG:
118             {
119                 textStack.addLast(EMPTY_STRING);
120                 String tagName = pullParser.getName();
121                 String immediateNodeValue = null;
122
123                 Deque<TreeEvent> startEventQueue = new ArrayDeque<TreeEvent>(pullParser.getAttributeCount() + 1);
124                 // read attributes
125                 for (int i = 0; i < pullParser.getAttributeCount(); i++)
126                 {
127                     String attributeName = pullParser.getAttributeName(i);
128                     String attributeValue = pullParser.getAttributeValue(i);
129
130                     // assign the value attribute as a node value of the start tag 
131                     if (convertValueAttribute && attributeName.equals("value"))
132                     {
133                         immediateNodeValue = attributeValue;
134                         continue;
135                     }
136
137                     startEventQueue.addLast(TreeEvent.newVisitEvent(attributeName, attributeValue));
138                     startEventQueue.addLast(TreeEvent.newLeaveEvent(attributeName));
139                 }
140
141                 // push a new start tag event to the front of the queue
142                 startEventQueue.addFirst(TreeEvent.newVisitEvent(tagName, immediateNodeValue));
143                 eventQueue.push(startEventQueue);
144
145                 // pre-fetch the next event
146                 readNext();
147             }
148                 break;
149             case END_TAG:
150             {
151                 if (textStack.getLast() == EMPTY_STRING)
152                 {
153                     eventQueue.push(TreeEvent.newLeaveEvent(pullParser.getName()));
154                 }
155                 else
156                 {
157                     StringBuilder textBuffer = textStack.getLast();
158                     if (!eventQueue.isEmpty() && eventQueue.peekLast().event == EventType.VISIT)
159                     {
160                         // attach the text value to the the previous visit event
161                         eventQueue.replaceLast(TreeEvent.newVisitEvent(pullParser.getName(), sanitize(textBuffer
162                                 .toString())));
163                     }
164                     else
165                         reportTextEvent(textBuffer);
166
167                     eventQueue.push(TreeEvent.newLeaveEvent(pullParser.getName()));
168                 }
169                 textStack.removeLast();
170             }
171                 break;
172             case TEXT:
173             {
174                 String textData = pullParser.getText();
175                 StringBuilder textBuffer = textStack.getLast();
176
177                 if (textData.length() <= 0)
178                     break;
179
180                 if (textBuffer == EMPTY_STRING)
181                 {
182                     textBuffer = replaceLastTextBuffer();
183                 }
184                 else if (textBuffer.length() + textData.length() > TEXT_BUFFER_MAX)
185                 {
186                     // add the previous text data to the event queue
187                     reportTextEvent(textBuffer);
188
189                     // replace the text buffer
190                     textBuffer = replaceLastTextBuffer();
191                 }
192                 textBuffer.append(textData);
193
194                 boolean needPrefetch = eventQueue.isEmpty() ? false : eventQueue.peekLast().event == EventType.VISIT;
195                 if (needPrefetch)
196                     readNext();
197             }
198                 break;
199             default:
200                 // do nothing
201                 break;
202             }
203
204         }
205         catch (XmlPullParserException e)
206         {
207             throw new XerialException(XMLErrorCode.PARSE_ERROR, e);
208         }
209         catch (IOException e)
210         {
211             throw new XerialException(XerialErrorCode.IO_EXCEPTION, e);
212         }
213
214     }
215
216     private StringBuilder replaceLastTextBuffer()
217     {
218         textStack.removeLast();
219         StringBuilder textBuffer = new StringBuilder();
220         textStack.addLast(textBuffer);
221         return textBuffer;
222     }
223
224     private String sanitize(String s)
225     {
226         return s.trim();
227     }
228
229     private void reportTextEvent(StringBuilder buffer)
230     {
231         reportTextEvent(buffer.toString());
232     }
233
234     private void reportTextEvent(String textData)
235     {
236         textData = sanitize(textData);
237
238         if (textData.length() > 0)
239             eventQueue.push(TreeEvent.newTextEvent(eventQueue.getContextNodeName(), textData));
240     }
241
242 }