+++ /dev/null
-/*--------------------------------------------------------------------------\r
- * Copyright 2009 Taro L. Saito\r
- *\r
- * Licensed under the Apache License, Version 2.0 (the "License");\r
- * you may not use this file except in compliance with the License.\r
- * You may obtain a copy of the License at\r
- *\r
- * http://www.apache.org/licenses/LICENSE-2.0\r
- *\r
- * Unless required by applicable law or agreed to in writing, software\r
- * distributed under the License is distributed on an "AS IS" BASIS,\r
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
- * See the License for the specific language governing permissions and\r
- * limitations under the License.\r
- *--------------------------------------------------------------------------*/\r
-//--------------------------------------\r
-// XerialJ\r
-//\r
-// XMLTreeParser.java\r
-// Since: Jun 5, 2009 6:55:11 PM\r
-//\r
-// $URL: http://www.xerial.org/svn/project/XerialJ/trunk/xerial-core/src/main/java/org/xerial/util/xml/XMLTreeParser.java $\r
-// $Author: leo $\r
-//--------------------------------------\r
-package org.xerial.util.xml;\r
-\r
-import static org.xmlpull.v1.XmlPullParser.*;\r
-\r
-import java.io.Reader;\r
-\r
-import javax.xml.parsers.SAXParser;\r
-import javax.xml.parsers.SAXParserFactory;\r
-\r
-import org.xerial.core.XerialErrorCode;\r
-import org.xerial.core.XerialException;\r
-import org.xerial.util.ArrayDeque;\r
-import org.xerial.util.Deque;\r
-import org.xerial.util.tree.TreeEvent;\r
-import org.xerial.util.tree.TreeEventHandler;\r
-import org.xerial.util.tree.TreeParser;\r
-import org.xerial.util.tree.TreeEvent.EventType;\r
-import org.xerial.util.xml.impl.TreeEventQueue;\r
-import org.xml.sax.Attributes;\r
-import org.xml.sax.ContentHandler;\r
-import org.xml.sax.InputSource;\r
-import org.xml.sax.Locator;\r
-import org.xml.sax.SAXException;\r
-import org.xml.sax.XMLReader;\r
-\r
-/**\r
- * A {@link TreeParser} that reads XML through a JAXP SAX parser and reports\r
- * the document to a {@link TreeEventHandler} as visit / text / leave events.\r
- *\r
- * Each element produces a visit event and a leave event; each attribute is\r
- * reported as a visit event carrying the attribute value immediately followed\r
- * by a leave event. Character data is buffered per open element and, when the\r
- * element's visit event is still the last queued event at the time the element\r
- * closes, the trimmed text is attached to that visit event as its node value;\r
- * otherwise it is reported as a separate text event.\r
- *\r
- * Element events are named after the SAX local name, attribute events after\r
- * the attribute's qualified name.\r
- */\r
-public class XMLTreeParser implements TreeParser\r
-{\r
-    /**\r
-     * Shared sentinel pushed for every opened element, meaning "no character\r
-     * data seen yet". It is compared by identity and is swapped for a fresh\r
-     * buffer before anything is appended.\r
-     */\r
-    private static final StringBuilder EMPTY_STRING = new StringBuilder(0);\r
-\r
-    /**\r
-     * When appending a chunk would grow a text buffer beyond this many\r
-     * characters, the buffered text is reported and a new buffer is started.\r
-     */\r
-    private static final int TEXT_BUFFER_MAX = 8192;\r
-\r
-    /** One text buffer per currently open element, innermost element last. */\r
-    private final Deque<StringBuilder> textStack = new ArrayDeque<StringBuilder>();\r
-\r
-    /** Events produced by the SAX callbacks and not yet delivered to the handler. */\r
-    private final TreeEventQueue eventQueue = new TreeEventQueue();\r
-\r
-    private final Reader input;\r
-    private final XMLReader xmlReader;\r
-\r
-    // Not read anywhere in this class.\r
-    private int parseState = START_DOCUMENT;\r
-\r
-    // When true, an attribute named "value" becomes the node value of its\r
-    // element's visit event instead of being reported as a child node.\r
-    // Nothing in this class ever sets it to true.\r
-    private boolean convertValueAttribute = false;\r
-\r
-    /** Receiver of the tree events; assigned by {@link #parse(TreeEventHandler)}. */\r
-    private TreeEventHandler handler;\r
-\r
-    /**\r
-     * Creates a parser that will read XML from the given reader.\r
-     *\r
-     * @param reader source of the XML text; it is consumed by {@link #parse(TreeEventHandler)}\r
-     * @throws XerialException with {@link XerialErrorCode#INVALID_STATE} when\r
-     *             the underlying SAX parser cannot be created\r
-     */\r
-    public XMLTreeParser(Reader reader) throws XerialException\r
-    {\r
-        try\r
-        {\r
-            this.input = reader;\r
-\r
-            // Namespace awareness must be switched on explicitly because the\r
-            // JAXP factory default is false; the SAX callbacks below rely on\r
-            // receiving a local name for every element.\r
-            SAXParserFactory spf = SAXParserFactory.newInstance();\r
-            spf.setNamespaceAware(true);\r
-\r
-            // NOTE(review): the factory is used with its default entity\r
-            // settings. If this parser is ever fed untrusted XML, external\r
-            // entity resolution should be disabled here (XXE) - confirm with\r
-            // callers before changing, since it alters what documents parse.\r
-\r
-            SAXParser saxParser = spf.newSAXParser();\r
-            xmlReader = saxParser.getXMLReader();\r
-            xmlReader.setContentHandler(new SAXHandler());\r
-        }\r
-        catch (Exception e)\r
-        {\r
-            // NOTE(review): only the textual form of the cause is kept; the\r
-            // XerialException constructors are not visible from this file, so\r
-            // the cause is not chained here.\r
-            throw new XerialException(XerialErrorCode.INVALID_STATE, "failed to instantiate the XML parser: " + e);\r
-        }\r
-    }\r
-\r
-    /**\r
-     * Parses the XML input and reports it to the given handler.\r
-     *\r
-     * @param handler receiver of the init / visit / text / leave / finish callbacks\r
-     * @throws IllegalArgumentException when {@code handler} is null\r
-     * @throws Exception when reading or parsing the input fails, or when the\r
-     *             handler itself throws (reported wrapped in a SAXException)\r
-     */\r
-    public void parse(TreeEventHandler handler) throws Exception\r
-    {\r
-        // Fail before any input is consumed instead of surfacing a wrapped\r
-        // NullPointerException from the first SAX callback.\r
-        if (handler == null)\r
-            throw new IllegalArgumentException("handler must not be null");\r
-\r
-        this.handler = handler;\r
-        xmlReader.parse(new InputSource(input));\r
-    }\r
-\r
-    /**\r
-     * Drains the event queue, dispatching every queued event to the handler.\r
-     *\r
-     * @throws Exception whatever the handler callback throws\r
-     */\r
-    private void flushEvent() throws Exception\r
-    {\r
-        while (!eventQueue.isEmpty())\r
-        {\r
-            TreeEvent e = eventQueue.pop();\r
-            switch (e.event)\r
-            {\r
-            case VISIT:\r
-                handler.visitNode(e.nodeName, e.nodeValue);\r
-                break;\r
-            case LEAVE:\r
-                handler.leaveNode(e.nodeName);\r
-                break;\r
-            case TEXT:\r
-                handler.text(e.nodeName, e.nodeValue);\r
-                break;\r
-            }\r
-        }\r
-    }\r
-\r
-    /**\r
-     * Tells whether the most recently queued event is a visit event, i.e.\r
-     * whether text may still be attached to it as a node value.\r
-     */\r
-    private boolean lastQueuedEventIsVisit()\r
-    {\r
-        return !eventQueue.isEmpty() && eventQueue.peekLast().event == EventType.VISIT;\r
-    }\r
-\r
-    /**\r
-     * Replaces the innermost text buffer with a fresh, empty one.\r
-     *\r
-     * @return the new buffer, already placed on top of the text stack\r
-     */\r
-    private StringBuilder replaceLastTextBuffer()\r
-    {\r
-        textStack.removeLast();\r
-        StringBuilder textBuffer = new StringBuilder();\r
-        textStack.addLast(textBuffer);\r
-        return textBuffer;\r
-    }\r
-\r
-    /** Normalizes reported text by trimming leading and trailing whitespace. */\r
-    private String sanitize(String s)\r
-    {\r
-        return s.trim();\r
-    }\r
-\r
-    /** Queues the buffer content as a text event; see {@link #reportTextEvent(String)}. */\r
-    private void reportTextEvent(StringBuilder buffer)\r
-    {\r
-        reportTextEvent(buffer.toString());\r
-    }\r
-\r
-    /**\r
-     * Queues a text event for the queue's current context node, unless the\r
-     * text is empty after trimming.\r
-     */\r
-    private void reportTextEvent(String textData)\r
-    {\r
-        textData = sanitize(textData);\r
-\r
-        if (textData.length() > 0)\r
-            eventQueue.push(TreeEvent.newTextEvent(eventQueue.getContextNodeName(), textData));\r
-    }\r
-\r
-    /**\r
-     * SAX callback adapter that converts SAX events into queued tree events.\r
-     * Any exception raised while queuing or dispatching is rethrown wrapped\r
-     * in a {@link SAXException}.\r
-     */\r
-    private class SAXHandler implements ContentHandler\r
-    {\r
-        /** Notifies the handler that parsing starts. */\r
-        public void startDocument() throws SAXException\r
-        {\r
-            try\r
-            {\r
-                handler.init();\r
-            }\r
-            catch (Exception e)\r
-            {\r
-                throw new SAXException(e);\r
-            }\r
-        }\r
-\r
-        /** Delivers all pending events, then notifies the handler that parsing is done. */\r
-        public void endDocument() throws SAXException\r
-        {\r
-            try\r
-            {\r
-                flushEvent();\r
-                handler.finish();\r
-            }\r
-            catch (Exception e)\r
-            {\r
-                throw new SAXException(e);\r
-            }\r
-        }\r
-\r
-        /**\r
-         * Queues the visit event of the element followed by a visit/leave\r
-         * pair per attribute. Nothing is flushed here, so that text arriving\r
-         * next can still be attached to the visit event.\r
-         */\r
-        public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException\r
-        {\r
-            // A new open element starts without any text.\r
-            textStack.addLast(EMPTY_STRING);\r
-\r
-            String immediateNodeValue = null;\r
-            int attributeCount = atts.getLength();\r
-            Deque<TreeEvent> startEventQueue = new ArrayDeque<TreeEvent>(attributeCount);\r
-\r
-            for (int i = 0; i < attributeCount; i++)\r
-            {\r
-                String attributeName = atts.getQName(i);\r
-                String attributeValue = atts.getValue(i);\r
-\r
-                // Optionally use the "value" attribute as the node value of\r
-                // the start tag instead of reporting it as a child node.\r
-                if (convertValueAttribute && attributeName.equals("value"))\r
-                {\r
-                    immediateNodeValue = attributeValue;\r
-                    continue;\r
-                }\r
-\r
-                startEventQueue.addLast(TreeEvent.newVisitEvent(attributeName, attributeValue));\r
-                startEventQueue.addLast(TreeEvent.newLeaveEvent(attributeName));\r
-            }\r
-\r
-            // The element's own visit event goes in front of its attributes;\r
-            // it is added last because the node value is only known after\r
-            // all attributes have been scanned.\r
-            startEventQueue.addFirst(TreeEvent.newVisitEvent(localName, immediateNodeValue));\r
-            eventQueue.push(startEventQueue);\r
-        }\r
-\r
-        /**\r
-         * Buffers character data for the innermost open element. SAX may\r
-         * deliver the text of one element in several chunks, so the data is\r
-         * accumulated and only reported early when the buffer would exceed\r
-         * {@link #TEXT_BUFFER_MAX}.\r
-         */\r
-        public void characters(char[] ch, int start, int length) throws SAXException\r
-        {\r
-            // Nothing to buffer; skip before allocating or touching the stack.\r
-            if (length <= 0)\r
-                return;\r
-\r
-            try\r
-            {\r
-                StringBuilder textBuffer = textStack.getLast();\r
-\r
-                if (textBuffer == EMPTY_STRING)\r
-                {\r
-                    // First text of this element: swap the shared sentinel\r
-                    // for a private buffer.\r
-                    textBuffer = replaceLastTextBuffer();\r
-                }\r
-                else if (textBuffer.length() + length > TEXT_BUFFER_MAX)\r
-                {\r
-                    // Report what has been collected so far and start over.\r
-                    // NOTE(review): each reported chunk is trimmed, so\r
-                    // whitespace at a chunk boundary is dropped.\r
-                    reportTextEvent(textBuffer);\r
-                    textBuffer = replaceLastTextBuffer();\r
-                }\r
-                textBuffer.append(ch, start, length);\r
-\r
-                // Keep a trailing visit event queued: endElement may still\r
-                // attach this text to it as the node value.\r
-                if (!lastQueuedEventIsVisit())\r
-                    flushEvent();\r
-            }\r
-            catch (Exception e)\r
-            {\r
-                throw new SAXException(e);\r
-            }\r
-        }\r
-\r
-        /**\r
-         * Reports the buffered text of the closing element, queues its leave\r
-         * event and delivers everything queued so far.\r
-         *\r
-         * The events are named after {@code localName}, the same name\r
-         * {@link #startElement} uses for the visit event, so that the visit\r
-         * and leave events of a prefixed element agree.\r
-         */\r
-        public void endElement(String uri, String localName, String name) throws SAXException\r
-        {\r
-            try\r
-            {\r
-                StringBuilder textBuffer = textStack.getLast();\r
-                if (textBuffer != EMPTY_STRING)\r
-                {\r
-                    if (lastQueuedEventIsVisit())\r
-                    {\r
-                        // Attach the text value to the pending visit event.\r
-                        eventQueue.replaceLast(TreeEvent.newVisitEvent(localName, sanitize(textBuffer.toString())));\r
-                    }\r
-                    else\r
-                    {\r
-                        reportTextEvent(textBuffer);\r
-                    }\r
-                }\r
-\r
-                eventQueue.push(TreeEvent.newLeaveEvent(localName));\r
-                textStack.removeLast();\r
-\r
-                flushEvent();\r
-            }\r
-            catch (Exception e)\r
-            {\r
-                throw new SAXException(e);\r
-            }\r
-        }\r
-\r
-        /** Ignored: namespace prefix mappings are not reported. */\r
-        public void startPrefixMapping(String prefix, String uri) throws SAXException\r
-        {\r
-        }\r
-\r
-        /** Ignored: namespace prefix mappings are not reported. */\r
-        public void endPrefixMapping(String prefix) throws SAXException\r
-        {\r
-        }\r
-\r
-        /** Ignored: ignorable whitespace produces no event. */\r
-        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException\r
-        {\r
-        }\r
-\r
-        /** Ignored: processing instructions produce no event. */\r
-        public void processingInstruction(String target, String data) throws SAXException\r
-        {\r
-        }\r
-\r
-        /** Ignored: the document locator is not used. */\r
-        public void setDocumentLocator(Locator locator)\r
-        {\r
-        }\r
-\r
-        /** Ignored: skipped entities produce no event. */\r
-        public void skippedEntity(String name) throws SAXException\r
-        {\r
-        }\r
-    }\r
-}\r