1 /*--------------------------------------------------------------------------
2 * Copyright 2004 Taro L. Saito
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *--------------------------------------------------------------------------*/
16 //--------------------------------------
19 // StrongDataGuide.java
24 //--------------------------------------
25 package org.xerial.util.xml.index;
27 import static org.xmlpull.v1.XmlPullParser.END_DOCUMENT;
28 import static org.xmlpull.v1.XmlPullParser.END_TAG;
29 import static org.xmlpull.v1.XmlPullParser.START_TAG;
30 import static org.xmlpull.v1.XmlPullParser.TEXT;
32 import java.io.BufferedReader;
33 import java.io.FileNotFoundException;
34 import java.io.FileReader;
35 import java.io.IOException;
36 import java.io.OutputStream;
37 import java.io.PrintWriter;
38 import java.io.Reader;
39 import java.util.Collection;
40 import java.util.Stack;
42 import org.xerial.core.XerialException;
43 import org.xerial.util.StringUtil;
44 import org.xerial.util.cui.OptionParser;
45 import org.xerial.util.cui.OptionParserException;
46 import org.xerial.util.graph.AdjacencyList;
47 import org.xerial.util.graph.Edge;
48 import org.xerial.util.graph.Graph;
49 import org.xerial.util.log.Logger;
50 import org.xerial.util.xml.SinglePath;
51 import org.xerial.util.xml.XMLErrorCode;
52 import org.xerial.util.xml.XMLException;
53 import org.xerial.util.xml.pullparser.PullParserUtil;
54 import org.xmlpull.v1.XmlPullParser;
55 import org.xmlpull.v1.XmlPullParserException;
58 * StrongDataGuide, which aggregates XML nodes that share the same path
63 public class StrongDataGuide
// Path graph: nodes are labeled with SinglePath values, edges with String labels.
65 private Graph<SinglePath, String> _graph = new AdjacencyList<SinglePath, String>();
// Node ID of the path the cursor currently points at; updated by moveCursor() and traceBack().
66 private int _currentPathID = 0;
// Node IDs the cursor held before each moveCursor() call; traceBack() pops them again.
67 private Stack<Integer> _cursorHistory = new Stack<Integer>();
// Node label (path) that belongs to _currentPathID.
68 private SinglePath _currentPath;
// Root path obtained from SinglePath.rootPath() in the constructor.
69 private SinglePath _rootPath;
// Node ID returned by _graph.addNode() for _rootPath.
70 private int _rootPathID;
// Logger used for the trace messages emitted while parsing.
71 private Logger _logger = Logger.getLogger(StrongDataGuide.class);
// Creates an empty data guide whose graph contains only the root path.
76 public StrongDataGuide()
79 _rootPath = SinglePath.rootPath();
// remember the node ID of the root; generateFrom(Reader) resets the cursor to it
80 _rootPathID = _graph.addNode(_rootPath);
84 * Generates the StrongDataGuide from the XML file
87 * @throws FileNotFoundException
88 * @throws XMLParserException
89 * @throws XMLException
91 * @throws XerialException
93 public void generateFrom(String xmlFile) throws FileNotFoundException, XMLException, IOException, XerialException
95 Reader reader = new BufferedReader(new FileReader(xmlFile));
100 * Generates the {@link StrongDataGuide} from the {@link Reader}
103 * @throws XMLException if the pull parser reports a parse error
104 * @throws IOException if reading from the given reader fails
// Builds the data guide by pull-parsing the XML supplied by xmlReader.
// NOTE(review): several statements of this method are not visible in this view (the
// dispatch on the parser state after the loop header, and everything between the
// attribute loop and the catch clause); the comments below cover the visible lines only.
106 public void generateFrom(Reader xmlReader) throws XMLException, IOException, XerialException
// start from the root node of the path graph
109 _currentPathID = _rootPathID;
110 _currentPath = _rootPath;
112 XmlPullParser parser = PullParserUtil.newParser(xmlReader);
// consume parser events until the end of the document is reported
116 while ((state = parser.next()) != END_DOCUMENT)
121 String name = parser.getName();
122 _logger.trace("start tag: " + name);
// resolve the node ID of this tag name as a child of the current path
123 int pathID = getPathID(name);
124 _logger.trace("path ID : " + pathID);
126 // process attributes
127 for (int i = 0; i < parser.getAttributeCount(); i++)
// an attribute is identified by the string "<tag name>@<attribute name>"
129 int attributeID = getPathID(String.format("%s@%s", name, parser.getAttributeName(i)));
130 moveCursor(attributeID);
// parser failures are rethrown as XMLException with PARSE_ERROR, passing the parser exception along
143 catch (XmlPullParserException e)
145 throw new XMLException(XMLErrorCode.PARSE_ERROR, e);
149 private void moveCursor(int pathID)
151 Collection<Integer> destNodeID = _graph.getDestNodeIDSetOf(_currentPathID);
152 if (!destNodeID.contains(pathID))
154 _graph.addEdge(new Edge(_currentPathID, pathID), "edge");
156 _cursorHistory.push(_currentPathID);
157 _currentPathID = pathID;
158 _currentPath = _graph.getNodeLabel(pathID);
161 private void traceBack()
163 assert !_cursorHistory.empty();
164 _currentPathID = _cursorHistory.pop();
165 _currentPath = _graph.getNodeLabel(_currentPathID);
// Resolves the graph node ID of the path formed by appending tagName to the current path.
// NOTE(review): the condition guarding addNode() and the return statement are not visible
// in this view - presumably the node is added only when getNodeID() finds none; confirm.
168 private int getPathID(String tagName)
170 SinglePath path = new SinglePath(_currentPath, tagName);
171 int pathID = _graph.getNodeID(path);
174 pathID = _graph.addNode(path);
// Writes the path graph to out as Graphviz text: a "digraph G {" header, one labeled
// statement per node, then one "source -> destination;" statement per edge.
// NOTE(review): the end of this method is not visible in this view; confirm that the
// closing "}" is printed and that the PrintWriter is flushed.
179 public void outputGraphviz(OutputStream out)
181 PrintWriter gout = new PrintWriter(out);
182 gout.println("digraph G {");
183 // output node labels
184 for (int pathID : _graph.getNodeIDSet())
186 SinglePath path = _graph.getNodeLabel(pathID);
// the node label is the leaf of the path, passed through StringUtil.quote with a double quote
187 gout.println(pathID + " [label=" + StringUtil.quote(path.getLeaf(), "\"") + "];");
// output one edge statement for every destination of every node
189 for (int pathID : _graph.getNodeIDSet())
191 for (int destNodeID : _graph.getDestNodeIDSetOf(pathID))
193 gout.println(pathID + " -> " + destNodeID + ";");
// Option identifiers used with the OptionParser in main(); the constants are not visible in this view.
201 public static enum Opt {
// Command-line entry point: builds a StrongDataGuide from the XML file named by the
// first argument and prints it to standard output via outputGraphviz().
// NOTE(review): some statements are not visible in this view (presumably the call that
// parses args and the early return after the help message); confirm against the full file.
205 public static void main(String[] args) throws OptionParserException
207 OptionParser<Opt> opt = new OptionParser<Opt>();
208 opt.addOption(Opt.help, "h", "help", "display help messages");
// show the usage text when help is requested or no argument is given
211 if (opt.isSet(Opt.help) || opt.getArgumentLength() < 1)
213 printHelpMessage(opt);
217 StrongDataGuide sdg = new StrongDataGuide();
220 sdg.generateFrom(opt.getArgument(0));
221 sdg.outputGraphviz(System.out);
// failures are reported on standard error by message only
223 catch (XerialException e)
225 System.err.println(e.getMessage());
227 catch (IOException e)
229 System.err.println(e.getMessage());
// Prints the usage line followed by the option help text produced by opt.helpMessage().
233 private static void printHelpMessage(OptionParser<Opt> opt)
235 System.out.println("usage: > java -jar StrongDataGuide.jar [option] xml_file");
236 System.out.println(opt.helpMessage());