OSDN Git Service

1011d7020777c492229724bad4bdef5727e2c6b8
[xerial/xerial-core.git] / src / main / java / org / xerial / util / xml / index / StrongDataGuide.java
1 /*--------------------------------------------------------------------------
2  *  Copyright 2004 Taro L. Saito
3  *
4  *  Licensed under the Apache License, Version 2.0 (the "License");
5  *  you may not use this file except in compliance with the License.
6  *  You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  *  Unless required by applicable law or agreed to in writing, software
11  *  distributed under the License is distributed on an "AS IS" BASIS,
12  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  *  See the License for the specific language governing permissions and
14  *  limitations under the License.
15  *--------------------------------------------------------------------------*/
16 //--------------------------------------
17 // XerialJ Project
18 //
19 // StrongDataGuide.java
20 // Since: 2005/06/02
21 //
22 // $URL$ 
23 // $Author$
24 //--------------------------------------
25 package org.xerial.util.xml.index;
26
27 import static org.xmlpull.v1.XmlPullParser.END_DOCUMENT;
28 import static org.xmlpull.v1.XmlPullParser.END_TAG;
29 import static org.xmlpull.v1.XmlPullParser.START_TAG;
30 import static org.xmlpull.v1.XmlPullParser.TEXT;
31
32 import java.io.BufferedReader;
33 import java.io.FileNotFoundException;
34 import java.io.FileReader;
35 import java.io.IOException;
36 import java.io.OutputStream;
37 import java.io.PrintWriter;
38 import java.io.Reader;
39 import java.util.Collection;
40 import java.util.Stack;
41
42 import org.xerial.core.XerialException;
43 import org.xerial.util.StringUtil;
44 import org.xerial.util.cui.OptionParser;
45 import org.xerial.util.cui.OptionParserException;
46 import org.xerial.util.graph.AdjacencyList;
47 import org.xerial.util.graph.Edge;
48 import org.xerial.util.graph.Graph;
49 import org.xerial.util.log.Logger;
50 import org.xerial.util.xml.SinglePath;
51 import org.xerial.util.xml.XMLErrorCode;
52 import org.xerial.util.xml.XMLException;
53 import org.xerial.util.xml.pullparser.PullParserUtil;
54 import org.xmlpull.v1.XmlPullParser;
55 import org.xmlpull.v1.XmlPullParserException;
56
57 /**
58  * StrongDataGuide, which aggregates XML nodes based on same paths
59  * 
60  * @author leo
61  * 
62  */
63 public class StrongDataGuide
64 {
65     private Graph<SinglePath, String> _graph         = new AdjacencyList<SinglePath, String>();
66     private int                       _currentPathID = 0;
67     private Stack<Integer>            _cursorHistory = new Stack<Integer>();
68     private SinglePath                _currentPath;
69     private SinglePath                _rootPath;
70     private int                       _rootPathID;
71     private Logger                    _logger        = Logger.getLogger(StrongDataGuide.class);
72
73     /**
74      * 
75      */
76     public StrongDataGuide()
77     {
78         super();
79         _rootPath = SinglePath.rootPath();
80         _rootPathID = _graph.addNode(_rootPath);
81     }
82
83     /**
84      * Generates the StrongDataGuide from the XML file
85      * 
86      * @param xmlFile
87      * @throws FileNotFoundException
88      * @throws XMLParserException
89      * @throws XMLException
90      * @throws IOException
91      * @throws XerialException
92      */
93     public void generateFrom(String xmlFile) throws FileNotFoundException, XMLException, IOException, XerialException
94     {
95         Reader reader = new BufferedReader(new FileReader(xmlFile));
96         generateFrom(reader);
97     }
98
99     /**
100      * Generates the {@link StrongDataGuide} from the {@link Reader}
101      * 
102      * @param xmlReader
103      * @throws FileNotFoundException
104      * @throws XMLParserException
105      */
106     public void generateFrom(Reader xmlReader) throws XMLException, IOException, XerialException
107     {
108         // initialize
109         _currentPathID = _rootPathID;
110         _currentPath = _rootPath;
111
112         XmlPullParser parser = PullParserUtil.newParser(xmlReader);
113         try
114         {
115             int state;
116             while ((state = parser.next()) != END_DOCUMENT)
117             {
118                 switch (state)
119                 {
120                 case START_TAG:
121                     String name = parser.getName();
122                     _logger.trace("start tag: " + name);
123                     int pathID = getPathID(name);
124                     _logger.trace("path ID  : " + pathID);
125                     moveCursor(pathID);
126                     // process attributes
127                     for (int i = 0; i < parser.getAttributeCount(); i++)
128                     {
129                         int attributeID = getPathID(String.format("%s@%s", name, parser.getAttributeName(i)));
130                         moveCursor(attributeID);
131                         traceBack();
132                     }
133                     break;
134                 case END_TAG:
135                     traceBack();
136                     break;
137                 case TEXT:
138                     break;
139                 }
140             }
141
142         }
143         catch (XmlPullParserException e)
144         {
145             throw new XMLException(XMLErrorCode.PARSE_ERROR, e);
146         }
147     }
148
149     private void moveCursor(int pathID)
150     {
151         Collection<Integer> destNodeID = _graph.getDestNodeIDSetOf(_currentPathID);
152         if (!destNodeID.contains(pathID))
153         {
154             _graph.addEdge(new Edge(_currentPathID, pathID), "edge");
155         }
156         _cursorHistory.push(_currentPathID);
157         _currentPathID = pathID;
158         _currentPath = _graph.getNodeLabel(pathID);
159     }
160
161     private void traceBack()
162     {
163         assert !_cursorHistory.empty();
164         _currentPathID = _cursorHistory.pop();
165         _currentPath = _graph.getNodeLabel(_currentPathID);
166     }
167
168     private int getPathID(String tagName)
169     {
170         SinglePath path = new SinglePath(_currentPath, tagName);
171         int pathID = _graph.getNodeID(path);
172         if (pathID == -1)
173         {
174             pathID = _graph.addNode(path);
175         }
176         return pathID;
177     }
178
179     public void outputGraphviz(OutputStream out)
180     {
181         PrintWriter gout = new PrintWriter(out);
182         gout.println("digraph G {");
183         // output node labels
184         for (int pathID : _graph.getNodeIDSet())
185         {
186             SinglePath path = _graph.getNodeLabel(pathID);
187             gout.println(pathID + " [label=" + StringUtil.quote(path.getLeaf(), "\"") + "];");
188         }
189         for (int pathID : _graph.getNodeIDSet())
190         {
191             for (int destNodeID : _graph.getDestNodeIDSetOf(pathID))
192             {
193                 gout.println(pathID + " -> " + destNodeID + ";");
194             }
195         }
196
197         gout.println("}");
198         gout.flush();
199     }
200
201     public static enum Opt {
202         help
203     }
204
205     public static void main(String[] args) throws OptionParserException
206     {
207         OptionParser<Opt> opt = new OptionParser<Opt>();
208         opt.addOption(Opt.help, "h", "help", "display help messages");
209
210         opt.parse(args);
211         if (opt.isSet(Opt.help) || opt.getArgumentLength() < 1)
212         {
213             printHelpMessage(opt);
214             return;
215         }
216
217         StrongDataGuide sdg = new StrongDataGuide();
218         try
219         {
220             sdg.generateFrom(opt.getArgument(0));
221             sdg.outputGraphviz(System.out);
222         }
223         catch (XerialException e)
224         {
225             System.err.println(e.getMessage());
226         }
227         catch (IOException e)
228         {
229             System.err.println(e.getMessage());
230         }
231     }
232
233     private static void printHelpMessage(OptionParser<Opt> opt)
234     {
235         System.out.println("usage: > java -jar StrongDataGuide.jar [option] xml_file");
236         System.out.println(opt.helpMessage());
237
238     }
239
240 }