+++ /dev/null
-/*--------------------------------------------------------------------------\r
- * Copyright 2009 Taro L. Saito\r
- *\r
- * Licensed under the Apache License, Version 2.0 (the "License");\r
- * you may not use this file except in compliance with the License.\r
- * You may obtain a copy of the License at\r
- *\r
- * http://www.apache.org/licenses/LICENSE-2.0\r
- *\r
- * Unless required by applicable law or agreed to in writing, software\r
- * distributed under the License is distributed on an "AS IS" BASIS,\r
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
- * See the License for the specific language governing permissions and\r
- * limitations under the License.\r
- *--------------------------------------------------------------------------*/\r
-//--------------------------------------\r
-// XerialJ\r
-//\r
-// Scan.java\r
-// Since: Apr 23, 2009 6:50:10 PM\r
-//\r
-// $URL: http://www.xerial.org/svn/project/XerialJ/trunk/xerial-core/src/main/java/org/xerial/silk/cui/Scan.java $\r
-// $Author: leo $\r
-//--------------------------------------\r
-package org.xerial.silk.cui;\r
-\r
-import java.io.BufferedReader;\r
-import java.io.File;\r
-import java.io.FileReader;\r
-\r
-import org.xerial.lens.ObjectLens;\r
-import org.xerial.silk.SilkEvent;\r
-import org.xerial.silk.SilkEventHandler;\r
-import org.xerial.silk.SilkEventType;\r
-import org.xerial.silk.SilkLineFastParser;\r
-import org.xerial.silk.SilkLinePushParser;\r
-import org.xerial.silk.SilkParser;\r
-import org.xerial.silk.SilkParserConfig;\r
-import org.xerial.util.StopWatch;\r
-import org.xerial.util.log.Logger;\r
-import org.xerial.util.opt.Argument;\r
-import org.xerial.util.opt.Option;\r
-import org.xerial.util.opt.Usage;\r
-import org.xerial.util.tree.TreeEventHandlerBase;\r
-\r
-/**\r
- * Scan command\r
- * \r
- * @author leo\r
- * \r
- */\r
-@Usage(command = "silk scan", description = "silk file scanner")\r
-public class Scan implements SilkCommand\r
-{\r
- private static Logger _logger = Logger.getLogger(Scan.class);\r
-\r
- public static enum ScanMode {\r
- LINE, NODE, FASTLINE, READONLY\r
- }\r
-\r
- @Argument(index = 0)\r
- private String inputSilkFile = null;\r
-\r
- @Option(symbol = "m", longName = "mode", description = "scan mode: line, fastline, node, readonly")\r
- private ScanMode mode = ScanMode.NODE;\r
-\r
- @Option(symbol = "b", longName = "buffer", description = "buffer size in MB (default = 1)")\r
- private int bufferSizeInMB = 1;\r
-\r
- @Option(symbol = "n", longName = "thread", description = "num workder threads")\r
- private int numThreads = 2;\r
-\r
- @Option(symbol = "c", longName = "lines", description = "num assigned lines for each worker threads")\r
- private int numLines = 1000;\r
-\r
- private void reportReadSpeed(double time, long fileSize)\r
- {\r
- double speedInMBS = fileSize / 1024 / 1024 / time;\r
- _logger.info(String.format("\ntime=%.2f, %3.2f MB/s", time, speedInMBS));\r
-\r
- }\r
-\r
- private void reportLinesPerSec(double time, long lineCount)\r
- {\r
- double speed = lineCount / time;\r
- System.err.print(String.format("time=%5.2f line=%,10d %,10.0f lines/s\r", time, lineCount, speed));\r
- }\r
-\r
- public void execute() throws Exception\r
- {\r
- File f = new File(inputSilkFile);\r
- final long fileSize = f.length();\r
-\r
- final SilkParserConfig config = new SilkParserConfig();\r
- config.bufferSize = bufferSizeInMB * 1024 * 1024;\r
- config.numWorkers = numThreads;\r
- config.numLinesInBlock = numLines;\r
-\r
- _logger.info("config: " + ObjectLens.toJSON(config));\r
-\r
- switch (mode)\r
- {\r
- case NODE:\r
- {\r
- SilkParser parser = new SilkParser(f.toURL(), config);\r
-\r
- parser.parse(new TreeEventHandlerBase() {\r
-\r
- int count = 0;\r
- StopWatch timer = new StopWatch();\r
-\r
- @Override\r
- public void init() throws Exception\r
- {\r
- timer.reset();\r
- }\r
-\r
- @Override\r
- public void visitNode(String nodeName, String immediateNodeValue) throws Exception\r
- {\r
- count++;\r
- if (count % 1000000 == 0)\r
- {\r
- double time = timer.getElapsedTime();\r
- double speed = count / time;\r
- System.err.print(String.format("node=%,15d time=%5.2f %,10.0f nodes/s\r", count, time, speed));\r
- }\r
-\r
- }\r
-\r
- @Override\r
- public void finish() throws Exception\r
- {\r
- double time = timer.getElapsedTime();\r
- double speedPerNode = ((double) count) / time;\r
- double speedInMBS = fileSize / 1024 / 1024 / time;\r
- _logger.info(String.format("\ntime=%.2f %,10.0f nodes/s, %3.2f MB/s", time, speedPerNode,\r
- speedInMBS));\r
- }\r
-\r
- });\r
- break;\r
- }\r
- case LINE:\r
- {\r
- SilkLinePushParser parser = new SilkLinePushParser(f.toURL(), config);\r
- parser.parse(new SilkEventHandler() {\r
-\r
- int lineCount = 0;\r
- StopWatch timer = new StopWatch();\r
-\r
- public void handle(SilkEvent event) throws Exception\r
- {\r
- if (event.getType() == SilkEventType.END_OF_FILE)\r
- {\r
- reportReadSpeed(timer.getElapsedTime(), fileSize);\r
- return;\r
- }\r
-\r
- lineCount++;\r
- if (lineCount % 100000 == 0)\r
- {\r
- reportLinesPerSec(timer.getElapsedTime(), lineCount);\r
- }\r
-\r
- }\r
- });\r
-\r
- break;\r
- }\r
- case FASTLINE:\r
- {\r
- SilkLineFastParser parser = new SilkLineFastParser(f.toURL(), config);\r
- parser.parse(new SilkEventHandler() {\r
-\r
- int lineCount = 0;\r
- StopWatch timer = new StopWatch();\r
-\r
- public void handle(SilkEvent event) throws Exception\r
- {\r
- if (event.getType() == SilkEventType.END_OF_FILE)\r
- {\r
- reportReadSpeed(timer.getElapsedTime(), fileSize);\r
- return;\r
- }\r
-\r
- lineCount++;\r
- if (lineCount % 100000 == 0)\r
- {\r
- reportLinesPerSec(timer.getElapsedTime(), lineCount);\r
- }\r
-\r
- }\r
- });\r
-\r
- break;\r
- }\r
- case READONLY:\r
- {\r
- BufferedReader reader = new BufferedReader(new FileReader(f), config.bufferSize);\r
- String line;\r
-\r
- int lineCount = 0;\r
- StopWatch timer = new StopWatch();\r
-\r
- while ((line = reader.readLine()) != null)\r
- {\r
- lineCount++;\r
- if (lineCount % 100000 == 0)\r
- {\r
- reportLinesPerSec(timer.getElapsedTime(), lineCount);\r
- }\r
-\r
- }\r
-\r
- reportReadSpeed(timer.getElapsedTime(), fileSize);\r
-\r
- break;\r
- }\r
- }\r
-\r
- }\r
-\r
- public String getName()\r
- {\r
- return "scan";\r
- }\r
-\r
- public String getOneLineDescription()\r
- {\r
- return "scanning the input Silk file";\r
- }\r
-\r
-}\r