From b51bb4b7741ad72767cc5f4a85941358f3a0ffe3 Mon Sep 17 00:00:00 2001 From: leo Date: Thu, 2 Jul 2009 06:22:10 +0000 Subject: [PATCH] git-svn-id: http://www.xerial.org/svn/project/XerialJ/trunk/xerial-core@3403 ae02f08e-27ec-0310-ae8c-8ba02fe2eafd --- .../java/org/xerial/silk/schema/impl/SilkSchema.g | 54 ++++++++++-- .../java/org/xerial/silk/RelationExtractTest.java | 97 ++++++++++++++++++++++ src/test/java/org/xerial/silk/schema/schema.silk | 45 ++++++---- src/test/java/org/xerial/silk/sequence.silk | 3 + 4 files changed, 177 insertions(+), 22 deletions(-) create mode 100644 src/test/java/org/xerial/silk/RelationExtractTest.java diff --git a/src/main/java/org/xerial/silk/schema/impl/SilkSchema.g b/src/main/java/org/xerial/silk/schema/impl/SilkSchema.g index 2fe43b3..dda0a9a 100644 --- a/src/main/java/org/xerial/silk/schema/impl/SilkSchema.g +++ b/src/main/java/org/xerial/silk/schema/impl/SilkSchema.g @@ -95,13 +95,21 @@ package org.xerial.silk.schema.impl; } -// lexer rules +// lexer rules + +// Line Comment +fragment LineBreakChar: '\n' | '\r'; // r: n : +LineComment: '#' ~(LineBreakChar)* { $channel=HIDDEN; }; + + +LineBreak: ('\r' '\n' | '\r' | '\n' ); WhiteSpace: (' ' | '\r' | '\t' | '\u000C' | '\n') { $channel=HIDDEN; }; fragment Digit: '0' .. '9'; fragment Letter: 'A' .. 'F' | 'a' .. 'f'; fragment HexDigit: Digit | Letter; +fragment NonWhiteUnicodeChar: ~('"' | '\\' | ' ' | '\t' | '\r' | '\n' | '\u000C'); fragment UnicodeChar: ~('"'| '\\'); fragment EscapeSequence : '\\' ('\"' | '\\' | '/' | 'b' | 'f' | 'n' | 'r' | 't' | 'u' HexDigit HexDigit HexDigit HexDigit) @@ -113,7 +121,6 @@ fragment StringChar_s: StringChar*; String: '"' s=StringChar_s '"' { setText($s.text); }; -Colon: ':'; Comma: ','; Integer: '-'? ('0' | '1'..'9' Digit*); fragment Frac: '.' 
Digit+; @@ -125,18 +132,55 @@ RBrace: '}' ; LBracket: '[' ; RBracket: ']' ; +Lt: '<'; +Eq: '='; + LParen: '('; RParen: ')'; +Star: '*'; + +fragment +UnsafeUnicodeChar: '(' | ')' | '[' | ']' | '{' | '}' | ',' | ':' | '#' | '<' | '>' | '|' | '*' | '\'' | '"' | '@' | '%' | '\\'; + +fragment +NonWhiteSpaceChar: ~(UnsafeUnicodeChar | WhiteSpace); + + +Symbol: (':' NonWhiteSpaceChar) => ':' NonWhiteSpaceChar+ ; /* ':' followed by one or more NonWhiteSpaceChar */ + Class: 'class'; +Includes: 'includes'; +End: 'end'; + +fragment SafeFirstLetter: 'A' .. 'Z' | 'a' .. 'z'; +fragment SafeLetter: SafeFirstLetter | '0' .. '9' | '-' | '_'; + +ModuleDef: 'module' WhiteSpace* SafeFirstLetter SafeLetter* ('.' SafeFirstLetter SafeLetter*)*; /* 'module' keyword plus a dot-separated name */ + +QName: ~(UnsafeUnicodeChar | WhiteSpace); + // parser rules schema - : (classDefinition)* + : (classDefinition | moduleDefinition )* ; +moduleDefinition: + ModuleDef LineBreak + ; + classDefinition - : Class LBrace RBrace - ; + : Class LineBreak + ((includeStatement | attributes) LineBreak)* + End LineBreak + ; + +includeStatement: Includes QName (',' QName)*; + + + + + diff --git a/src/test/java/org/xerial/silk/RelationExtractTest.java b/src/test/java/org/xerial/silk/RelationExtractTest.java new file mode 100644 index 0000000..a217cd6 --- /dev/null +++ b/src/test/java/org/xerial/silk/RelationExtractTest.java @@ -0,0 +1,97 @@ +/*-------------------------------------------------------------------------- + * Copyright 2009 Taro L. Saito + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *--------------------------------------------------------------------------*/
+//--------------------------------------
+// XerialJ
+//
+// RelationExtractTest.java
+// Since: Jul 2, 2009 2:38:03 PM
+//
+// $URL$
+// $Author$
+//--------------------------------------
+package org.xerial.silk;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.xerial.lens.Lens;
+import org.xerial.lens.ObjectLens;
+import org.xerial.util.FileResource;
+import org.xerial.util.log.Logger;
+
+/** Loads sequence.silk into a {@link GeneList} through Lens and logs the result as JSON. */
+public class RelationExtractTest
+{
+    private static final Logger _logger = Logger.getLogger(RelationExtractTest.class);
+
+    @Before
+    public void setUp() throws Exception
+    {}
+
+    @After
+    public void tearDown() throws Exception
+    {}
+
+    public static class Coordinate
+    {
+        public String name;
+        public String species;
+    }
+
+    /** Object type handed to Lens.loadSilk; keeps the genes passed to add(Gene). */
+    public static class GeneList
+    {
+        private List<Gene> geneList = new ArrayList<Gene>();
+
+        /** Logs the (coordinate, gene) pair as JSON; the gene is not stored in geneList. */
+        public void add(Coordinate coordinate, Gene gene)
+        {
+            _logger.info(String
+                    .format("coordinate %s, gene %s", ObjectLens.toJSON(coordinate), ObjectLens.toJSON(gene)));
+        }
+
+        public void add(Gene g)
+        {
+            geneList.add(g);
+        }
+
+        public List<Gene> getGeneList()
+        {
+            return geneList;
+        }
+    }
+
+    /** Gene record; fields presumably map to the gene(...) columns of sequence.silk - confirm. */
+    public static class Gene
+    {
+        public String name;
+        public long start;
+        public String strand;
+        public String sequence;
+    }
+
+    @Test
+    public void test() throws Exception
+    {
+        GeneList geneList = Lens
+                .loadSilk(GeneList.class, FileResource.open(RelationExtractTest.class, "sequence.silk"));
+
+        _logger.info(ObjectLens.toJSON(geneList));
+    }
+}
diff --git a/src/test/java/org/xerial/silk/schema/schema.silk b/src/test/java/org/xerial/silk/schema/schema.silk
index 2d4dc14..0d83997 100644
--- a/src/test/java/org/xerial/silk/schema/schema.silk
+++ b/src/test/java/org/xerial/silk/schema/schema.silk
@@ -1,34 +1,45 @@
-module org.utgenome. 
+%silk(version:1.0, type:schema) +module org.utgenome + +# coordinate system information class Coordinate - group - species - revision - name + string :group = "utgb" + :species + :revision + :name end +# region on a genome sequence class Locus - start: integer - end: integer + integer :start + integer :end end +# gene data with sequence data class Gene < Locus - id: integer - name + integer :id + sequence :sequence end - -class Reference - mix in Locus - sequence: string - read*: Read +# reference sequence +class Reference < Locus + Coordinate :coordinate + sequence :sequence end +# short-read data class Read - include Locus - sequence: string - qv*: integer + includes Locus + string :sequence + integer* :qv end +# Alignment of reads to a reference genome +class Alignment + Reference :reference + Read* :read +end +end diff --git a/src/test/java/org/xerial/silk/sequence.silk b/src/test/java/org/xerial/silk/sequence.silk index 336bc29..5e8e03e 100644 --- a/src/test/java/org/xerial/silk/sequence.silk +++ b/src/test/java/org/xerial/silk/sequence.silk @@ -5,6 +5,9 @@ TAG1 100 + ACCCGGTTTTGGCGCTTTCCTTTC[----]TTGGCCTTGGGGCCCATCB {"link":"http://somewhere.org/", "description":"reference sequence"} TAG2 100 - ACCCGG---GGCGCTTTCCCTTTC --AC TTGGCCTTGGGGCCCATCG + + + -(group:utgb, type:chromosome, species:human, revision:hg18) -coordinate(name:chr2) -gene(name, start, strand, sequence, param[json])| -- 2.11.0