1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package ebnf is a library for EBNF grammars. The input is text ([]byte)
6 // satisfying the following grammar (represented itself in EBNF):
8 // Production = name "=" [ Expression ] "." .
9 // Expression = Alternative { "|" Alternative } .
10 // Alternative = Term { Term } .
11 // Term = name | token [ "…" token ] | Group | Option | Repetition .
12 // Group = "(" Expression ")" .
13 // Option = "[" Expression "]" .
14 // Repetition = "{" Expression "}" .
16 // A name is a Go identifier, a token is a Go string, and comments
17 // and white space follow the same rules as for the Go language.
18 // Production names starting with an uppercase Unicode letter denote
19 // non-terminal productions (i.e., productions which allow white-space
20 // and comments between tokens); all other production names denote
21 // lexical productions.
33 // ----------------------------------------------------------------------------
34 // Internal representation
37 // An Expression node represents a production expression.
38 Expression interface {
39 // Pos is the position of the first character of the syntactic construct
43 // An Alternative node represents a non-empty list of alternative expressions.
44 Alternative []Expression // x | y | z
46 // A Sequence node represents a non-empty list of sequential expressions.
47 Sequence []Expression // x y z
49 // A Name node represents a production name.
55 // A Token node represents a literal.
61 // A List node represents a range of characters.
63 Begin, End *Token // begin ... end
66 // A Group node represents a grouped expression.
69 Body Expression // (body)
72 // An Option node represents an optional expression.
75 Body Expression // [body]
78 // A Repetition node represents a repeated expression.
81 Body Expression // {body}
84 // A Bad node stands for pieces of source code that lead to a parse error.
87 Error string // parser error message
90 // A Production node represents an EBNF production.
96 // A Grammar is a set of EBNF productions. The map
97 // is indexed by production name.
99 Grammar map[string]*Production
102 func (x Alternative) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Alternative
103 func (x Sequence) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Sequences
104 func (x *Name) Pos() token.Pos { return x.StringPos }
105 func (x *Token) Pos() token.Pos { return x.StringPos }
106 func (x *Range) Pos() token.Pos { return x.Begin.Pos() }
107 func (x *Group) Pos() token.Pos { return x.Lparen }
108 func (x *Option) Pos() token.Pos { return x.Lbrack }
109 func (x *Repetition) Pos() token.Pos { return x.Lbrace }
110 func (x *Bad) Pos() token.Pos { return x.TokPos }
111 func (x *Production) Pos() token.Pos { return x.Name.Pos() }
113 // ----------------------------------------------------------------------------
114 // Grammar verification
116 func isLexical(name string) bool {
117 ch, _ := utf8.DecodeRuneInString(name)
118 return !unicode.IsUpper(ch)
121 type verifier struct {
124 worklist []*Production
125 reached Grammar // set of productions reached from (and including) the root production
129 func (v *verifier) error(pos token.Pos, msg string) {
130 v.Error(v.fset.Position(pos), msg)
133 func (v *verifier) push(prod *Production) {
134 name := prod.Name.String
135 if _, found := v.reached[name]; !found {
136 v.worklist = append(v.worklist, prod)
137 v.reached[name] = prod
141 func (v *verifier) verifyChar(x *Token) int {
143 if utf8.RuneCountInString(s) != 1 {
144 v.error(x.Pos(), "single char expected, found "+s)
147 ch, _ := utf8.DecodeRuneInString(s)
151 func (v *verifier) verifyExpr(expr Expression, lexical bool) {
152 switch x := expr.(type) {
156 for _, e := range x {
157 v.verifyExpr(e, lexical)
160 for _, e := range x {
161 v.verifyExpr(e, lexical)
164 // a production with this name must exist;
165 // add it to the worklist if not yet processed
166 if prod, found := v.grammar[x.String]; found {
169 v.error(x.Pos(), "missing production "+x.String)
171 // within a lexical production references
172 // to non-lexical productions are invalid
173 if lexical && !isLexical(x.String) {
174 v.error(x.Pos(), "reference to non-lexical production "+x.String)
177 // nothing to do for now
179 i := v.verifyChar(x.Begin)
180 j := v.verifyChar(x.End)
182 v.error(x.Pos(), "decreasing character range")
185 v.verifyExpr(x.Body, lexical)
187 v.verifyExpr(x.Body, lexical)
189 v.verifyExpr(x.Body, lexical)
195 func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) {
196 // find root production
197 root, found := grammar[start]
199 // token.NoPos doesn't require a file set;
200 // ok to set v.fset only afterwards
201 v.error(token.NoPos, "no start production "+start)
205 // initialize verifier
207 v.ErrorVector.Reset()
208 v.worklist = v.worklist[0:0]
209 v.reached = make(Grammar)
212 // work through the worklist
215 n := len(v.worklist) - 1
219 prod := v.worklist[n]
220 v.worklist = v.worklist[0:n]
221 v.verifyExpr(prod.Expr, isLexical(prod.Name.String))
224 // check if all productions were reached
225 if len(v.reached) < len(v.grammar) {
226 for name, prod := range v.grammar {
227 if _, found := v.reached[name]; !found {
228 v.error(prod.Pos(), name+" is unreachable")
234 // Verify checks that:
235 // - all productions used are defined
236 // - all productions defined are used when beginning at start
237 // - lexical productions refer only to other lexical productions
239 // Position information is interpreted relative to the file set fset.
241 func Verify(fset *token.FileSet, grammar Grammar, start string) os.Error {
243 v.verify(fset, grammar, start)
244 return v.GetError(scanner.Sorted)