// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+// Package syntax parses regular expressions into parse trees and compiles
+// parse trees into programs. Most clients of regular expressions will use
+// the facilities of package regexp (such as Compile and Match) instead of
+// this package.
package syntax
import (
- "os"
"sort"
"strings"
"unicode"
- "utf8"
+ "unicode/utf8"
)
// An Error describes a failure to parse a regular expression
Expr string
}
// Error formats the parse failure as a single string:
// the fixed prefix "error parsing regexp: ", then the string form of e.Code,
// then the offending expression e.Expr enclosed in backquotes.
-func (e *Error) String() string {
+func (e *Error) Error() string {
return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`"
}
// before is the regexp suffix starting at the repetition operator.
// after is the regexp suffix following after the repetition operator.
// repeat returns an updated 'after' and an error, if any.
-func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (string, os.Error) {
+func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (string, error) {
flags := p.flags
if p.flags&PerlX != 0 {
if len(after) > 0 && after[0] == '?' {
// Parsing.
-func Parse(s string, flags Flags) (*Regexp, os.Error) {
+// Parse parses a regular expression string s, controlled by the specified
+// Flags, and returns a regular expression parse tree. The syntax is
+// described in the top-level comment for package regexp.
+func Parse(s string, flags Flags) (*Regexp, error) {
if flags&Literal != 0 {
// Trivial parser for literal string.
if err := checkUTF8(s); err != nil {
// Otherwise, must do real work.
var (
p parser
- err os.Error
+ err error
c rune
op Op
lastRepeat string
// parsePerlFlags parses a Perl flag setting or non-capturing group or both,
// like (?i) or (?: or (?i:. It removes the prefix from s and updates the parse state.
// The caller must have ensured that s begins with "(?".
-func (p *parser) parsePerlFlags(s string) (rest string, err os.Error) {
+func (p *parser) parsePerlFlags(s string) (rest string, err error) {
t := s
// Check for named captures, first introduced in Python's regexp library.
}
// parseVerticalBar handles a | in the input.
-func (p *parser) parseVerticalBar() os.Error {
+func (p *parser) parseVerticalBar() error {
p.concat()
// The concatenation we just parsed is on top of the stack.
}
// parseRightParen handles a ) in the input.
-func (p *parser) parseRightParen() os.Error {
+func (p *parser) parseRightParen() error {
p.concat()
if p.swapVerticalBar() {
// pop vertical bar
// parseEscape parses an escape sequence at the beginning of s
// and returns the rune.
-func (p *parser) parseEscape(s string) (r rune, rest string, err os.Error) {
+func (p *parser) parseEscape(s string) (r rune, rest string, err error) {
t := s[1:]
if t == "" {
return 0, "", &Error{ErrTrailingBackslash, ""}
// parseClassChar parses a character class character at the beginning of s
// and returns it.
-func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err os.Error) {
+func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err error) {
if s == "" {
return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}
}
// parseNamedClass parses a leading POSIX named character class like [:alnum:]
// from the beginning of s. If one is present, it appends the characters to r
// and returns the new slice r and the remainder of the string.
-func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err os.Error) {
+func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error) {
if len(s) < 2 || s[0] != '[' || s[1] != ':' {
return
}
}
// anyTable is a range table spanning every code point: one 16-bit range
// from 0 through 1<<16 - 1 and one 32-bit range from 1<<16 through
// unicode.MaxRune, each with a stride of 1.
var anyTable = &unicode.RangeTable{
- []unicode.Range16{{0, 1<<16 - 1, 1}},
- []unicode.Range32{{1 << 16, unicode.MaxRune, 1}},
+ R16: []unicode.Range16{{Lo: 0, Hi: 1<<16 - 1, Stride: 1}},
+ R32: []unicode.Range32{{Lo: 1 << 16, Hi: unicode.MaxRune, Stride: 1}},
}
// unicodeTable returns the unicode.RangeTable identified by name
// parseUnicodeClass parses a leading Unicode character class like \p{Han}
// from the beginning of s. If one is present, it appends the characters to r
// and returns the new slice r and the remainder of the string.
-func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err os.Error) {
+func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error) {
if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {
return
}
// parseClass parses a character class at the beginning of s
// and pushes it onto the parse stack.
-func (p *parser) parseClass(s string) (rest string, err os.Error) {
+func (p *parser) parseClass(s string) (rest string, err error) {
t := s[1:] // chop [
re := p.newRegexp(OpCharClass)
re.Flags = p.flags
// appendNegatedClass returns the result of appending the negation of the class x to the class r.
// It assumes x is clean.
func appendNegatedClass(r []rune, x []rune) []rune {
- nextLo := rune('\u0000')
+ nextLo := '\u0000'
for i := 0; i < len(x); i += 2 {
lo, hi := x[i], x[i+1]
if nextLo <= lo-1 {
// appendNegatedTable returns the result of appending the negation of x to the class r.
func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune {
- nextLo := rune('\u0000') // lo end of next class to add
+ nextLo := '\u0000' // lo end of next class to add
for _, xr := range x.R16 {
lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
if stride == 1 {
// negateClass overwrites r and returns r's negation.
// It assumes the class r is already clean.
func negateClass(r []rune) []rune {
- nextLo := rune('\u0000') // lo end of next class to add
- w := 0 // write index
+ nextLo := '\u0000' // lo end of next class to add
+ w := 0 // write index
for i := 0; i < len(r); i += 2 {
lo, hi := r[i], r[i+1]
if nextLo <= lo-1 {
p[i], p[i+1], p[j], p[j+1] = p[j], p[j+1], p[i], p[i+1]
}
-func checkUTF8(s string) os.Error {
+func checkUTF8(s string) error {
for s != "" {
rune, size := utf8.DecodeRuneInString(s)
if rune == utf8.RuneError && size == 1 {
return nil
}
-func nextRune(s string) (c rune, t string, err os.Error) {
+func nextRune(s string) (c rune, t string, err error) {
c, size := utf8.DecodeRuneInString(s)
if c == utf8.RuneError && size == 1 {
return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}