Unicode safe by default

This commit is contained in:
Adam Scarr 2017-08-10 22:06:08 +10:00
parent a0e66b1c46
commit 8a92b5348f
4 changed files with 26 additions and 18 deletions

View File

@ -37,5 +37,5 @@ func init() {
} }
func Unmarshal(input string) (interface{}, error) { func Unmarshal(input string) (interface{}, error) {
return Run(_value, input) return Run(_value, input, ASCIIWhitespace)
} }

View File

@ -76,26 +76,14 @@ func ParsifyAll(parsers ...Parserish) []Parser {
return ret return ret
} }
// WS returns a parser that consumes whitespace by running the state's
// configured whitespace handler. It should only be needed when AutoWS is
// turned off.
func WS() Parser {
	// The handler advances the state itself; the parser yields an empty Result.
	skip := func(ps *State) Result {
		ps.WS(ps)
		return Result{}
	}
	return NewParser("AutoWS", skip)
}
// Cut marks the current position as the point beyond which backtracking is
// not allowed. It is usually placed after a keyword, once you are sure the
// current path is the correct one. Improves performance and error reporting.
func Cut(ps *State) Result {
	// Record the cut point at the current position.
	ps.Cut = ps.Pos

	var empty Result
	return empty
}
// Run applies some input to a parser and returns the result, failing if the input isn't fully consumed. // Run applies some input to a parser and returns the result, failing if the input isn't fully consumed.
// It is a convenience method for the most common way to invoke a parser. // It is a convenience method for the most common way to invoke a parser.
func Run(parser Parserish, input string) (result interface{}, err error) { func Run(parser Parserish, input string, ws ...VoidParser) (result interface{}, err error) {
p := Parsify(parser) p := Parsify(parser)
ps := NewState(input) ps := NewState(input)
if len(ws) > 0 {
ps.WS = ws[0]
}
ret := p(ps) ret := p(ps)
ps.AutoWS() ps.AutoWS()
@ -111,6 +99,21 @@ func Run(parser Parserish, input string) (result interface{}, err error) {
return ret.Result, nil return ret.Result, nil
} }
// WS returns a parser that consumes whitespace by running the state's
// configured whitespace handler. It should only be needed when AutoWS is
// turned off.
func WS() Parser {
	// The handler advances the state itself; the parser yields an empty Result.
	skip := func(ps *State) Result {
		ps.WS(ps)
		return Result{}
	}
	return NewParser("AutoWS", skip)
}
// Cut marks the current position as the point beyond which backtracking is
// not allowed. It is usually placed after a keyword, once you are sure the
// current path is the correct one. Improves performance and error reporting.
func Cut(ps *State) Result {
	// Record the cut point at the current position.
	ps.Cut = ps.Pos

	var empty Result
	return empty
}
// Regex returns a match if the regex successfully matches // Regex returns a match if the regex successfully matches
func Regex(pattern string) Parser { func Regex(pattern string) Parser {
re := regexp.MustCompile("^" + pattern) re := regexp.MustCompile("^" + pattern)

View File

@ -3,6 +3,11 @@ goparsify [![CircleCI](https://circleci.com/gh/Vektah/goparsify/tree/master.svg?
A parser-combinator library for building easy to test, read and maintain parsers using functional composition. A parser-combinator library for building easy to test, read and maintain parsers using functional composition.
Everything should be Unicode-safe by default, but you can opt out of Unicode whitespace handling for a decent ~20% performance boost.
```go
Run(parser, input, ASCIIWhitespace)
```
### benchmarks ### benchmarks
I don't have many benchmarks set up yet, but the JSON parser is very promising, nearly keeping up with the stdlib for raw speed: I don't have many benchmarks set up yet, but the JSON parser is very promising, nearly keeping up with the stdlib for raw speed:
``` ```

View File

@ -65,7 +65,7 @@ func UnicodeWhitespace(s *State) {
func NewState(input string) *State { func NewState(input string) *State {
return &State{ return &State{
Input: input, Input: input,
WS: ASCIIWhitespace, WS: UnicodeWhitespace,
} }
} }