From 8a92b5348f8cf6769f30237df906ee9ce71ad237 Mon Sep 17 00:00:00 2001 From: Adam Scarr Date: Thu, 10 Aug 2017 22:06:08 +1000 Subject: [PATCH] Unicode safe by default --- json/json.go | 2 +- parser.go | 35 +++++++++++++++++++---------------- readme.md | 5 +++++ state.go | 2 +- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/json/json.go b/json/json.go index 0d78cf5..8129a8a 100644 --- a/json/json.go +++ b/json/json.go @@ -37,5 +37,5 @@ func init() { } func Unmarshal(input string) (interface{}, error) { - return Run(_value, input) + return Run(_value, input, ASCIIWhitespace) } diff --git a/parser.go b/parser.go index 7c3f866..12bb858 100644 --- a/parser.go +++ b/parser.go @@ -76,26 +76,14 @@ func ParsifyAll(parsers ...Parserish) []Parser { return ret } -// WS will consume whitespace, it should only be needed when AutoWS is turned off -func WS() Parser { - return NewParser("AutoWS", func(ps *State) Result { - ps.WS(ps) - return Result{} - }) -} - -// Cut prevents backtracking beyond this point. Usually used after keywords when you -// are sure this is the correct path. Improves performance and error reporting. -func Cut(ps *State) Result { - ps.Cut = ps.Pos - return Result{} -} - // Run applies some input to a parser and returns the result, failing if the input isnt fully consumed. // It is a convenience method for the most common way to invoke a parser. 
-func Run(parser Parserish, input string) (result interface{}, err error) { +func Run(parser Parserish, input string, ws ...VoidParser) (result interface{}, err error) { p := Parsify(parser) ps := NewState(input) + if len(ws) > 0 { + ps.WS = ws[0] + } ret := p(ps) ps.AutoWS() @@ -111,6 +99,21 @@ func Run(parser Parserish, input string) (result interface{}, err error) { return ret.Result, nil } +// WS will consume whitespace, it should only be needed when AutoWS is turned off +func WS() Parser { + return NewParser("AutoWS", func(ps *State) Result { + ps.WS(ps) + return Result{} + }) +} + +// Cut prevents backtracking beyond this point. Usually used after keywords when you +// are sure this is the correct path. Improves performance and error reporting. +func Cut(ps *State) Result { + ps.Cut = ps.Pos + return Result{} +} + // Regex returns a match if the regex successfully matches func Regex(pattern string) Parser { re := regexp.MustCompile("^" + pattern) diff --git a/readme.md b/readme.md index 1da05ed..5d8e78b 100644 --- a/readme.md +++ b/readme.md @@ -3,6 +3,11 @@ goparsify [![CircleCI](https://circleci.com/gh/Vektah/goparsify/tree/master.svg? A parser-combinator library for building easy to test, read and maintain parsers using functional composition. +Everything should be unicode safe by default, but you can opt out of unicode whitespace for a decent ~20% performance boost. +```go +Run(parser, input, ASCIIWhitespace) +``` + ### benchmarks I dont have many benchmarks set up yet, but the json parser is very promising. Nearly keeping up with the stdlib for raw speed: ``` diff --git a/state.go b/state.go index 64737dc..8aab3e2 100644 --- a/state.go +++ b/state.go @@ -65,7 +65,7 @@ func UnicodeWhitespace(s *State) { func NewState(input string) *State { return &State{ Input: input, - WS: ASCIIWhitespace, + WS: UnicodeWhitespace, } }