diff --git a/combinator.go b/combinator.go index 0c4e0f5..7bcb907 100644 --- a/combinator.go +++ b/combinator.go @@ -25,9 +25,10 @@ func Seq(parsers ...Parserish) Parser { func NoAutoWS(parser Parserish) Parser { parserfied := Parsify(parser) return func(ps *State, node *Result) { - ps.NoAutoWS = true + oldWS := ps.WS + ps.WS = func(ps *State) {} parserfied(ps, node) - ps.NoAutoWS = false + ps.WS = oldWS } } diff --git a/json/profile/cpuprofile.bat b/json/profile/cpuprofile.bat deleted file mode 100644 index c293b5e..0000000 --- a/json/profile/cpuprofile.bat +++ /dev/null @@ -1,3 +0,0 @@ -go build -profile.exe -cpuprofile cpu.out -go tool pprof profile.exe cpu.out diff --git a/json/profile/memprofile.bat b/json/profile/memprofile.bat deleted file mode 100644 index 71873ac..0000000 --- a/json/profile/memprofile.bat +++ /dev/null @@ -1,3 +0,0 @@ -go build -profile.exe -memprofile mem.out -go tool pprof profile.exe mem.out diff --git a/literals.go b/literals.go index 1287001..00e39f9 100644 --- a/literals.go +++ b/literals.go @@ -12,7 +12,7 @@ import ( // - unicode sequences, eg \uBEEF func StringLit(allowedQuotes string) Parser { return NewParser("string literal", func(ps *State, node *Result) { - ps.AutoWS() + ps.WS(ps) if !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) { ps.ErrorHere(allowedQuotes) @@ -89,7 +89,7 @@ func StringLit(allowedQuotes string) Parser { // NumberLit matches a floating point or integer number and returns it as a int64 or float64 in .Result func NumberLit() Parser { return NewParser("number literal", func(ps *State, node *Result) { - ps.AutoWS() + ps.WS(ps) end := ps.Pos float := false inputLen := len(ps.Input) diff --git a/parser.go b/parser.go index a43bc19..763eff4 100644 --- a/parser.go +++ b/parser.go @@ -64,6 +64,14 @@ func Parsify(p Parserish) Parser { } case string: return Exact(p) + case VoidParser: + return func(ptr *State, node *Result) { + p(ptr) + } + case func(*State): + return func(ptr *State, node *Result) { + p(ptr) + } 
default: panic(fmt.Errorf("cant turn a `%T` into a parser", p)) } @@ -89,7 +97,7 @@ func Run(parser Parserish, input string, ws ...VoidParser) (result interface{}, ret := Result{} p(ps, &ret) - ps.AutoWS() + ps.WS(ps) if ps.Error.expected != "" { return ret.Result, &ps.Error @@ -102,13 +110,6 @@ func Run(parser Parserish, input string, ws ...VoidParser) (result interface{}, return ret.Result, nil } -// WS will consume whitespace, it should only be needed when AutoWS is turned off -func WS() Parser { - return NewParser("AutoWS", func(ps *State, ret *Result) { - ps.WS(ps) - }) -} - // Cut prevents backtracking beyond this point. Usually used after keywords when you // are sure this is the correct path. Improves performance and error reporting. func Cut() Parser { @@ -121,7 +122,7 @@ func Cut() Parser { func Regex(pattern string) Parser { re := regexp.MustCompile("^" + pattern) return NewParser(pattern, func(ps *State, node *Result) { - ps.AutoWS() + ps.WS(ps) if match := re.FindString(ps.Get()); match != "" { ps.Advance(len(match)) node.Token = match @@ -136,7 +137,7 @@ func Exact(match string) Parser { if len(match) == 1 { matchByte := match[0] return NewParser(match, func(ps *State, node *Result) { - ps.AutoWS() + ps.WS(ps) if ps.Pos >= len(ps.Input) || ps.Input[ps.Pos] != matchByte { ps.ErrorHere(match) return @@ -149,7 +150,7 @@ func Exact(match string) Parser { } return NewParser(match, func(ps *State, node *Result) { - ps.AutoWS() + ps.WS(ps) if !strings.HasPrefix(ps.Get(), match) { ps.ErrorHere(match) return @@ -224,7 +225,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser { alphabet, ranges := parseMatcher(matcher) return func(ps *State, node *Result) { - ps.AutoWS() + ps.WS(ps) matched := 0 for ps.Pos+matched < len(ps.Input) { if max != -1 && matched >= max { diff --git a/parser_test.go b/parser_test.go index c64b70a..ce5f3e7 100644 --- a/parser_test.go +++ b/parser_test.go @@ -201,13 +201,13 @@ func TestAutoWS(t *testing.T) { }) 
t.Run("ws is can be explicitly consumed ", func(t *testing.T) { - result, ps := runParser(" hello", NoAutoWS(Seq(WS(), "hello"))) + result, ps := runParser(" hello", NoAutoWS(Seq(ASCIIWhitespace, "hello"))) require.Equal(t, "hello", result.Child[1].Token) require.Equal(t, "", ps.Get()) }) t.Run("unicode whitespace", func(t *testing.T) { - result, ps := runParser(" \u202f hello", NoAutoWS(Seq(WS(), "hello"))) + result, ps := runParser(" \u202f hello", NoAutoWS(Seq(UnicodeWhitespace, "hello"))) require.Equal(t, "hello", result.Child[1].Token) require.Equal(t, "", ps.Get()) require.False(t, ps.Errored()) diff --git a/scripts/cpuprofile.sh b/scripts/cpuprofile.sh new file mode 100644 index 0000000..2f6f18b --- /dev/null +++ b/scripts/cpuprofile.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -eu + +go build -o json.exe ./json/profile/json.go # -o pins the name: go build adds .exe only on Windows +./json.exe -cpuprofile cpu.out +go tool pprof json.exe cpu.out diff --git a/scripts/memprofile.sh b/scripts/memprofile.sh new file mode 100644 index 0000000..0fdc5ec --- /dev/null +++ b/scripts/memprofile.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -eu + +go build -o json.exe ./json/profile/json.go # -o pins the name: go build adds .exe only on Windows +./json.exe -memprofile mem.out +go tool pprof json.exe mem.out diff --git a/state.go b/state.go index e567777..1b7e9f2 100644 --- a/state.go +++ b/state.go @@ -16,9 +16,8 @@ type State struct { // Error is a secondary return channel from parsers, but used so heavily // in backtracking that it has been inlined to avoid allocations. Error Error - // Called to determine what to ignore when WS is called, or when AutoWS fires - WS VoidParser - NoAutoWS bool + // WS is called by parsers to skip ignorable input such as whitespace; NoAutoWS temporarily swaps it for a no-op + WS VoidParser } // ASCIIWhitespace matches any of the standard whitespace characters. It is faster @@ -60,14 +59,6 @@ func (s *State) Advance(i int) { s.Pos += i } -// AutoWS consumes all whitespace and advances Pos but can be disabled by the NoAutWS() parser.
-func (s *State) AutoWS() { - if s.NoAutoWS { - return - } - s.WS(s) -} - // Get the remaining input. func (s *State) Get() string { if s.Pos > len(s.Input) {