Clean up autows
This commit is contained in:
parent
5716ddb5e7
commit
a4677a5834
@ -25,9 +25,10 @@ func Seq(parsers ...Parserish) Parser {
|
|||||||
func NoAutoWS(parser Parserish) Parser {
|
func NoAutoWS(parser Parserish) Parser {
|
||||||
parserfied := Parsify(parser)
|
parserfied := Parsify(parser)
|
||||||
return func(ps *State, node *Result) {
|
return func(ps *State, node *Result) {
|
||||||
ps.NoAutoWS = true
|
oldWS := ps.WS
|
||||||
|
ps.WS = func(ps *State) {}
|
||||||
parserfied(ps, node)
|
parserfied(ps, node)
|
||||||
ps.NoAutoWS = false
|
ps.WS = oldWS
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
go build
|
|
||||||
profile.exe -cpuprofile cpu.out
|
|
||||||
go tool pprof profile.exe cpu.out
|
|
@ -1,3 +0,0 @@
|
|||||||
go build
|
|
||||||
profile.exe -memprofile mem.out
|
|
||||||
go tool pprof profile.exe mem.out
|
|
@ -12,7 +12,7 @@ import (
|
|||||||
// - unicode sequences, eg \uBEEF
|
// - unicode sequences, eg \uBEEF
|
||||||
func StringLit(allowedQuotes string) Parser {
|
func StringLit(allowedQuotes string) Parser {
|
||||||
return NewParser("string literal", func(ps *State, node *Result) {
|
return NewParser("string literal", func(ps *State, node *Result) {
|
||||||
ps.AutoWS()
|
ps.WS(ps)
|
||||||
|
|
||||||
if !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) {
|
if !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) {
|
||||||
ps.ErrorHere(allowedQuotes)
|
ps.ErrorHere(allowedQuotes)
|
||||||
@ -89,7 +89,7 @@ func StringLit(allowedQuotes string) Parser {
|
|||||||
// NumberLit matches a floating point or integer number and returns it as an int64 or float64 in .Result
|
// NumberLit matches a floating point or integer number and returns it as an int64 or float64 in .Result
|
||||||
func NumberLit() Parser {
|
func NumberLit() Parser {
|
||||||
return NewParser("number literal", func(ps *State, node *Result) {
|
return NewParser("number literal", func(ps *State, node *Result) {
|
||||||
ps.AutoWS()
|
ps.WS(ps)
|
||||||
end := ps.Pos
|
end := ps.Pos
|
||||||
float := false
|
float := false
|
||||||
inputLen := len(ps.Input)
|
inputLen := len(ps.Input)
|
||||||
|
25
parser.go
25
parser.go
@ -64,6 +64,14 @@ func Parsify(p Parserish) Parser {
|
|||||||
}
|
}
|
||||||
case string:
|
case string:
|
||||||
return Exact(p)
|
return Exact(p)
|
||||||
|
case VoidParser:
|
||||||
|
return func(ptr *State, node *Result) {
|
||||||
|
p(ptr)
|
||||||
|
}
|
||||||
|
case func(*State):
|
||||||
|
return func(ptr *State, node *Result) {
|
||||||
|
p(ptr)
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
panic(fmt.Errorf("cant turn a `%T` into a parser", p))
|
panic(fmt.Errorf("cant turn a `%T` into a parser", p))
|
||||||
}
|
}
|
||||||
@ -89,7 +97,7 @@ func Run(parser Parserish, input string, ws ...VoidParser) (result interface{},
|
|||||||
|
|
||||||
ret := Result{}
|
ret := Result{}
|
||||||
p(ps, &ret)
|
p(ps, &ret)
|
||||||
ps.AutoWS()
|
ps.WS(ps)
|
||||||
|
|
||||||
if ps.Error.expected != "" {
|
if ps.Error.expected != "" {
|
||||||
return ret.Result, &ps.Error
|
return ret.Result, &ps.Error
|
||||||
@ -102,13 +110,6 @@ func Run(parser Parserish, input string, ws ...VoidParser) (result interface{},
|
|||||||
return ret.Result, nil
|
return ret.Result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// WS will consume whitespace, it should only be needed when AutoWS is turned off
|
|
||||||
func WS() Parser {
|
|
||||||
return NewParser("AutoWS", func(ps *State, ret *Result) {
|
|
||||||
ps.WS(ps)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cut prevents backtracking beyond this point. Usually used after keywords when you
|
// Cut prevents backtracking beyond this point. Usually used after keywords when you
|
||||||
// are sure this is the correct path. Improves performance and error reporting.
|
// are sure this is the correct path. Improves performance and error reporting.
|
||||||
func Cut() Parser {
|
func Cut() Parser {
|
||||||
@ -121,7 +122,7 @@ func Cut() Parser {
|
|||||||
func Regex(pattern string) Parser {
|
func Regex(pattern string) Parser {
|
||||||
re := regexp.MustCompile("^" + pattern)
|
re := regexp.MustCompile("^" + pattern)
|
||||||
return NewParser(pattern, func(ps *State, node *Result) {
|
return NewParser(pattern, func(ps *State, node *Result) {
|
||||||
ps.AutoWS()
|
ps.WS(ps)
|
||||||
if match := re.FindString(ps.Get()); match != "" {
|
if match := re.FindString(ps.Get()); match != "" {
|
||||||
ps.Advance(len(match))
|
ps.Advance(len(match))
|
||||||
node.Token = match
|
node.Token = match
|
||||||
@ -136,7 +137,7 @@ func Exact(match string) Parser {
|
|||||||
if len(match) == 1 {
|
if len(match) == 1 {
|
||||||
matchByte := match[0]
|
matchByte := match[0]
|
||||||
return NewParser(match, func(ps *State, node *Result) {
|
return NewParser(match, func(ps *State, node *Result) {
|
||||||
ps.AutoWS()
|
ps.WS(ps)
|
||||||
if ps.Pos >= len(ps.Input) || ps.Input[ps.Pos] != matchByte {
|
if ps.Pos >= len(ps.Input) || ps.Input[ps.Pos] != matchByte {
|
||||||
ps.ErrorHere(match)
|
ps.ErrorHere(match)
|
||||||
return
|
return
|
||||||
@ -149,7 +150,7 @@ func Exact(match string) Parser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return NewParser(match, func(ps *State, node *Result) {
|
return NewParser(match, func(ps *State, node *Result) {
|
||||||
ps.AutoWS()
|
ps.WS(ps)
|
||||||
if !strings.HasPrefix(ps.Get(), match) {
|
if !strings.HasPrefix(ps.Get(), match) {
|
||||||
ps.ErrorHere(match)
|
ps.ErrorHere(match)
|
||||||
return
|
return
|
||||||
@ -224,7 +225,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
|
|||||||
alphabet, ranges := parseMatcher(matcher)
|
alphabet, ranges := parseMatcher(matcher)
|
||||||
|
|
||||||
return func(ps *State, node *Result) {
|
return func(ps *State, node *Result) {
|
||||||
ps.AutoWS()
|
ps.WS(ps)
|
||||||
matched := 0
|
matched := 0
|
||||||
for ps.Pos+matched < len(ps.Input) {
|
for ps.Pos+matched < len(ps.Input) {
|
||||||
if max != -1 && matched >= max {
|
if max != -1 && matched >= max {
|
||||||
|
@ -201,13 +201,13 @@ func TestAutoWS(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("ws is can be explicitly consumed ", func(t *testing.T) {
|
t.Run("ws is can be explicitly consumed ", func(t *testing.T) {
|
||||||
result, ps := runParser(" hello", NoAutoWS(Seq(WS(), "hello")))
|
result, ps := runParser(" hello", NoAutoWS(Seq(ASCIIWhitespace, "hello")))
|
||||||
require.Equal(t, "hello", result.Child[1].Token)
|
require.Equal(t, "hello", result.Child[1].Token)
|
||||||
require.Equal(t, "", ps.Get())
|
require.Equal(t, "", ps.Get())
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("unicode whitespace", func(t *testing.T) {
|
t.Run("unicode whitespace", func(t *testing.T) {
|
||||||
result, ps := runParser(" \u202f hello", NoAutoWS(Seq(WS(), "hello")))
|
result, ps := runParser(" \u202f hello", NoAutoWS(Seq(UnicodeWhitespace, "hello")))
|
||||||
require.Equal(t, "hello", result.Child[1].Token)
|
require.Equal(t, "hello", result.Child[1].Token)
|
||||||
require.Equal(t, "", ps.Get())
|
require.Equal(t, "", ps.Get())
|
||||||
require.False(t, ps.Errored())
|
require.False(t, ps.Errored())
|
||||||
|
7
scripts/cpuprofile.sh
Normal file
7
scripts/cpuprofile.sh
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
set -eu
|
||||||
|
|
||||||
|
go build ./json/profile/json.go
|
||||||
|
./json.exe -cpuprofile cpu.out
|
||||||
|
go tool pprof json.exe cpu.out
|
7
scripts/memprofile.sh
Normal file
7
scripts/memprofile.sh
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
set -eu
|
||||||
|
|
||||||
|
go build ./json/profile/json.go
|
||||||
|
./json.exe -memprofile mem.out
|
||||||
|
go tool pprof json.exe mem.out
|
13
state.go
13
state.go
@ -16,9 +16,8 @@ type State struct {
|
|||||||
// Error is a secondary return channel from parsers, but used so heavily
|
// Error is a secondary return channel from parsers, but used so heavily
|
||||||
// in backtracking that it has been inlined to avoid allocations.
|
// in backtracking that it has been inlined to avoid allocations.
|
||||||
Error Error
|
Error Error
|
||||||
// Called to determine what to ignore when WS is called, or when AutoWS fires
|
// Called by parsers (as ps.WS(ps)) to consume whatever input should be ignored, typically whitespace
|
||||||
WS VoidParser
|
WS VoidParser
|
||||||
NoAutoWS bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ASCIIWhitespace matches any of the standard whitespace characters. It is faster
|
// ASCIIWhitespace matches any of the standard whitespace characters. It is faster
|
||||||
@ -60,14 +59,6 @@ func (s *State) Advance(i int) {
|
|||||||
s.Pos += i
|
s.Pos += i
|
||||||
}
|
}
|
||||||
|
|
||||||
// AutoWS consumes all whitespace and advances Pos but can be disabled by the NoAutWS() parser.
|
|
||||||
func (s *State) AutoWS() {
|
|
||||||
if s.NoAutoWS {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
s.WS(s)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the remaining input.
|
// Get the remaining input.
|
||||||
func (s *State) Get() string {
|
func (s *State) Get() string {
|
||||||
if s.Pos > len(s.Input) {
|
if s.Pos > len(s.Input) {
|
||||||
|
Loading…
Reference in New Issue
Block a user