From 79b7cc082cac1904b87bbe7996df5a8824f7c2d8 Mon Sep 17 00:00:00 2001 From: Adam Scarr Date: Sun, 13 Aug 2017 15:28:43 +1000 Subject: [PATCH] Add one byte prediction to Any() --- combinator.go | 31 ++++++++++++++++++++++++++++-- combinator_test.go | 35 +++++++++++++++++++++++++++++++++ perf_test.go | 14 ++++++++++++++ readme.md | 48 +++++++++++++++++++++++----------------------- state_test.go | 10 ++++++++++ 5 files changed, 112 insertions(+), 26 deletions(-) create mode 100644 perf_test.go diff --git a/combinator.go b/combinator.go index 1572e6c..01386e6 100644 --- a/combinator.go +++ b/combinator.go @@ -38,14 +38,40 @@ func NoAutoWS(parser Parserish) Parser { // Any matches the first successful parser and returns its result func Any(parsers ...Parserish) Parser { parserfied := ParsifyAll(parsers...) + // For + predictor := [255]int{} return NewParser("Any()", func(ps *State) Result { + if ps.Pos >= len(ps.Input) { + ps.ErrorHere("!EOF") + return Result{} + } longestError := Error{} startpos := ps.Pos - for _, parser := range parserfied { + predictorChar := ps.Input[startpos] + predicted := predictor[predictorChar] + + node := parserfied[predicted](ps) + if !ps.Errored() { + return node + } + + if ps.Error.pos >= longestError.pos { + longestError = ps.Error + } + if ps.Cut <= startpos { + ps.Recover() + } else { + return node + } + + for i, parser := range parserfied { + if i == predicted { + continue + } node := parser(ps) if ps.Errored() { - if ps.Error.pos > longestError.pos { + if ps.Error.pos >= longestError.pos { longestError = ps.Error } if ps.Cut > startpos { @@ -54,6 +80,7 @@ func Any(parsers ...Parserish) Parser { ps.Recover() continue } + predictor[predictorChar] = i return node } diff --git a/combinator_test.go b/combinator_test.go index 50b7010..1c95eba 100644 --- a/combinator_test.go +++ b/combinator_test.go @@ -61,6 +61,41 @@ func TestAny(t *testing.T) { require.Equal(t, Result{}, node) require.Equal(t, 0, p2.Pos) }) + + t.Run("branch prediction", func(t *testing.T) { + p := Any("hello", Seq("{", Cut(), "world", "}"), Seq("[", Cut(), "a", "]")) + // warm up the predictor + _, _ = Run(p, "hello") + _, _ = Run(p, "{world}") + + t.Run("matches", func(t *testing.T) { + node, ps := runParser("hello world!", p) + require.Equal(t, "hello", node.Token) + require.Equal(t, 5, ps.Pos) + }) + + t.Run("errors", func(t *testing.T) { + _, ps := runParser("help world!", p) + require.Equal(t, "offset 0: expected [", ps.Error.Error()) + require.Equal(t, 0, ps.Error.Pos()) + require.Equal(t, 0, ps.Pos) + }) + + t.Run("errors with cuts", func(t *testing.T) { + _, ps := runParser("{world", p) + require.Equal(t, "offset 6: expected }", ps.Error.Error()) + require.Equal(t, 6, ps.Error.Pos()) + require.Equal(t, 0, ps.Pos) + }) + + t.Run("misprededicted cut", func(t *testing.T) { + // This should probably only happen when the predictor is cold + _, ps := runParser("[a", p) + require.Equal(t, "offset 2: expected ]", ps.Error.Error()) + require.Equal(t, 2, ps.Error.Pos()) + require.Equal(t, 0, ps.Pos) + }) + }) } func TestSome(t *testing.T) { diff --git a/perf_test.go b/perf_test.go new file mode 100644 index 0000000..d6a0092 --- /dev/null +++ b/perf_test.go @@ -0,0 +1,14 @@ +package goparsify + +import "testing" + +func BenchmarkAny(b *testing.B) { + p := Any("hello", "goodbye", "help") + + for i := 0; i < b.N; i++ { + _, _ = Run(p, "hello") + _, _ = Run(p, "hello world") + _, _ = Run(p, "good boy") + _, _ = Run(p, "help me") + } +} diff --git a/readme.md b/readme.md index f4af219..bcb72f3 100644 --- a/readme.md +++ b/readme.md @@ -9,14 +9,14 @@ Run(parser, input, ASCIIWhitespace) ``` ### benchmarks -I dont have many benchmarks set up yet, but the json parser is very promising. Nearly keeping up with the stdlib for raw speed: +I dont have many benchmarks set up yet, but the json parser keeps up with the stdlib for raw speed: ``` $ go test -bench=. -benchtime=2s -benchmem ./json -BenchmarkUnmarshalParsec-8 20000 65682 ns/op 50460 B/op 1318 allocs/op -BenchmarkUnmarshalParsify-8 30000 51292 ns/op 45104 B/op 334 allocs/op -BenchmarkUnmarshalStdlib-8 30000 46522 ns/op 13953 B/op 262 allocs/op +BenchmarkUnmarshalParsec-8 50000 66012 ns/op 50462 B/op 1318 allocs/op +BenchmarkUnmarshalParsify-8 100000 46713 ns/op 44543 B/op 332 allocs/op +BenchmarkUnmarshalStdlib-8 100000 46967 ns/op 13952 B/op 262 allocs/op PASS -ok github.com/vektah/goparsify/json 10.840s +ok github.com/vektah/goparsify/json 14.424s ``` ### debugging parsers @@ -86,25 +86,25 @@ ok github.com/vektah/goparsify/html 0.118s If you build the parser with -tags debug it will instrument each parser and a call to DumpDebugStats() will show stats: ``` var name matches total time self time calls errors location - _value Any() 6.3303551s 46.0214ms 878801 calls 0 errors json.go:36 - _string string literal 100.0559ms 44.019ms 848489 calls 313135 errors json.go:12 - _false false 52.0288ms 43.0197ms 858593 calls 848489 errors json.go:11 - _null null 58.0309ms 42.0222ms 878801 calls 878798 errors json.go:9 - _properties string literal 119.3651ms 42.0151ms 818185 calls 0 errors json.go:14 - _properties : 54.5277ms 41.018ms 818185 calls 0 errors json.go:14 - _true true 56.5292ms 37.0166ms 878798 calls 858593 errors json.go:10 - _properties Seq() 4.2989722s 35.5217ms 818185 calls 0 errors json.go:14 - _properties , 45.0263ms 35.519ms 818185 calls 121213 errors json.go:14 - _number number literal 30.0208ms 11.5093ms 313135 calls 131315 errors json.go:13 - _array [ 12.0045ms 10.504ms 131315 calls 121213 errors json.go:16 - _properties Some() 4.4800665s 9.0051ms 121213 calls 0 errors json.go:14 - _object { 11.0053ms 8.5041ms 121213 calls 0 errors json.go:24 - _object } 9.0022ms 8.0031ms 121213 calls 0 errors json.go:24 - _object Seq() 4.5375994s 6.5055ms 121213 calls 0 errors json.go:24 - _array Seq() 1.1524115s 5.5023ms 131315 calls 121213 errors json.go:16 - _array , 3.0008ms 4.0012ms 50509 calls 10102 errors json.go:16 - _array ] 1.5013ms 1.5011ms 10102 calls 0 errors json.go:16 - _array Some() 1.116393s 0s 10102 calls 0 errors json.go:16 + _value Any() 5.1725682s 57.0243ms 878801 calls 0 errors json.go:36 + _properties string literal 131.5662ms 45.0273ms 818185 calls 0 errors json.go:14 + _properties Seq() 3.579274s 42.016ms 818185 calls 0 errors json.go:14 + _properties , 50.5254ms 35.5182ms 818185 calls 121213 errors json.go:14 + _properties : 51.5256ms 35.0183ms 818185 calls 0 errors json.go:14 + _string string literal 78.0462ms 28.0172ms 671723 calls 136369 errors json.go:12 + _number number literal 34.5187ms 15.5065ms 287886 calls 106066 errors json.go:13 + _null null 17.011ms 8.5058ms 252538 calls 252535 errors json.go:9 + _properties Some() 3.7588588s 7.5023ms 121213 calls 0 errors json.go:14 + _object { 10.5049ms 7.0029ms 161616 calls 40403 errors json.go:24 + _true true 10.0072ms 6.505ms 252537 calls 232332 errors json.go:10 + _false false 9.0039ms 4.5032ms 232333 calls 222229 errors json.go:11 + _object Seq() 3.81739s 4.5016ms 161616 calls 40403 errors json.go:24 + _array [ 5.0013ms 4.0011ms 65660 calls 55558 errors json.go:16 + _object } 5.5023ms 2.5021ms 121213 calls 0 errors json.go:24 + _array , 2.0018ms 1.5026ms 50509 calls 10102 errors json.go:16 + _array Some() 933.4591ms 500.8µs 10102 calls 0 errors json.go:16 + _array Seq() 952.9664ms 0s 65660 calls 55558 errors json.go:16 + _array ] 0s 0s 10102 calls 0 errors json.go:16 ``` All times are cumulative, it would be nice to break this down into a parse tree with relative times. This is a nice addition to pprof as it will break down the parsers based on where they are used instead of grouping them all by type. diff --git a/state_test.go b/state_test.go index c279d14..913db05 100644 --- a/state_test.go +++ b/state_test.go @@ -49,3 +49,13 @@ func TestState_Preview(t *testing.T) { require.Equal(t, "asdf", NewState("asdf").Preview(10)) require.Equal(t, "asdfasdfas", NewState("asdfasdfasdf").Preview(10)) } + +func TestWhitespaces(t *testing.T) { + p := Many(Any("hello", "world", "!")) + + _, err := Run(p, "hello world\u2005!", ASCIIWhitespace) + require.Equal(t, "left unparsed: \u2005!", err.Error()) + + _, err = Run(p, "hello world\u2005!", UnicodeWhitespace) + require.NoError(t, err) +}