Add one byte prediction to Any()

This commit is contained in:
Adam Scarr 2017-08-13 15:28:43 +10:00
parent e30b922764
commit 79b7cc082c
5 changed files with 112 additions and 26 deletions

View File

@ -38,14 +38,40 @@ func NoAutoWS(parser Parserish) Parser {
// Any matches the first successful parser and returns its result // Any matches the first successful parser and returns its result
func Any(parsers ...Parserish) Parser { func Any(parsers ...Parserish) Parser {
parserfied := ParsifyAll(parsers...) parserfied := ParsifyAll(parsers...)
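// predictor remembers, per first input byte, the index of the parser that last matched input starting with that byte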
predictor := [255]int{}
return NewParser("Any()", func(ps *State) Result { return NewParser("Any()", func(ps *State) Result {
if ps.Pos >= len(ps.Input) {
ps.ErrorHere("!EOF")
return Result{}
}
longestError := Error{} longestError := Error{}
startpos := ps.Pos startpos := ps.Pos
for _, parser := range parserfied { predictorChar := ps.Input[startpos]
predicted := predictor[predictorChar]
node := parserfied[predicted](ps)
if !ps.Errored() {
return node
}
if ps.Error.pos >= longestError.pos {
longestError = ps.Error
}
if ps.Cut <= startpos {
ps.Recover()
} else {
return node
}
for i, parser := range parserfied {
if i == predicted {
continue
}
node := parser(ps) node := parser(ps)
if ps.Errored() { if ps.Errored() {
if ps.Error.pos > longestError.pos { if ps.Error.pos >= longestError.pos {
longestError = ps.Error longestError = ps.Error
} }
if ps.Cut > startpos { if ps.Cut > startpos {
@ -54,6 +80,7 @@ func Any(parsers ...Parserish) Parser {
ps.Recover() ps.Recover()
continue continue
} }
predictor[predictorChar] = i
return node return node
} }

View File

@ -61,6 +61,41 @@ func TestAny(t *testing.T) {
require.Equal(t, Result{}, node) require.Equal(t, Result{}, node)
require.Equal(t, 0, p2.Pos) require.Equal(t, 0, p2.Pos)
}) })
t.Run("branch prediction", func(t *testing.T) {
p := Any("hello", Seq("{", Cut(), "world", "}"), Seq("[", Cut(), "a", "]"))
// warm up the predictor
_, _ = Run(p, "hello")
_, _ = Run(p, "{world}")
t.Run("matches", func(t *testing.T) {
node, ps := runParser("hello world!", p)
require.Equal(t, "hello", node.Token)
require.Equal(t, 5, ps.Pos)
})
t.Run("errors", func(t *testing.T) {
_, ps := runParser("help world!", p)
require.Equal(t, "offset 0: expected [", ps.Error.Error())
require.Equal(t, 0, ps.Error.Pos())
require.Equal(t, 0, ps.Pos)
})
t.Run("errors with cuts", func(t *testing.T) {
_, ps := runParser("{world", p)
require.Equal(t, "offset 6: expected }", ps.Error.Error())
require.Equal(t, 6, ps.Error.Pos())
require.Equal(t, 0, ps.Pos)
})
t.Run("misprededicted cut", func(t *testing.T) {
// This should probably only happen when the predictor is cold
_, ps := runParser("[a", p)
require.Equal(t, "offset 2: expected ]", ps.Error.Error())
require.Equal(t, 2, ps.Error.Pos())
require.Equal(t, 0, ps.Pos)
})
})
} }
func TestSome(t *testing.T) { func TestSome(t *testing.T) {

14
perf_test.go Normal file
View File

@ -0,0 +1,14 @@
package goparsify
import "testing"
// BenchmarkAny times Any() built from three string alternatives. Every
// benchmark iteration runs the same parser over four fixed inputs, in a fixed
// order, so the parser instance is reused across all calls.
func BenchmarkAny(b *testing.B) {
	parser := Any("hello", "goodbye", "help")
	inputs := [...]string{"hello", "hello world", "good boy", "help me"}

	for n := 0; n < b.N; n++ {
		for _, input := range inputs {
			// Results and errors are intentionally discarded: only the
			// cost of the call is being measured.
			_, _ = Run(parser, input)
		}
	}
}

View File

@ -9,14 +9,14 @@ Run(parser, input, ASCIIWhitespace)
``` ```
### benchmarks ### benchmarks
I don't have many benchmarks set up yet, but the json parser is very promising. Nearly keeping up with the stdlib for raw speed: I don't have many benchmarks set up yet, but the json parser keeps up with the stdlib for raw speed:
``` ```
$ go test -bench=. -benchtime=2s -benchmem ./json $ go test -bench=. -benchtime=2s -benchmem ./json
BenchmarkUnmarshalParsec-8 20000 65682 ns/op 50460 B/op 1318 allocs/op BenchmarkUnmarshalParsec-8 50000 66012 ns/op 50462 B/op 1318 allocs/op
BenchmarkUnmarshalParsify-8 30000 51292 ns/op 45104 B/op 334 allocs/op BenchmarkUnmarshalParsify-8 100000 46713 ns/op 44543 B/op 332 allocs/op
BenchmarkUnmarshalStdlib-8 30000 46522 ns/op 13953 B/op 262 allocs/op BenchmarkUnmarshalStdlib-8 100000 46967 ns/op 13952 B/op 262 allocs/op
PASS PASS
ok github.com/vektah/goparsify/json 10.840s ok github.com/vektah/goparsify/json 14.424s
``` ```
### debugging parsers ### debugging parsers
@ -86,25 +86,25 @@ ok github.com/vektah/goparsify/html 0.118s
If you build the parser with -tags debug it will instrument each parser and a call to DumpDebugStats() will show stats: If you build the parser with -tags debug it will instrument each parser and a call to DumpDebugStats() will show stats:
``` ```
var name matches total time self time calls errors location var name matches total time self time calls errors location
_value Any() 6.3303551s 46.0214ms 878801 calls 0 errors json.go:36 _value Any() 5.1725682s 57.0243ms 878801 calls 0 errors json.go:36
_string string literal 100.0559ms 44.019ms 848489 calls 313135 errors json.go:12 _properties string literal 131.5662ms 45.0273ms 818185 calls 0 errors json.go:14
_false false 52.0288ms 43.0197ms 858593 calls 848489 errors json.go:11 _properties Seq() 3.579274s 42.016ms 818185 calls 0 errors json.go:14
_null null 58.0309ms 42.0222ms 878801 calls 878798 errors json.go:9 _properties , 50.5254ms 35.5182ms 818185 calls 121213 errors json.go:14
_properties string literal 119.3651ms 42.0151ms 818185 calls 0 errors json.go:14 _properties : 51.5256ms 35.0183ms 818185 calls 0 errors json.go:14
_properties : 54.5277ms 41.018ms 818185 calls 0 errors json.go:14 _string string literal 78.0462ms 28.0172ms 671723 calls 136369 errors json.go:12
_true true 56.5292ms 37.0166ms 878798 calls 858593 errors json.go:10 _number number literal 34.5187ms 15.5065ms 287886 calls 106066 errors json.go:13
_properties Seq() 4.2989722s 35.5217ms 818185 calls 0 errors json.go:14 _null null 17.011ms 8.5058ms 252538 calls 252535 errors json.go:9
_properties , 45.0263ms 35.519ms 818185 calls 121213 errors json.go:14 _properties Some() 3.7588588s 7.5023ms 121213 calls 0 errors json.go:14
_number number literal 30.0208ms 11.5093ms 313135 calls 131315 errors json.go:13 _object { 10.5049ms 7.0029ms 161616 calls 40403 errors json.go:24
_array [ 12.0045ms 10.504ms 131315 calls 121213 errors json.go:16 _true true 10.0072ms 6.505ms 252537 calls 232332 errors json.go:10
_properties Some() 4.4800665s 9.0051ms 121213 calls 0 errors json.go:14 _false false 9.0039ms 4.5032ms 232333 calls 222229 errors json.go:11
_object { 11.0053ms 8.5041ms 121213 calls 0 errors json.go:24 _object Seq() 3.81739s 4.5016ms 161616 calls 40403 errors json.go:24
_object } 9.0022ms 8.0031ms 121213 calls 0 errors json.go:24 _array [ 5.0013ms 4.0011ms 65660 calls 55558 errors json.go:16
_object Seq() 4.5375994s 6.5055ms 121213 calls 0 errors json.go:24 _object } 5.5023ms 2.5021ms 121213 calls 0 errors json.go:24
_array Seq() 1.1524115s 5.5023ms 131315 calls 121213 errors json.go:16 _array , 2.0018ms 1.5026ms 50509 calls 10102 errors json.go:16
_array , 3.0008ms 4.0012ms 50509 calls 10102 errors json.go:16 _array Some() 933.4591ms 500.8µs 10102 calls 0 errors json.go:16
_array ] 1.5013ms 1.5011ms 10102 calls 0 errors json.go:16 _array Seq() 952.9664ms 0s 65660 calls 55558 errors json.go:16
_array Some() 1.116393s 0s 10102 calls 0 errors json.go:16 _array ] 0s 0s 10102 calls 0 errors json.go:16
``` ```
All times are cumulative; it would be nice to break this down into a parse tree with relative times. This is a nice addition to pprof as it will break down the parsers based on where they are used instead of grouping them all by type. All times are cumulative; it would be nice to break this down into a parse tree with relative times. This is a nice addition to pprof as it will break down the parsers based on where they are used instead of grouping them all by type.

View File

@ -49,3 +49,13 @@ func TestState_Preview(t *testing.T) {
require.Equal(t, "asdf", NewState("asdf").Preview(10)) require.Equal(t, "asdf", NewState("asdf").Preview(10))
require.Equal(t, "asdfasdfas", NewState("asdfasdfasdf").Preview(10)) require.Equal(t, "asdfasdfas", NewState("asdfasdfasdf").Preview(10))
} }
// TestWhitespaces parses one input, which has U+2005 between "world" and "!",
// under both whitespace options. With ASCIIWhitespace the run must report the
// text from U+2005 onward as left unparsed; with UnicodeWhitespace the run must
// finish without an error.
func TestWhitespaces(t *testing.T) {
	const input = "hello world\u2005!"
	words := Many(Any("hello", "world", "!"))

	_, asciiErr := Run(words, input, ASCIIWhitespace)
	require.Equal(t, "left unparsed: \u2005!", asciiErr.Error())

	_, unicodeErr := Run(words, input, UnicodeWhitespace)
	require.NoError(t, unicodeErr)
}