Add one byte prediction to Any()

2017-08-13 15:28:43 +10:00 · 2017-08-13 15:28:43 +10:00 · 79b7cc082c
commit 79b7cc082c
parent e30b922764
5 changed files with 112 additions and 26 deletions
--- a/combinator.go
+++ b/combinator.go
@ -38,14 +38,40 @@ func NoAutoWS(parser Parserish) Parser {
 // Any matches the first successful parser and returns its result
 func Any(parsers ...Parserish) Parser {
 	parserfied := ParsifyAll(parsers...)
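 	// For each possible first input byte, remember the index of the parser that
 	// last succeeded on it, so that parser is tried first next time.
 	// NOTE(review): a byte can be 0-255, so this table looks one entry short; confirm 0xFF cannot panic.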
 	predictor := [255]int{}
 	return NewParser("Any()", func(ps *State) Result {
 		if ps.Pos >= len(ps.Input) {
 			ps.ErrorHere("!EOF")
 			return Result{}
 		}
 		longestError := Error{}
 		startpos := ps.Pos
-		for _, parser := range parserfied {
+		predictorChar := ps.Input[startpos]
 		predicted := predictor[predictorChar]
 		node := parserfied[predicted](ps)
 		if !ps.Errored() {
 			return node
 		}
 		if ps.Error.pos >= longestError.pos {
 			longestError = ps.Error
 		}
 		if ps.Cut <= startpos {
 			ps.Recover()
 		} else {
 			return node
 		}
 		for i, parser := range parserfied {
 			if i == predicted {
 				continue
 			}
 			node := parser(ps)
 			if ps.Errored() {
-				if ps.Error.pos > longestError.pos {
+				if ps.Error.pos >= longestError.pos {
 					longestError = ps.Error
 				}
 				if ps.Cut > startpos {
@ -54,6 +80,7 @@ func Any(parsers ...Parserish) Parser {
 				ps.Recover()
 				continue
 			}
 			predictor[predictorChar] = i
 			return node
 		}
--- a/combinator_test.go
+++ b/combinator_test.go
@ -61,6 +61,41 @@ func TestAny(t *testing.T) {
 		require.Equal(t, Result{}, node)
 		require.Equal(t, 0, p2.Pos)
 	})
 	t.Run("branch prediction", func(t *testing.T) {
 		p := Any("hello", Seq("{", Cut(), "world", "}"), Seq("[", Cut(), "a", "]"))
 		// warm up the predictor
 		_, _ = Run(p, "hello")
 		_, _ = Run(p, "{world}")
 		t.Run("matches", func(t *testing.T) {
 			node, ps := runParser("hello world!", p)
 			require.Equal(t, "hello", node.Token)
 			require.Equal(t, 5, ps.Pos)
 		})
 		t.Run("errors", func(t *testing.T) {
 			_, ps := runParser("help world!", p)
 			require.Equal(t, "offset 0: expected [", ps.Error.Error())
 			require.Equal(t, 0, ps.Error.Pos())
 			require.Equal(t, 0, ps.Pos)
 		})
 		t.Run("errors with cuts", func(t *testing.T) {
 			_, ps := runParser("{world", p)
 			require.Equal(t, "offset 6: expected }", ps.Error.Error())
 			require.Equal(t, 6, ps.Error.Pos())
 			require.Equal(t, 0, ps.Pos)
 		})
 		t.Run("misprededicted cut", func(t *testing.T) {
 			// This should probably only happen when the predictor is cold
 			_, ps := runParser("[a", p)
 			require.Equal(t, "offset 2: expected ]", ps.Error.Error())
 			require.Equal(t, 2, ps.Error.Pos())
 			require.Equal(t, 0, ps.Pos)
 		})
 	})
 }
 func TestSome(t *testing.T) {
--- a/perf_test.go
+++ b/perf_test.go
@ -0,0 +1,14 @@
 package goparsify
 import "testing"
 // BenchmarkAny builds a single Any() over three string alternatives and, on
 // every benchmark iteration, runs it once against each of four fixed inputs.
 // Results and errors are deliberately discarded: only the cost of running the
 // parser is of interest here.
 func BenchmarkAny(b *testing.B) {
 	alternatives := Any("hello", "goodbye", "help")
 	inputs := [...]string{"hello", "hello world", "good boy", "help me"}
 	for n := 0; n < b.N; n++ {
 		for _, input := range inputs {
 			_, _ = Run(alternatives, input)
 		}
 	}
 }
--- a/readme.md
+++ b/readme.md
@ -9,14 +9,14 @@ Run(parser, input, ASCIIWhitespace)
 ```
 ### benchmarks
-I dont have many benchmarks set up yet, but the json parser is very promising. Nearly keeping up with the stdlib for raw speed:
+I don't have many benchmarks set up yet, but the JSON parser keeps up with the stdlib for raw speed:
 ```
 $ go test -bench=. -benchtime=2s -benchmem ./json
-BenchmarkUnmarshalParsec-8         20000             65682 ns/op           50460 B/op       1318 allocs/op
+BenchmarkUnmarshalParsec-8         50000             66012 ns/op           50462 B/op       1318 allocs/op
-BenchmarkUnmarshalParsify-8        30000             51292 ns/op           45104 B/op        334 allocs/op
+BenchmarkUnmarshalParsify-8       100000             46713 ns/op           44543 B/op        332 allocs/op
-BenchmarkUnmarshalStdlib-8         30000             46522 ns/op           13953 B/op        262 allocs/op
+BenchmarkUnmarshalStdlib-8        100000             46967 ns/op           13952 B/op        262 allocs/op
 PASS
-ok      github.com/vektah/goparsify/json        10.840s
+ok      github.com/vektah/goparsify/json        14.424s
 ```
 ### debugging parsers
@ -86,25 +86,25 @@ ok      github.com/vektah/goparsify/html        0.118s
 If you build the parser with -tags debug it will instrument each parser and a call to DumpDebugStats() will show stats:
 ```
      var name           matches               total time        self time           calls           errors      location
-              _value                 Any()       6.3303551s        46.0214ms      878801 calls           0 errors json.go:36
+              _value                 Any()       5.1725682s        57.0243ms      878801 calls           0 errors json.go:36
-             _string        string literal       100.0559ms         44.019ms      848489 calls      313135 errors json.go:12
+         _properties        string literal       131.5662ms        45.0273ms      818185 calls           0 errors json.go:14
-              _false                 false        52.0288ms        43.0197ms      858593 calls      848489 errors json.go:11
+         _properties                 Seq()        3.579274s         42.016ms      818185 calls           0 errors json.go:14
-               _null                  null        58.0309ms        42.0222ms      878801 calls      878798 errors json.go:9
+         _properties                     ,        50.5254ms        35.5182ms      818185 calls      121213 errors json.go:14
-         _properties        string literal       119.3651ms        42.0151ms      818185 calls           0 errors json.go:14
+         _properties                     :        51.5256ms        35.0183ms      818185 calls           0 errors json.go:14
-         _properties                     :        54.5277ms         41.018ms      818185 calls           0 errors json.go:14
+             _string        string literal        78.0462ms        28.0172ms      671723 calls      136369 errors json.go:12
-               _true                  true        56.5292ms        37.0166ms      878798 calls      858593 errors json.go:10
+             _number        number literal        34.5187ms        15.5065ms      287886 calls      106066 errors json.go:13
-         _properties                 Seq()       4.2989722s        35.5217ms      818185 calls           0 errors json.go:14
+               _null                  null         17.011ms         8.5058ms      252538 calls      252535 errors json.go:9
-         _properties                     ,        45.0263ms         35.519ms      818185 calls      121213 errors json.go:14
+         _properties                Some()       3.7588588s         7.5023ms      121213 calls           0 errors json.go:14
-             _number        number literal        30.0208ms        11.5093ms      313135 calls      131315 errors json.go:13
+             _object                     {        10.5049ms         7.0029ms      161616 calls       40403 errors json.go:24
-              _array                     [        12.0045ms         10.504ms      131315 calls      121213 errors json.go:16
+               _true                  true        10.0072ms          6.505ms      252537 calls      232332 errors json.go:10
-         _properties                Some()       4.4800665s         9.0051ms      121213 calls           0 errors json.go:14
+              _false                 false         9.0039ms         4.5032ms      232333 calls      222229 errors json.go:11
-             _object                     {        11.0053ms         8.5041ms      121213 calls           0 errors json.go:24
+             _object                 Seq()         3.81739s         4.5016ms      161616 calls       40403 errors json.go:24
-             _object                     }         9.0022ms         8.0031ms      121213 calls           0 errors json.go:24
+              _array                     [         5.0013ms         4.0011ms       65660 calls       55558 errors json.go:16
-             _object                 Seq()       4.5375994s         6.5055ms      121213 calls           0 errors json.go:24
+             _object                     }         5.5023ms         2.5021ms      121213 calls           0 errors json.go:24
-              _array                 Seq()       1.1524115s         5.5023ms      131315 calls      121213 errors json.go:16
+              _array                     ,         2.0018ms         1.5026ms       50509 calls       10102 errors json.go:16
-              _array                     ,         3.0008ms         4.0012ms       50509 calls       10102 errors json.go:16
+              _array                Some()       933.4591ms          500.8µs       10102 calls           0 errors json.go:16
-              _array                     ]         1.5013ms         1.5011ms       10102 calls           0 errors json.go:16
+              _array                 Seq()       952.9664ms               0s       65660 calls       55558 errors json.go:16
-              _array                Some()        1.116393s               0s       10102 calls           0 errors json.go:16
+              _array                     ]               0s               0s       10102 calls           0 errors json.go:16
 ```
 All times are cumulative. It would be nice to break this down into a parse tree with relative times. This is a nice addition to pprof, as it breaks down the parsers based on where they are used instead of grouping them all by type.
--- a/state_test.go
+++ b/state_test.go
@ -49,3 +49,13 @@ func TestState_Preview(t *testing.T) {
 	require.Equal(t, "asdf", NewState("asdf").Preview(10))
 	require.Equal(t, "asdfasdfas", NewState("asdfasdfasdf").Preview(10))
 }
 // TestWhitespaces runs the same input through the same parser under two
 // whitespace modes. The input contains U+2005 between "world" and "!": with
 // ASCIIWhitespace the run must stop there and report the remainder as
 // unparsed, while with UnicodeWhitespace the whole input must parse cleanly.
 func TestWhitespaces(t *testing.T) {
 	const input = "hello world\u2005!"
 	p := Many(Any("hello", "world", "!"))
 	_, err := Run(p, input, ASCIIWhitespace)
 	// EqualError fails the test cleanly when err is nil, whereas calling
 	// err.Error() directly would panic with a nil dereference.
 	require.EqualError(t, err, "left unparsed: \u2005!")
 	_, err = Run(p, input, UnicodeWhitespace)
 	require.NoError(t, err)
 }