summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdam Scarr <adam@vektah.net>2017-08-13 15:28:43 +1000
committerAdam Scarr <adam@vektah.net>2017-08-13 15:28:43 +1000
commit79b7cc082cac1904b87bbe7996df5a8824f7c2d8 (patch)
tree6f97bfaf20629a77fc2a847fa82b0bcb5a5cc2af
parente30b92276464f1a7cbd9e3772f315c990fac2073 (diff)
Add one byte prediction to Any()
-rw-r--r--combinator.go31
-rw-r--r--combinator_test.go35
-rw-r--r--perf_test.go14
-rw-r--r--readme.md48
-rw-r--r--state_test.go10
5 files changed, 112 insertions, 26 deletions
diff --git a/combinator.go b/combinator.go
index 1572e6c..01386e6 100644
--- a/combinator.go
+++ b/combinator.go
@@ -38,14 +38,40 @@ func NoAutoWS(parser Parserish) Parser {
// Any matches the first successful parser and returns its result
func Any(parsers ...Parserish) Parser {
parserfied := ParsifyAll(parsers...)
+ // For
+ predictor := [255]int{}
return NewParser("Any()", func(ps *State) Result {
+ if ps.Pos >= len(ps.Input) {
+ ps.ErrorHere("!EOF")
+ return Result{}
+ }
longestError := Error{}
startpos := ps.Pos
- for _, parser := range parserfied {
+ predictorChar := ps.Input[startpos]
+ predicted := predictor[predictorChar]
+
+ node := parserfied[predicted](ps)
+ if !ps.Errored() {
+ return node
+ }
+
+ if ps.Error.pos >= longestError.pos {
+ longestError = ps.Error
+ }
+ if ps.Cut <= startpos {
+ ps.Recover()
+ } else {
+ return node
+ }
+
+ for i, parser := range parserfied {
+ if i == predicted {
+ continue
+ }
node := parser(ps)
if ps.Errored() {
- if ps.Error.pos > longestError.pos {
+ if ps.Error.pos >= longestError.pos {
longestError = ps.Error
}
if ps.Cut > startpos {
@@ -54,6 +80,7 @@ func Any(parsers ...Parserish) Parser {
ps.Recover()
continue
}
+ predictor[predictorChar] = i
return node
}
diff --git a/combinator_test.go b/combinator_test.go
index 50b7010..1c95eba 100644
--- a/combinator_test.go
+++ b/combinator_test.go
@@ -61,6 +61,41 @@ func TestAny(t *testing.T) {
require.Equal(t, Result{}, node)
require.Equal(t, 0, p2.Pos)
})
+
+ t.Run("branch prediction", func(t *testing.T) {
+ p := Any("hello", Seq("{", Cut(), "world", "}"), Seq("[", Cut(), "a", "]"))
+ // warm up the predictor
+ _, _ = Run(p, "hello")
+ _, _ = Run(p, "{world}")
+
+ t.Run("matches", func(t *testing.T) {
+ node, ps := runParser("hello world!", p)
+ require.Equal(t, "hello", node.Token)
+ require.Equal(t, 5, ps.Pos)
+ })
+
+ t.Run("errors", func(t *testing.T) {
+ _, ps := runParser("help world!", p)
+ require.Equal(t, "offset 0: expected [", ps.Error.Error())
+ require.Equal(t, 0, ps.Error.Pos())
+ require.Equal(t, 0, ps.Pos)
+ })
+
+ t.Run("errors with cuts", func(t *testing.T) {
+ _, ps := runParser("{world", p)
+ require.Equal(t, "offset 6: expected }", ps.Error.Error())
+ require.Equal(t, 6, ps.Error.Pos())
+ require.Equal(t, 0, ps.Pos)
+ })
+
+ t.Run("misprededicted cut", func(t *testing.T) {
+ // This should probably only happen when the predictor is cold
+ _, ps := runParser("[a", p)
+ require.Equal(t, "offset 2: expected ]", ps.Error.Error())
+ require.Equal(t, 2, ps.Error.Pos())
+ require.Equal(t, 0, ps.Pos)
+ })
+ })
}
func TestSome(t *testing.T) {
diff --git a/perf_test.go b/perf_test.go
new file mode 100644
index 0000000..d6a0092
--- /dev/null
+++ b/perf_test.go
@@ -0,0 +1,14 @@
+package goparsify
+
+import "testing"
+
+func BenchmarkAny(b *testing.B) {
+ p := Any("hello", "goodbye", "help")
+
+ for i := 0; i < b.N; i++ {
+ _, _ = Run(p, "hello")
+ _, _ = Run(p, "hello world")
+ _, _ = Run(p, "good boy")
+ _, _ = Run(p, "help me")
+ }
+}
diff --git a/readme.md b/readme.md
index f4af219..bcb72f3 100644
--- a/readme.md
+++ b/readme.md
@@ -9,14 +9,14 @@ Run(parser, input, ASCIIWhitespace)
```
### benchmarks
-I dont have many benchmarks set up yet, but the json parser is very promising. Nearly keeping up with the stdlib for raw speed:
+I dont have many benchmarks set up yet, but the json parser keeps up with the stdlib for raw speed:
```
$ go test -bench=. -benchtime=2s -benchmem ./json
-BenchmarkUnmarshalParsec-8 20000 65682 ns/op 50460 B/op 1318 allocs/op
-BenchmarkUnmarshalParsify-8 30000 51292 ns/op 45104 B/op 334 allocs/op
-BenchmarkUnmarshalStdlib-8 30000 46522 ns/op 13953 B/op 262 allocs/op
+BenchmarkUnmarshalParsec-8 50000 66012 ns/op 50462 B/op 1318 allocs/op
+BenchmarkUnmarshalParsify-8 100000 46713 ns/op 44543 B/op 332 allocs/op
+BenchmarkUnmarshalStdlib-8 100000 46967 ns/op 13952 B/op 262 allocs/op
PASS
-ok github.com/vektah/goparsify/json 10.840s
+ok github.com/vektah/goparsify/json 14.424s
```
### debugging parsers
@@ -86,25 +86,25 @@ ok github.com/vektah/goparsify/html 0.118s
If you build the parser with -tags debug it will instrument each parser and a call to DumpDebugStats() will show stats:
```
var name matches total time self time calls errors location
- _value Any() 6.3303551s 46.0214ms 878801 calls 0 errors json.go:36
- _string string literal 100.0559ms 44.019ms 848489 calls 313135 errors json.go:12
- _false false 52.0288ms 43.0197ms 858593 calls 848489 errors json.go:11
- _null null 58.0309ms 42.0222ms 878801 calls 878798 errors json.go:9
- _properties string literal 119.3651ms 42.0151ms 818185 calls 0 errors json.go:14
- _properties : 54.5277ms 41.018ms 818185 calls 0 errors json.go:14
- _true true 56.5292ms 37.0166ms 878798 calls 858593 errors json.go:10
- _properties Seq() 4.2989722s 35.5217ms 818185 calls 0 errors json.go:14
- _properties , 45.0263ms 35.519ms 818185 calls 121213 errors json.go:14
- _number number literal 30.0208ms 11.5093ms 313135 calls 131315 errors json.go:13
- _array [ 12.0045ms 10.504ms 131315 calls 121213 errors json.go:16
- _properties Some() 4.4800665s 9.0051ms 121213 calls 0 errors json.go:14
- _object { 11.0053ms 8.5041ms 121213 calls 0 errors json.go:24
- _object } 9.0022ms 8.0031ms 121213 calls 0 errors json.go:24
- _object Seq() 4.5375994s 6.5055ms 121213 calls 0 errors json.go:24
- _array Seq() 1.1524115s 5.5023ms 131315 calls 121213 errors json.go:16
- _array , 3.0008ms 4.0012ms 50509 calls 10102 errors json.go:16
- _array ] 1.5013ms 1.5011ms 10102 calls 0 errors json.go:16
- _array Some() 1.116393s 0s 10102 calls 0 errors json.go:16
+ _value Any() 5.1725682s 57.0243ms 878801 calls 0 errors json.go:36
+ _properties string literal 131.5662ms 45.0273ms 818185 calls 0 errors json.go:14
+ _properties Seq() 3.579274s 42.016ms 818185 calls 0 errors json.go:14
+ _properties , 50.5254ms 35.5182ms 818185 calls 121213 errors json.go:14
+ _properties : 51.5256ms 35.0183ms 818185 calls 0 errors json.go:14
+ _string string literal 78.0462ms 28.0172ms 671723 calls 136369 errors json.go:12
+ _number number literal 34.5187ms 15.5065ms 287886 calls 106066 errors json.go:13
+ _null null 17.011ms 8.5058ms 252538 calls 252535 errors json.go:9
+ _properties Some() 3.7588588s 7.5023ms 121213 calls 0 errors json.go:14
+ _object { 10.5049ms 7.0029ms 161616 calls 40403 errors json.go:24
+ _true true 10.0072ms 6.505ms 252537 calls 232332 errors json.go:10
+ _false false 9.0039ms 4.5032ms 232333 calls 222229 errors json.go:11
+ _object Seq() 3.81739s 4.5016ms 161616 calls 40403 errors json.go:24
+ _array [ 5.0013ms 4.0011ms 65660 calls 55558 errors json.go:16
+ _object } 5.5023ms 2.5021ms 121213 calls 0 errors json.go:24
+ _array , 2.0018ms 1.5026ms 50509 calls 10102 errors json.go:16
+ _array Some() 933.4591ms 500.8µs 10102 calls 0 errors json.go:16
+ _array Seq() 952.9664ms 0s 65660 calls 55558 errors json.go:16
+ _array ] 0s 0s 10102 calls 0 errors json.go:16
```
All times are cumulative, it would be nice to break this down into a parse tree with relative times. This is a nice addition to pprof as it will break down the parsers based on where they are used instead of grouping them all by type.
diff --git a/state_test.go b/state_test.go
index c279d14..913db05 100644
--- a/state_test.go
+++ b/state_test.go
@@ -49,3 +49,13 @@ func TestState_Preview(t *testing.T) {
require.Equal(t, "asdf", NewState("asdf").Preview(10))
require.Equal(t, "asdfasdfas", NewState("asdfasdfasdf").Preview(10))
}
+
+func TestWhitespaces(t *testing.T) {
+ p := Many(Any("hello", "world", "!"))
+
+ _, err := Run(p, "hello world\u2005!", ASCIIWhitespace)
+ require.Equal(t, "left unparsed: \u2005!", err.Error())
+
+ _, err = Run(p, "hello world\u2005!", UnicodeWhitespace)
+ require.NoError(t, err)
+}