From a0e66b1c46ec57218f8a95a21ace7cbbceb29ec2 Mon Sep 17 00:00:00 2001 From: Adam Scarr Date: Thu, 10 Aug 2017 21:58:14 +1000 Subject: [PATCH] Document cuts --- combinator.go | 12 ++++++------ combinator_test.go | 13 ++++++++++++- debugon.go | 2 +- examples_test.go | 24 ++++++++++++++++++++++++ html/html.go | 8 ++++---- readme.md | 23 ++++++++++++++++++++--- state.go | 2 +- state_test.go | 6 ++++++ 8 files changed, 74 insertions(+), 16 deletions(-) create mode 100644 examples_test.go diff --git a/combinator.go b/combinator.go index 7a811bc..1572e6c 100644 --- a/combinator.go +++ b/combinator.go @@ -45,13 +45,12 @@ func Any(parsers ...Parserish) Parser { for _, parser := range parserfied { node := parser(ps) if ps.Errored() { - if ps.Cut > startpos { - longestError = ps.Error - break - } if ps.Error.pos > longestError.pos { longestError = ps.Error } + if ps.Cut > startpos { + break + } ps.Recover() continue } @@ -91,7 +90,7 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser { for { node := opParser(ps) if ps.Errored() { - if len(result.Child) < min { + if len(result.Child) < min || ps.Cut > ps.Pos { ps.Pos = startpos return result } @@ -116,8 +115,9 @@ func Maybe(parser Parserish) Parser { parserfied := Parsify(parser) return NewParser("Maybe()", func(ps *State) Result { + startpos := ps.Pos node := parserfied(ps) - if ps.Errored() { + if ps.Errored() && ps.Cut <= startpos { ps.Recover() } diff --git a/combinator_test.go b/combinator_test.go index 5ac227f..acf0e84 100644 --- a/combinator_test.go +++ b/combinator_test.go @@ -163,12 +163,23 @@ func TestBind(t *testing.T) { } func TestCut(t *testing.T) { - // does backtracking happen anywhere else? t.Run("test any", func(t *testing.T) { _, ps := runParser("var world", Any(Seq("var", Cut, "hello"), "var world")) require.Equal(t, "offset 4: expected hello", ps.Error.Error()) require.Equal(t, 0, ps.Pos) }) + + t.Run("test many", func(t *testing.T) { + _, ps := runParser("hello "), Chars("a-z")))) + require.Equal(t, "offset 12: expected >", ps.Error.Error()) + require.Equal(t, 0, ps.Pos) + }) + + t.Run("test maybe", func(t *testing.T) { + _, ps := runParser("var", Maybe(Seq("var", Cut, "hello"))) + require.Equal(t, "offset 3: expected hello", ps.Error.Error()) + require.Equal(t, 0, ps.Pos) + }) } func TestMerge(t *testing.T) { diff --git a/debugon.go b/debugon.go index 4e68db3..9154472 100644 --- a/debugon.go +++ b/debugon.go @@ -32,7 +32,7 @@ type debugParser struct { } func (dp *debugParser) Name() string { - if len(activeParsers) > 2 && activeParsers[len(activeParsers)-2].Var == dp.Var { + if len(activeParsers) > 1 && activeParsers[len(activeParsers)-2].Var == dp.Var { return dp.Match } return dp.Var diff --git a/examples_test.go b/examples_test.go new file mode 100644 index 0000000..a1de129 --- /dev/null +++ b/examples_test.go @@ -0,0 +1,24 @@ +package goparsify_test + +import ( + "fmt" + + . "github.com/vektah/goparsify" +) + +func ExampleCuts() { + // without a cut if the close tag is left out the parser will backtrack and ignore the rest of the string + alpha := Chars("a-z") + nocut := Many(Any(Seq("<", alpha, ">"), alpha)) + _, err := Run(nocut, "asdf "), alpha)) + _, err = Run(cut, "asdf +} diff --git a/html/html.go b/html/html.go index 0def587..e2dff0b 100644 --- a/html/html.go +++ b/html/html.go @@ -42,17 +42,17 @@ var ( return Result{Result: attr} }) - tstart = Seq("<", Cut, identifier, attrs, ">") + tstart = Seq("<", identifier, Cut, attrs, ">") tend = Seq("") ) func init() { - tag = Map(Seq(tstart, elements, tend), func(node Result) Result { + tag = Map(Seq(tstart, Cut, elements, tend), func(node Result) Result { openTag := node.Child[0] return Result{Result: Tag{ - Name: openTag.Child[2].Token, + Name: openTag.Child[1].Token, Attributes: openTag.Child[3].Result.(map[string]string), - Body: node.Child[1].Result.([]interface{}), + Body: node.Child[2].Result.([]interface{}), }} }) diff --git a/readme.md b/readme.md index 9e712b0..1da05ed 100644 --- a/readme.md +++ b/readme.md @@ -7,9 +7,9 @@ A parser-combinator library for building easy to test, read and maintain parsers I dont have many benchmarks set up yet, but the json parser is very promising. Nearly keeping up with the stdlib for raw speed: ``` $ go test -bench=. -benchtime=2s -benchmem ./json -BenchmarkUnmarshalParsec-8 50000 71447 ns/op 50464 B/op 1318 allocs/op -BenchmarkUnmarshalParsify-8 50000 56414 ns/op 43887 B/op 334 allocs/op -BenchmarkUnmarshalStdlib-8 50000 50187 ns/op 13949 B/op 262 allocs/op +BenchmarkUnmarshalParsec-8 20000 65682 ns/op 50460 B/op 1318 allocs/op +BenchmarkUnmarshalParsify-8 30000 51292 ns/op 45104 B/op 334 allocs/op +BenchmarkUnmarshalStdlib-8 30000 46522 ns/op 13953 B/op 262 allocs/op PASS ok github.com/vektah/goparsify/json 10.840s ``` @@ -198,6 +198,23 @@ func init() { Take a look at [calc](calc/calc.go) for a full example. +### preventing backtracking with cuts +A cut is a marker that prevents backtracking past the point it was set. This greatly improves error messages when used correctly: +```go +alpha := Chars("a-z") + +// without a cut if the close tag is left out the parser will backtrack and ignore the rest of the string +nocut := Many(Any(Seq("<", alpha, ">"), alpha)) +_, err := Run(nocut, "asdf "), alpha)) +_, err = Run(cut, "asdf +``` ### prior art diff --git a/state.go b/state.go index 384eb32..64737dc 100644 --- a/state.go +++ b/state.go @@ -92,7 +92,7 @@ func (s *State) Get() string { // Preview of the the next x characters func (s *State) Preview(x int) string { - if s.Pos > len(s.Input) { + if s.Pos >= len(s.Input) { return "" } if len(s.Input)-s.Pos >= x { diff --git a/state_test.go b/state_test.go index 5ecf367..c279d14 100644 --- a/state_test.go +++ b/state_test.go @@ -43,3 +43,9 @@ func TestState_Errors(t *testing.T) { require.Equal(t, 2, ps.Error.Pos()) require.True(t, ps.Errored()) } + +func TestState_Preview(t *testing.T) { + require.Equal(t, "", NewState("").Preview(10)) + require.Equal(t, "asdf", NewState("asdf").Preview(10)) + require.Equal(t, "asdfasdfas", NewState("asdfasdfasdf").Preview(10)) +}