diff options
author | Adam Scarr <adam@vektah.net> | 2017-08-10 21:58:14 +1000 |
---|---|---|
committer | Adam Scarr <adam@vektah.net> | 2017-08-10 22:01:06 +1000 |
commit | a0e66b1c46ec57218f8a95a21ace7cbbceb29ec2 (patch) | |
tree | 630056d07ca6b44f7a747b7872ba422c6c301d85 | |
parent | af542eff9e1e51561a9efa37685ee07b1d01b53e (diff) |
Document cuts
-rw-r--r-- | combinator.go | 12 | ||||
-rw-r--r-- | combinator_test.go | 13 | ||||
-rw-r--r-- | debugon.go | 2 | ||||
-rw-r--r-- | examples_test.go | 24 | ||||
-rw-r--r-- | html/html.go | 8 | ||||
-rw-r--r-- | readme.md | 23 | ||||
-rw-r--r-- | state.go | 2 | ||||
-rw-r--r-- | state_test.go | 6 |
8 files changed, 74 insertions, 16 deletions
diff --git a/combinator.go b/combinator.go index 7a811bc..1572e6c 100644 --- a/combinator.go +++ b/combinator.go @@ -45,13 +45,12 @@ func Any(parsers ...Parserish) Parser { for _, parser := range parserfied { node := parser(ps) if ps.Errored() { - if ps.Cut > startpos { - longestError = ps.Error - break - } if ps.Error.pos > longestError.pos { longestError = ps.Error } + if ps.Cut > startpos { + break + } ps.Recover() continue } @@ -91,7 +90,7 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser { for { node := opParser(ps) if ps.Errored() { - if len(result.Child) < min { + if len(result.Child) < min || ps.Cut > ps.Pos { ps.Pos = startpos return result } @@ -116,8 +115,9 @@ func Maybe(parser Parserish) Parser { parserfied := Parsify(parser) return NewParser("Maybe()", func(ps *State) Result { + startpos := ps.Pos node := parserfied(ps) - if ps.Errored() { + if ps.Errored() && ps.Cut <= startpos { ps.Recover() } diff --git a/combinator_test.go b/combinator_test.go index 5ac227f..acf0e84 100644 --- a/combinator_test.go +++ b/combinator_test.go @@ -163,12 +163,23 @@ func TestBind(t *testing.T) { } func TestCut(t *testing.T) { - // does backtracking happen anywhere else? t.Run("test any", func(t *testing.T) { _, ps := runParser("var world", Any(Seq("var", Cut, "hello"), "var world")) require.Equal(t, "offset 4: expected hello", ps.Error.Error()) require.Equal(t, 0, ps.Pos) }) + + t.Run("test many", func(t *testing.T) { + _, ps := runParser("hello <world", Many(Any(Seq("<", Cut, Chars("a-z"), ">"), Chars("a-z")))) + require.Equal(t, "offset 12: expected >", ps.Error.Error()) + require.Equal(t, 0, ps.Pos) + }) + + t.Run("test maybe", func(t *testing.T) { + _, ps := runParser("var", Maybe(Seq("var", Cut, "hello"))) + require.Equal(t, "offset 3: expected hello", ps.Error.Error()) + require.Equal(t, 0, ps.Pos) + }) } func TestMerge(t *testing.T) { @@ -32,7 +32,7 @@ type debugParser struct { } func (dp *debugParser) Name() string { - if len(activeParsers) > 2 && activeParsers[len(activeParsers)-2].Var == dp.Var { + if len(activeParsers) > 1 && activeParsers[len(activeParsers)-2].Var == dp.Var { return dp.Match } return dp.Var diff --git a/examples_test.go b/examples_test.go new file mode 100644 index 0000000..a1de129 --- /dev/null +++ b/examples_test.go @@ -0,0 +1,24 @@ +package goparsify_test + +import ( + "fmt" + + . "github.com/vektah/goparsify" +) + +func ExampleCuts() { + // without a cut if the close tag is left out the parser will backtrack and ignore the rest of the string + alpha := Chars("a-z") + nocut := Many(Any(Seq("<", alpha, ">"), alpha)) + _, err := Run(nocut, "asdf <foo") + fmt.Println(err.Error()) + + // with a cut, once we see the open tag we know there must be a close tag that matches it, so the parser will error + cut := Many(Any(Seq("<", Cut, alpha, ">"), alpha)) + _, err = Run(cut, "asdf <foo") + fmt.Println(err.Error()) + + // Output: + // left unparsed: <foo + // offset 9: expected > +} diff --git a/html/html.go b/html/html.go index 0def587..e2dff0b 100644 --- a/html/html.go +++ b/html/html.go @@ -42,17 +42,17 @@ var ( return Result{Result: attr} }) - tstart = Seq("<", Cut, identifier, attrs, ">") + tstart = Seq("<", identifier, Cut, attrs, ">") tend = Seq("</", Cut, identifier, ">") ) func init() { - tag = Map(Seq(tstart, elements, tend), func(node Result) Result { + tag = Map(Seq(tstart, Cut, elements, tend), func(node Result) Result { openTag := node.Child[0] return Result{Result: Tag{ - Name: openTag.Child[2].Token, + Name: openTag.Child[1].Token, Attributes: openTag.Child[3].Result.(map[string]string), - Body: node.Child[1].Result.([]interface{}), + Body: node.Child[2].Result.([]interface{}), }} }) @@ -7,9 +7,9 @@ A parser-combinator library for building easy to test, read and maintain parsers I dont have many benchmarks set up yet, but the json parser is very promising. Nearly keeping up with the stdlib for raw speed: ``` $ go test -bench=. -benchtime=2s -benchmem ./json -BenchmarkUnmarshalParsec-8 50000 71447 ns/op 50464 B/op 1318 allocs/op -BenchmarkUnmarshalParsify-8 50000 56414 ns/op 43887 B/op 334 allocs/op -BenchmarkUnmarshalStdlib-8 50000 50187 ns/op 13949 B/op 262 allocs/op +BenchmarkUnmarshalParsec-8 20000 65682 ns/op 50460 B/op 1318 allocs/op +BenchmarkUnmarshalParsify-8 30000 51292 ns/op 45104 B/op 334 allocs/op +BenchmarkUnmarshalStdlib-8 30000 46522 ns/op 13953 B/op 262 allocs/op PASS ok github.com/vektah/goparsify/json 10.840s ``` @@ -198,6 +198,23 @@ func init() { Take a look at [calc](calc/calc.go) for a full example. +### preventing backtracking with cuts +A cut is a marker that prevents backtracking past the point it was set. This greatly improves error messages when used correctly: +```go +alpha := Chars("a-z") + +// without a cut if the close tag is left out the parser will backtrack and ignore the rest of the string +nocut := Many(Any(Seq("<", alpha, ">"), alpha)) +_, err := Run(nocut, "asdf <foo") +fmt.Println(err.Error()) +// Outputs: left unparsed: <foo + +// with a cut, once we see the open tag we know there must be a close tag that matches it, so the parser will error +cut := Many(Any(Seq("<", Cut, alpha, ">"), alpha)) +_, err = Run(cut, "asdf <foo") +fmt.Println(err.Error()) +// Outputs: offset 9: expected > +``` ### prior art @@ -92,7 +92,7 @@ func (s *State) Get() string { // Preview of the the next x characters func (s *State) Preview(x int) string { - if s.Pos > len(s.Input) { + if s.Pos >= len(s.Input) { return "" } if len(s.Input)-s.Pos >= x { diff --git a/state_test.go b/state_test.go index 5ecf367..c279d14 100644 --- a/state_test.go +++ b/state_test.go @@ -43,3 +43,9 @@ func TestState_Errors(t *testing.T) { require.Equal(t, 2, ps.Error.Pos()) require.True(t, ps.Errored()) } + +func TestState_Preview(t *testing.T) { + require.Equal(t, "", NewState("").Preview(10)) + require.Equal(t, "asdf", NewState("asdf").Preview(10)) + require.Equal(t, "asdfasdfas", NewState("asdfasdfasdf").Preview(10)) +} |