diff --git a/combinator.go b/combinator.go index 1f1317c..6197d6b 100644 --- a/combinator.go +++ b/combinator.go @@ -5,12 +5,13 @@ import ( "fmt" ) -func Nil(p Pointer) (Node, Pointer) { - return nil, p +func Nil(ps *State) interface{} { + return nil } -func Never(p Pointer) (Node, Pointer) { - return Error{p.pos, "Never matches"}, p +func Never(ps *State) interface{} { + ps.ErrorHere("not anything") + return nil } func And(parsers ...Parserish) Parser { @@ -18,23 +19,22 @@ func And(parsers ...Parserish) Parser { return Nil } - ps := ParsifyAll(parsers...) + parserfied := ParsifyAll(parsers...) - return func(p Pointer) (Node, Pointer) { - var nodes = make([]Node, 0, len(ps)) - var node Node - newP := p - for _, parser := range ps { - node, newP = parser(newP) - if node == nil { - continue + return func(ps *State) interface{} { + var nodes = make([]interface{}, 0, len(parserfied)) + startpos := ps.Pos + for _, parser := range parserfied { + node := parser(ps) + if ps.Errored() { + ps.Pos = startpos + return nil } - if IsError(node) { - return node, p + if node != nil { + nodes = append(nodes, node) } - nodes = append(nodes, node) } - return nodes, newP + return nodes } } @@ -43,27 +43,26 @@ func Any(parsers ...Parserish) Parser { return Nil } - ps := ParsifyAll(parsers...) + parserfied := ParsifyAll(parsers...) - return func(p Pointer) (Node, Pointer) { - errors := []Error{} - for _, parser := range ps { - node, newP := parser(p) - if err, isErr := node.(Error); isErr { - errors = append(errors, err) + return func(ps *State) interface{} { + longestError := Error{} + startpos := ps.Pos + for _, parser := range parserfied { + node := parser(ps) + if ps.Errored() { + if ps.Error.pos > longestError.pos { + longestError = ps.Error + } + ps.ClearError() continue } - return node, newP + return node } - longestError := errors[0] - for _, e := range errors[1:] { - if e.pos > longestError.pos { - longestError = e - } - } - - return longestError, p + ps.Error = longestError + ps.Pos = startpos + return nil } } @@ -91,67 +90,81 @@ func manyImpl(min int, op Parserish, until Parserish, sep ...Parserish) Parser { sepParser = Parsify(sep[0]) } - return func(p Pointer) (Node, Pointer) { - var node Node - nodes := make([]Node, 0) - newP := p + return func(ps *State) interface{} { + var node interface{} + nodes := make([]interface{}, 0, 20) + startpos := ps.Pos for { - if node, _ := untilParser(newP); !IsError(node) { + tempPos := ps.Pos + node = untilParser(ps) + if !ps.Errored() { + ps.Pos = tempPos if len(nodes) < min { - return NewError(newP.pos, "Unexpected input"), p + ps.Pos = startpos + ps.ErrorHere("something else") + return nil } break } + ps.ClearError() + + node = opParser(ps) + if ps.Errored() { + if len(nodes) < min { + ps.Pos = startpos + return nil + } + ps.ClearError() + break + } - if node, newP = opParser(newP); IsError(node) { - if len(nodes) < min { - return node, p - } - break - } nodes = append(nodes, node) - if node, newP = sepParser(newP); IsError(node) { + + node = sepParser(ps) + if ps.Errored() { + ps.ClearError() break } } - return nodes, newP + return nodes } } func Maybe(parser Parserish) Parser { - realParser := Parsify(parser) + parserfied := Parsify(parser) - return func(p Pointer) (Node, Pointer) { - node, newP := realParser(p) - if IsError(node) { - return nil, p + return func(ps *State) interface{} { + node := parserfied(ps) + if ps.Errored() { + ps.ClearError() + return nil } - return node, newP + + return node } } -func Map(parser Parserish, f func(n Node) Node) Parser { +func Map(parser Parserish, f func(n interface{}) interface{}) Parser { p := Parsify(parser) - return func(ptr Pointer) (Node, Pointer) { - node, newPtr := p(ptr) - if IsError(node) { - return node, ptr + return func(ps *State) interface{} { + node := p(ps) + if ps.Errored() { + return nil } - - return f(node), newPtr + return f(node) } } -func flatten(n Node) string { +func flatten(n interface{}) interface{} { if s, ok := n.(string); ok { return s } - if nodes, ok := n.([]Node); ok { + if nodes, ok := n.([]interface{}); ok { sbuf := &bytes.Buffer{} for _, node := range nodes { - sbuf.WriteString(flatten(node)) + sbuf.WriteString(flatten(node).(string)) } return sbuf.String() } @@ -160,7 +173,5 @@ func flatten(n Node) string { } func Merge(parser Parserish) Parser { - return Map(parser, func(n Node) Node { - return flatten(n) - }) + return Map(parser, flatten) } diff --git a/combinator_test.go b/combinator_test.go index efee7ba..31fad1b 100644 --- a/combinator_test.go +++ b/combinator_test.go @@ -7,26 +7,35 @@ import ( ) func TestNil(t *testing.T) { - p := Pointer{"hello world", 0} + node, p2 := runParser("hello world", Nil) - node, p2 := Nil(p) require.Equal(t, nil, node) - require.Equal(t, p, p2) + require.Equal(t, 0, p2.Pos) + require.False(t, p2.Errored()) +} + +func TestNever(t *testing.T) { + node, p2 := runParser("hello world", Never) + + require.Equal(t, nil, node) + require.Equal(t, 0, p2.Pos) + require.True(t, p2.Errored()) } func TestAnd(t *testing.T) { - p := Pointer{"hello world", 0} + parser := And("hello", WS, "world") t.Run("matches sequence", func(t *testing.T) { - node, p2 := And("hello", WS, "world")(p) - require.Equal(t, []Node{"hello", "world"}, node) + node, p2 := runParser("hello world", parser) + require.Equal(t, []interface{}{"hello", "world"}, node) require.Equal(t, "", p2.Get()) }) t.Run("returns errors", func(t *testing.T) { - e, p3 := And("hello", WS, "there")(p) - require.Equal(t, NewError(6, "Expected there"), e) - require.Equal(t, 0, p3.pos) + _, p2 := runParser("hello there", parser) + require.Equal(t, "world", p2.Error.Expected) + require.Equal(t, 6, p2.Error.pos) + require.Equal(t, 0, p2.Pos) }) t.Run("No parsers", func(t *testing.T) { @@ -35,44 +44,42 @@ func TestAnd(t *testing.T) { } func TestMaybe(t *testing.T) { - p := Pointer{"hello world", 0} - t.Run("matches sequence", func(t *testing.T) { - node, p2 := Maybe("hello")(p) + node, p2 := runParser("hello world", Maybe("hello")) require.Equal(t, "hello", node) require.Equal(t, " world", p2.Get()) }) t.Run("returns no errors", func(t *testing.T) { - e, p3 := Maybe("world")(p) - require.Equal(t, nil, e) - require.Equal(t, 0, p3.pos) + node, p3 := runParser("hello world", Maybe("world")) + require.Equal(t, nil, node) + require.False(t, p3.Errored()) + require.Equal(t, 0, p3.Pos) }) } func TestAny(t *testing.T) { - p := Pointer{"hello world!", 0} - t.Run("Matches any", func(t *testing.T) { - node, p2 := Any("hello", "world")(p) + node, p2 := runParser("hello world!", Any("hello", "world")) require.Equal(t, "hello", node) - require.Equal(t, 5, p2.pos) + require.Equal(t, 5, p2.Pos) }) t.Run("Returns longest error", func(t *testing.T) { - err, p2 := Any( - Exact("nope"), - And(Exact("hello"), WS, Exact("world"), Exact(".")), - And(Exact("hello"), WS, Exact("brother")), - )(p) - require.Equal(t, NewError(11, "Expected ."), err) - require.Equal(t, 0, p2.pos) + _, p2 := runParser("hello world!", Any( + "nope", + And("hello", WS, "world", "."), + And("hello", WS, "brother"), + )) + require.Equal(t, "offset 11: Expected .", p2.Error.Error()) + require.Equal(t, 11, p2.Error.Pos()) + require.Equal(t, 0, p2.Pos) }) t.Run("Accepts nil matches", func(t *testing.T) { - node, p2 := Any(Exact("ffffff"), WS)(p) + node, p2 := runParser("hello world!", Any(Exact("ffffff"), WS)) require.Equal(t, nil, node) - require.Equal(t, 0, p2.pos) + require.Equal(t, 0, p2.Pos) }) t.Run("No parsers", func(t *testing.T) { @@ -81,87 +88,79 @@ func TestAny(t *testing.T) { } func TestKleene(t *testing.T) { - p := Pointer{"a,b,c,d,e,", 0} - t.Run("Matches sequence with sep", func(t *testing.T) { - node, p2 := Kleene(CharRun("abcdefg"), Exact(","))(p) - require.Equal(t, []Node{"a", "b", "c", "d", "e"}, node) - require.Equal(t, 10, p2.pos) + node, p2 := runParser("a,b,c,d,e,", Kleene(Chars("a-g"), ",")) + require.False(t, p2.Errored()) + require.Equal(t, []interface{}{"a", "b", "c", "d", "e"}, node) + require.Equal(t, 10, p2.Pos) }) t.Run("Matches sequence without sep", func(t *testing.T) { - node, p2 := Kleene(Any(CharRun("abcdefg"), Exact(",")))(p) - require.Equal(t, []Node{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node) - require.Equal(t, 10, p2.pos) + node, p2 := runParser("a,b,c,d,e,", Kleene(Any(Chars("a-g"), ","))) + require.Equal(t, []interface{}{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node) + require.Equal(t, 10, p2.Pos) }) t.Run("Stops on error", func(t *testing.T) { - node, p2 := Kleene(CharRun("abc"), Exact(","))(p) - require.Equal(t, []Node{"a", "b", "c"}, node) - require.Equal(t, 6, p2.pos) + node, p2 := runParser("a,b,c,d,e,", Kleene(Chars("a-c"), ",")) + require.Equal(t, []interface{}{"a", "b", "c"}, node) + require.Equal(t, 6, p2.Pos) require.Equal(t, "d,e,", p2.Get()) }) } func TestMany(t *testing.T) { - p := Pointer{"a,b,c,d,e,", 0} - t.Run("Matches sequence with sep", func(t *testing.T) { - node, p2 := Many(CharRun("abcdefg"), Exact(","))(p) - require.Equal(t, []Node{"a", "b", "c", "d", "e"}, node) - require.Equal(t, 10, p2.pos) + node, p2 := runParser("a,b,c,d,e,", Many(Chars("a-g"), Exact(","))) + require.Equal(t, []interface{}{"a", "b", "c", "d", "e"}, node) + require.Equal(t, 10, p2.Pos) }) t.Run("Matches sequence without sep", func(t *testing.T) { - node, p2 := Many(Any(CharRun("abcdefg"), Exact(",")))(p) - require.Equal(t, []Node{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node) - require.Equal(t, 10, p2.pos) + node, p2 := runParser("a,b,c,d,e,", Many(Any(Chars("abcdefg"), Exact(",")))) + require.Equal(t, []interface{}{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node) + require.Equal(t, 10, p2.Pos) }) t.Run("Stops on error", func(t *testing.T) { - node, p2 := Many(CharRun("abc"), Exact(","))(p) - require.Equal(t, []Node{"a", "b", "c"}, node) - require.Equal(t, 6, p2.pos) + node, p2 := runParser("a,b,c,d,e,", Many(Chars("abc"), Exact(","))) + require.Equal(t, []interface{}{"a", "b", "c"}, node) + require.Equal(t, 6, p2.Pos) require.Equal(t, "d,e,", p2.Get()) }) t.Run("Returns error if nothing matches", func(t *testing.T) { - node, p2 := Many(CharRun("def"), Exact(","))(p) - require.Equal(t, NewError(0, "Expected some of def"), node) - require.Equal(t, 0, p2.pos) + _, p2 := runParser("a,b,c,d,e,", Many(Chars("def"), Exact(","))) + require.Equal(t, "offset 0: Expected def", p2.Error.Error()) require.Equal(t, "a,b,c,d,e,", p2.Get()) }) } func TestKleeneUntil(t *testing.T) { - p := Pointer{"a,b,c,d,e,fg", 0} - t.Run("Matches sequence with sep", func(t *testing.T) { - node, p2 := KleeneUntil(CharRun("abcde"), CharRun("d"), Exact(","))(p) - require.Equal(t, []Node{"a", "b", "c"}, node) - require.Equal(t, 6, p2.pos) + node, p2 := runParser("a,b,c,d,e,fg", KleeneUntil(Chars("abcde"), "d", ",")) + require.Equal(t, []interface{}{"a", "b", "c"}, node) + require.Equal(t, "d,e,fg", p2.Get()) }) t.Run("Breaks if separator does not match", func(t *testing.T) { - node, p2 := KleeneUntil(Char("abcdefg"), Char("y"), Exact(","))(p) - require.Equal(t, []Node{"a", "b", "c", "d", "e", "f"}, node) - require.Equal(t, 11, p2.pos) + node, p2 := runParser("a,b,c,d,e,fg", KleeneUntil(Chars("abcdefg", 1, 1), "y", ",")) + require.Equal(t, []interface{}{"a", "b", "c", "d", "e", "f"}, node) + require.Equal(t, "g", p2.Get()) }) } func TestManyUntil(t *testing.T) { - p := Pointer{"a,b,c,d,e,", 0} - t.Run("Matches sequence until", func(t *testing.T) { - node, p2 := ManyUntil(CharRun("abcdefg"), Char("d"), Exact(","))(p) - require.Equal(t, []Node{"a", "b", "c"}, node) - require.Equal(t, 6, p2.pos) + node, p2 := runParser("a,b,c,d,e,", ManyUntil(Chars("abcdefg"), "d", ",")) + require.Equal(t, []interface{}{"a", "b", "c"}, node) + require.Equal(t, 6, p2.Pos) }) t.Run("Returns error until matches early", func(t *testing.T) { - node, p2 := ManyUntil(CharRun("abc"), Exact("a"), Exact(","))(p) - require.Equal(t, NewError(0, "Unexpected input"), node) - require.Equal(t, 0, p2.pos) + _, p2 := runParser("a,b,c,d,e,", ManyUntil(Chars("abc"), "a", ",")) + require.Equal(t, "offset 0: Expected something else", p2.Error.Error()) + require.Equal(t, 0, p2.Pos) require.Equal(t, "a,b,c,d,e,", p2.Get()) }) } @@ -171,19 +170,19 @@ type htmlTag struct { } func TestMap(t *testing.T) { - parser := Map(And("<", Range("a-zA-Z0-9"), ">"), func(n Node) Node { - return htmlTag{n.([]Node)[1].(string)} + parser := Map(And("<", Chars("a-zA-Z0-9"), ">"), func(n interface{}) interface{} { + return htmlTag{n.([]interface{})[1].(string)} }) t.Run("sucess", func(t *testing.T) { - result, _ := parser(Pointer{"", 0}) + result, _ := runParser("", parser) require.Equal(t, htmlTag{"html"}, result) }) t.Run("error", func(t *testing.T) { - result, ptr := parser(Pointer{""), result) - require.Equal(t, 0, ptr.pos) + _, ps := runParser("", ps.Error.Error()) + require.Equal(t, 0, ps.Pos) }) } @@ -193,14 +192,14 @@ func TestMerge(t *testing.T) { parser := Merge(bracer) t.Run("sucess", func(t *testing.T) { - result, _ := parser(Pointer{"((()))", 0}) + result, _ := runParser("((()))", parser) require.Equal(t, "((()))", result) }) t.Run("error", func(t *testing.T) { - result, ptr := parser(Pointer{"((())", 0}) - require.Equal(t, NewError(5, "Expected )"), result) - require.Equal(t, 0, ptr.pos) + _, ps := runParser("((())", parser) + require.Equal(t, "offset 5: Expected )", ps.Error.Error()) + require.Equal(t, 0, ps.Pos) }) require.Panics(t, func() { @@ -209,8 +208,7 @@ func TestMerge(t *testing.T) { } func assertNilParser(t *testing.T, parser Parser) { - p := Pointer{"fff", 0} - node, p2 := parser(p) + node, p2 := runParser("fff", parser) require.Equal(t, nil, node) - require.Equal(t, p, p2) + require.Equal(t, 0, p2.Pos) } diff --git a/html/html.go b/html/html.go index adee935..847af96 100644 --- a/html/html.go +++ b/html/html.go @@ -2,32 +2,32 @@ package html import . "github.com/vektah/goparsify" -func Parse(input string) (result Node, remaining string, err error) { +func Parse(input string) (result interface{}, remaining string, err error) { return ParseString(tag, input) } type Tag struct { Name string Attributes map[string]string - Body []Node + Body []interface{} } var ( tag Parser - identifier = Merge(And(Range("a-z", 1, 1), Range("a-zA-Z0-9", 0))) - text = CharRunUntil("<>") + identifier = Merge(And(Chars("a-z", 1, 1), Chars("a-zA-Z0-9", 0))) + text = NotChars("<>") element = Any(text, &tag) elements = Kleene(element) //attr := And(identifier, equal, String()) attr = And(WS, identifier, WS, "=", WS, Any(String('"'), String('\''))) - attrs = Map(Kleene(attr, WS), func(node Node) Node { - nodes := node.([]Node) + attrs = Map(Kleene(attr, WS), func(node interface{}) interface{} { + nodes := node.([]interface{}) attr := map[string]string{} for _, attrNode := range nodes { - attrNodes := attrNode.([]Node) + attrNodes := attrNode.([]interface{}) attr[attrNodes[0].(string)] = attrNodes[2].(string) } @@ -39,13 +39,13 @@ var ( ) func init() { - tag = Map(And(tstart, elements, tend), func(node Node) Node { - nodes := node.([]Node) - openTag := nodes[0].([]Node) + tag = Map(And(tstart, elements, tend), func(node interface{}) interface{} { + nodes := node.([]interface{}) + openTag := nodes[0].([]interface{}) return Tag{ Name: openTag[1].(string), Attributes: openTag[2].(map[string]string), - Body: nodes[1].([]Node), + Body: nodes[1].([]interface{}), } }) diff --git a/html/html_test.go b/html/html_test.go index defdc10..f2f8e6f 100644 --- a/html/html_test.go +++ b/html/html_test.go @@ -4,14 +4,13 @@ import ( "testing" "github.com/stretchr/testify/require" - . "github.com/vektah/goparsify" ) func TestParse(t *testing.T) { result, _, err := Parse(`
helloworld
`) require.NoError(t, err) - require.Equal(t, Tag{Name: "body", Attributes: map[string]string{}, Body: []Node{ + require.Equal(t, Tag{Name: "body", Attributes: map[string]string{}, Body: []interface{}{ "hello ", - Tag{Name: "p", Attributes: map[string]string{"color": "blue"}, Body: []Node{"world"}}, + Tag{Name: "p", Attributes: map[string]string{"color": "blue"}, Body: []interface{}{"world"}}, }}, result) } diff --git a/json/json.go b/json/json.go index 3aa5163..60c5a3f 100644 --- a/json/json.go +++ b/json/json.go @@ -9,15 +9,15 @@ import ( var ( value Parser - array = Map(And(WS, "[", Kleene(&value, And(WS, ",")), "]"), func(n Node) Node { - return n.([]Node)[1].([]Node) + array = Map(And(WS, "[", Kleene(&value, And(WS, ",")), "]"), func(n interface{}) interface{} { + return n.([]interface{})[1].([]interface{}) }) properties = Kleene(And(WS, String('"'), WS, ":", WS, &value), ",") - object = Map(And(WS, "{", WS, properties, WS, "}"), func(n Node) Node { + object = Map(And(WS, "{", WS, properties, WS, "}"), func(n interface{}) interface{} { ret := map[string]interface{}{} - for _, prop := range n.([]Node)[1].([]Node) { - vals := prop.([]Node) + for _, prop := range n.([]interface{})[1].([]interface{}) { + vals := prop.([]interface{}) if len(vals) == 3 { ret[vals[0].(string)] = vals[2] } else { @@ -28,20 +28,20 @@ var ( return ret }) - _null = Map(And(WS, "null"), func(n Node) Node { + _null = Map(And(WS, "null"), func(n interface{}) interface{} { return nil }) - _true = Map(And(WS, "true"), func(n Node) Node { + _true = Map(And(WS, "true"), func(n interface{}) interface{} { return true }) - _false = Map(And(WS, "false"), func(n Node) Node { + _false = Map(And(WS, "false"), func(n interface{}) interface{} { return false }) - Y = Map(And(&value, WS), func(n Node) Node { - nodes := n.([]Node) + Y = Map(And(&value, WS), func(n interface{}) interface{} { + nodes := n.([]interface{}) if len(nodes) > 0 { return nodes[0] } diff --git a/json/json_test.go b/json/json_test.go index 7f0ab9f..84fdd5c 100644 --- a/json/json_test.go +++ b/json/json_test.go @@ -1,10 +1,11 @@ package json import ( + stdlibJson "encoding/json" "testing" + parsecJson "github.com/prataprc/goparsec/json" "github.com/stretchr/testify/require" - . "github.com/vektah/goparsify" ) func TestUnmarshal(t *testing.T) { @@ -29,7 +30,7 @@ func TestUnmarshal(t *testing.T) { t.Run("array", func(t *testing.T) { result, err := Unmarshal(`[true, null, false]`) require.NoError(t, err) - require.Equal(t, []Node{true, nil, false}, result) + require.Equal(t, []interface{}{true, nil, false}, result) }) t.Run("object", func(t *testing.T) { @@ -41,16 +42,16 @@ func TestUnmarshal(t *testing.T) { const benchmarkString = `{"true":true, "false":false, "null": null}` -//func BenchmarkUnmarshalParsec(b *testing.B) { -// bytes := []byte(benchmarkString) -// -// for i := 0; i < b.N; i++ { -// scanner := parsecJson.NewJSONScanner(bytes) -// _, remaining := parsecJson.Y(scanner) -// -// require.True(b, remaining.Endof()) -// } -//} +func BenchmarkUnmarshalParsec(b *testing.B) { + bytes := []byte(benchmarkString) + + for i := 0; i < b.N; i++ { + scanner := parsecJson.NewJSONScanner(bytes) + _, remaining := parsecJson.Y(scanner) + + require.True(b, remaining.Endof()) + } +} func BenchmarkUnmarshalParsify(b *testing.B) { for i := 0; i < b.N; i++ { @@ -59,12 +60,11 @@ func BenchmarkUnmarshalParsify(b *testing.B) { } } -// -//func BenchmarkUnmarshalStdlib(b *testing.B) { -// bytes := []byte(benchmarkString) -// var result interface{} -// for i := 0; i < b.N; i++ { -// err := stdlibJson.Unmarshal(bytes, &result) -// require.NoError(b, err) -// } -//} +func BenchmarkUnmarshalStdlib(b *testing.B) { + bytes := []byte(benchmarkString) + var result interface{} + for i := 0; i < b.N; i++ { + err := stdlibJson.Unmarshal(bytes, &result) + require.NoError(b, err) + } +} diff --git a/json/profile/cpuprofile.bat b/json/profile/cpuprofile.bat index 2899eb3..c293b5e 100644 --- a/json/profile/cpuprofile.bat +++ b/json/profile/cpuprofile.bat @@ -1,3 +1,3 @@ go build profile.exe -cpuprofile cpu.out -go tool pprof --inuse_objects profile.exe cpu.out +go tool pprof profile.exe cpu.out diff --git a/json/profile/json.go b/json/profile/json.go index b94848b..a03d047 100644 --- a/json/profile/json.go +++ b/json/profile/json.go @@ -31,11 +31,13 @@ func main() { } }() } + max := 1000000 if *memprofile != "" { runtime.MemProfileRate = 1 + max = 10000 } - for i := 0; i < 10000; i++ { + for i := 0; i < max; i++ { _, err := json.Unmarshal(`{"true":true, "false":false, "null": null}`) if err != nil { panic(err) diff --git a/json/profile/memprofile.bat b/json/profile/memprofile.bat index a9c935e..71873ac 100644 --- a/json/profile/memprofile.bat +++ b/json/profile/memprofile.bat @@ -1,3 +1,3 @@ go build profile.exe -memprofile mem.out -go tool pprof --inuse_objects profile.exe mem.out +go tool pprof profile.exe mem.out diff --git a/nodes.go b/nodes.go deleted file mode 100644 index 29dd58f..0000000 --- a/nodes.go +++ /dev/null @@ -1,23 +0,0 @@ -package parsec - -import "fmt" - -type Node interface { -} - -type Error struct { - pos int - Message string -} - -func (e Error) Pos() int { return e.pos } -func (e Error) Error() string { return fmt.Sprintf("offset %d: %s", e.pos, e.Message) } - -func NewError(pos int, message string) Error { - return Error{pos, message} -} - -func IsError(n interface{}) bool { - _, isErr := n.(Error) - return isErr -} diff --git a/parser.go b/parser.go index 7599bf1..98f59ce 100644 --- a/parser.go +++ b/parser.go @@ -7,7 +7,7 @@ import ( "unicode/utf8" ) -type Parser func(Pointer) (Node, Pointer) +type Parser func(*State) interface{} // Parserish types are any type that can be turned into a Parser by Parsify // These currently include *Parser and string literals. @@ -27,13 +27,13 @@ type Parserish interface{} func Parsify(p Parserish) Parser { switch p := p.(type) { - case func(Pointer) (Node, Pointer): + case func(*State) interface{}: return Parser(p) case Parser: return p case *Parser: // Todo: Maybe capture this stack and on nil show it? Is there a good error library to do this? - return func(ptr Pointer) (Node, Pointer) { + return func(ptr *State) interface{} { return (*p)(ptr) } case string: @@ -51,80 +51,34 @@ func ParsifyAll(parsers ...Parserish) []Parser { return ret } -func ParseString(parser Parserish, input string) (result Node, remaining string, err error) { +func ParseString(parser Parserish, input string) (result interface{}, remaining string, err error) { p := Parsify(parser) - result, pointer := p(Pointer{input, 0}) + ps := &State{input, 0, Error{}} + result = p(ps) - if err, isErr := result.(Error); isErr { - return nil, pointer.Get(), err + if ps.Error.Expected != "" { + return nil, ps.Get(), ps.Error } - return result, pointer.Get(), nil + return result, ps.Get(), nil } func Exact(match string) Parser { - return func(p Pointer) (Node, Pointer) { - if !strings.HasPrefix(p.Get(), match) { - return NewError(p.pos, "Expected "+match), p + return func(ps *State) interface{} { + if !strings.HasPrefix(ps.Get(), match) { + ps.ErrorHere(match) + return nil } - return match, p.Advance(len(match)) + ps.Advance(len(match)) + + return match } } -func Char(match string) Parser { - return func(p Pointer) (Node, Pointer) { - r, w := utf8.DecodeRuneInString(p.Get()) - - if !strings.ContainsRune(match, r) { - return NewError(p.pos, "Expected one of "+string(match)), p - - } - return string(r), p.Advance(w) - } -} - -func CharRun(match string) Parser { - return func(p Pointer) (Node, Pointer) { - matched := 0 - for p.pos+matched < len(p.input) { - r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:]) - if !strings.ContainsRune(match, r) { - break - } - matched += w - } - - if matched == 0 { - return NewError(p.pos, "Expected some of "+match), p - } - - return p.input[p.pos : p.pos+matched], p.Advance(matched) - } -} - -func CharRunUntil(match string) Parser { - return func(p Pointer) (Node, Pointer) { - matched := 0 - for p.pos+matched < len(p.input) { - r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:]) - if strings.ContainsRune(match, r) { - break - } - matched += w - } - - if matched == 0 { - return NewError(p.pos, "Expected some of "+match), p - } - - return p.input[p.pos : p.pos+matched], p.Advance(matched) - } -} - -func Range(r string, repetition ...int) Parser { - min := int(1) - max := int(-1) +func parseRepetition(defaultMin, defaultMax int, repetition ...int) (min int, max int) { + min = defaultMin + max = defaultMax switch len(repetition) { case 0: case 1: @@ -135,39 +89,67 @@ func Range(r string, repetition ...int) Parser { default: panic(fmt.Errorf("Dont know what %d repetion args mean", len(repetition))) } + return min, max +} - runes := []rune(r) - if len(runes)%3 != 0 { - panic("ranges should be in the form a-z0-9") - } +// parseMatcher turns a string in the format a-f01234A-F into: +// - a set string of matches string(01234) +// - a set of ranges [][]rune{{'a', 'f'}, {'A', 'F'}} +func parseMatcher(matcher string) (matches string, ranges [][]rune) { + runes := []rune(matcher) - var ranges [][]rune - for i := 0; i < len(runes); i += 3 { - start := runes[i] - end := runes[i+2] - if start <= end { - ranges = append(ranges, []rune{start, end}) + for i := 0; i < len(runes); i++ { + + if i+2 < len(runes) && runes[i+1] == '-' { + start := runes[i] + end := runes[i+2] + if start <= end { + ranges = append(ranges, []rune{start, end}) + } else { + ranges = append(ranges, []rune{end, start}) + } + } else if i+1 < len(runes) && runes[i] == '\\' { + matches += string(runes[i+1]) } else { - ranges = append(ranges, []rune{end, start}) + matches += string(runes[i]) } + } - return func(p Pointer) (Node, Pointer) { + return matches, ranges +} + +func Chars(matcher string, repetition ...int) Parser { + return charsImpl(matcher, false, repetition...) +} + +func NotChars(matcher string, repetition ...int) Parser { + return charsImpl(matcher, true, repetition...) +} + +func charsImpl(matcher string, stopOn bool, repetition ...int) Parser { + min, max := parseRepetition(1, -1, repetition...) + matches, ranges := parseMatcher(matcher) + + return func(ps *State) interface{} { matched := 0 - for p.pos+matched < len(p.input) { + for ps.Pos+matched < len(ps.Input) { if max != -1 && matched >= max { break } - r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:]) + r, w := utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:]) - anyMatched := false - for _, rng := range ranges { - if r >= rng[0] && r <= rng[1] { - anyMatched = true + anyMatched := strings.ContainsRune(matches, r) + if !anyMatched { + for _, rng := range ranges { + if r >= rng[0] && r <= rng[1] { + anyMatched = true + } } } - if !anyMatched { + + if anyMatched == stopOn { break } @@ -175,47 +157,55 @@ func Range(r string, repetition ...int) Parser { } if matched < min { - return NewError(p.pos+matched, fmt.Sprintf("Expected at least %d more of %s", min-matched, r)), p + ps.ErrorHere(matcher) + return nil } - return p.input[p.pos : p.pos+matched], p.Advance(matched) + result := ps.Input[ps.Pos : ps.Pos+matched] + ps.Advance(matched) + return result } } -func WS(p Pointer) (Node, Pointer) { - _, p2 := CharRun("\t\n\v\f\r \x85\xA0")(p) - return nil, p2 +var ws = Chars("\t\n\v\f\r \x85\xA0", 0) + +func WS(ps *State) interface{} { + ws(ps) + return nil } func String(quote rune) Parser { - return func(p Pointer) (Node, Pointer) { + return func(ps *State) interface{} { var r rune var w int - r, w = utf8.DecodeRuneInString(p.input[p.pos:]) + var matched int + r, matched = utf8.DecodeRuneInString(ps.Input[ps.Pos:]) if r != quote { - return NewError(p.pos, `Expected "`), p + ps.ErrorHere("\"") + return nil } - matched := w result := &bytes.Buffer{} - for p.pos+matched < len(p.input) { - r, w = utf8.DecodeRuneInString(p.input[p.pos+matched:]) + for ps.Pos+matched < len(ps.Input) { + r, w = utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:]) matched += w if r == '\\' { - r, w = utf8.DecodeRuneInString(p.input[p.pos+matched:]) + r, w = utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:]) result.WriteRune(r) matched += w continue } if r == quote { - return result.String(), p.Advance(matched) + ps.Advance(matched) + return result.String() } result.WriteRune(r) } - return NewError(p.pos, "Unterminated string"), p + ps.ErrorHere("\"") + return nil } } diff --git a/parser_test.go b/parser_test.go index 4d60781..f6122ab 100644 --- a/parser_test.go +++ b/parser_test.go @@ -7,31 +7,29 @@ import ( ) func TestParsify(t *testing.T) { - p := Pointer{"ffooo", 0} t.Run("strings", func(t *testing.T) { - node, _ := Parsify("ff")(p) - require.Equal(t, "ff", node) + require.Equal(t, "ff", Parsify("ff")(InputString("ffooo"))) }) t.Run("parsers", func(t *testing.T) { - node, _ := Parsify(CharRun("f"))(p) - require.Equal(t, "ff", node) + require.Equal(t, "ff", Parsify(Chars("f"))(InputString("ffooo"))) }) t.Run("parser funcs", func(t *testing.T) { - node, _ := Parsify(func(p Pointer) (Node, Pointer) { - return "hello", p - })(p) + node := Parsify(func(p *State) interface{} { + return "hello" + })(InputString("ffooo")) + require.Equal(t, "hello", node) }) t.Run("*parsers", func(t *testing.T) { var parser Parser parserfied := Parsify(&parser) - parser = CharRun("f") + parser = Chars("f") - node, _ := parserfied(p) + node := parserfied(InputString("ffooo")) require.Equal(t, "ff", node) }) @@ -43,122 +41,77 @@ func TestParsify(t *testing.T) { func TestParsifyAll(t *testing.T) { parsers := ParsifyAll("ff", "gg") - result, _ := parsers[0](Pointer{"ffooo", 0}) + result := parsers[0](InputString("ffooo")) require.Equal(t, "ff", result) - result, _ = parsers[1](Pointer{"ffooo", 0}) - require.Equal(t, NewError(0, "Expected gg"), result) + result = parsers[1](InputString("ffooo")) + require.Equal(t, nil, result) } func TestExact(t *testing.T) { - p := Pointer{"fooo", 0} - t.Run("success", func(t *testing.T) { - node, p2 := Exact("fo")(p) + node, ps := runParser("foobar", Exact("fo")) require.Equal(t, "fo", node) - require.Equal(t, p.Advance(2), p2) + require.Equal(t, "obar", ps.Get()) }) t.Run("error", func(t *testing.T) { - node, p2 := Exact("bar")(p) - require.Equal(t, NewError(0, "Expected bar"), node) - require.Equal(t, 0, p2.pos) + _, ps := runParser("foobar", Exact("bar")) + require.Equal(t, "bar", ps.Error.Expected) + require.Equal(t, 0, ps.Pos) }) } -func TestChar(t *testing.T) { - p := Pointer{"foobar", 0} - - t.Run("success", func(t *testing.T) { - node, p2 := Char("fo")(p) - require.Equal(t, "f", node) - require.Equal(t, p.Advance(1), p2) - }) - - t.Run("error", func(t *testing.T) { - node, p2 := Char("bar")(p) - require.Equal(t, NewError(0, "Expected one of bar"), node) - require.Equal(t, 0, p2.pos) - }) -} - -func TestCharRun(t *testing.T) { - p := Pointer{"foobar", 0} - - t.Run("success", func(t *testing.T) { - node, p2 := CharRun("fo")(p) - require.Equal(t, "foo", node) - require.Equal(t, p.Advance(3), p2) - }) - - t.Run("error", func(t *testing.T) { - node, p2 := CharRun("bar")(p) - require.Equal(t, NewError(0, "Expected some of bar"), node) - require.Equal(t, 0, p2.pos) - }) -} - -func TestCharUntil(t *testing.T) { - p := Pointer{"foobar", 0} - - t.Run("success", func(t *testing.T) { - node, p2 := CharRunUntil("z")(p) - require.Equal(t, "foobar", node) - require.Equal(t, p.Advance(6), p2) - }) - - t.Run("error", func(t *testing.T) { - node, p2 := CharRunUntil("f")(p) - require.Equal(t, NewError(0, "Expected some of f"), node) - require.Equal(t, 0, p2.pos) - }) -} - -func TestWS(t *testing.T) { - p := Pointer{" fooo", 0} - - node, p2 := WS(p) - require.Equal(t, nil, node) - require.Equal(t, p.Advance(2), p2) -} - -func TestRange(t *testing.T) { +func TestChars(t *testing.T) { t.Run("full match", func(t *testing.T) { - node, p := Range("a-z")(Pointer{"foobar", 0}) + node, ps := runParser("foobar", Chars("a-z")) require.Equal(t, "foobar", node) - require.Equal(t, "", p.Get()) + require.Equal(t, "", ps.Get()) + require.False(t, ps.Errored()) }) t.Run("partial match", func(t *testing.T) { - node, p := Range("1-4d-a")(Pointer{"a1b2c3d4efg", 0}) + node, ps := runParser("a1b2c3d4efg", Chars("1-4d-a")) require.Equal(t, "a1b2c3d4", node) - require.Equal(t, "efg", p.Get()) + require.Equal(t, "efg", ps.Get()) + require.False(t, ps.Errored()) }) t.Run("limited match", func(t *testing.T) { - node, p := Range("1-4d-a", 1, 2)(Pointer{"a1b2c3d4efg", 0}) + node, ps := runParser("a1b2c3d4efg", Chars("1-4d-a", 1, 2)) require.Equal(t, "a1", node) - require.Equal(t, "b2c3d4efg", p.Get()) + require.Equal(t, "b2c3d4efg", ps.Get()) + require.False(t, ps.Errored()) }) t.Run("no match", func(t *testing.T) { - node, p := Range("0-9")(Pointer{"ffffff", 0}) - require.Equal(t, NewError(0, "Expected at least 1 more of 0-9"), node) - require.Equal(t, 0, p.pos) + _, ps := runParser("ffffff", Chars("0-9")) + require.Equal(t, "offset 0: Expected 0-9", ps.Error.Error()) + require.Equal(t, 0, ps.Pos) }) t.Run("no match with min", func(t *testing.T) { - node, p := Range("0-9", 4)(Pointer{"ffffff", 0}) - require.Equal(t, NewError(0, "Expected at least 4 more of 0-9"), node) - require.Equal(t, 0, p.pos) + _, ps := runParser("ffffff", Chars("0-9", 4)) + require.Equal(t, "0-9", ps.Error.Expected) + require.Equal(t, 0, ps.Pos) + }) + + t.Run("test exact matches", func(t *testing.T) { + node, ps := runParser("aaff", Chars("abcd")) + require.Equal(t, "aa", node) + require.Equal(t, 2, ps.Pos) + require.False(t, ps.Errored()) + }) + + t.Run("test not matches", func(t *testing.T) { + node, ps := runParser("aaff", NotChars("ff")) + require.Equal(t, "aa", node) + require.Equal(t, 2, ps.Pos) + require.False(t, ps.Errored()) }) require.Panics(t, func() { - Range("abcd") - }) - - require.Panics(t, func() { - Range("a-b", 1, 2, 3) + Chars("a-b", 1, 2, 3) }) } @@ -181,26 +134,48 @@ func TestParseString(t *testing.T) { func TestString(t *testing.T) { t.Run("test basic match", func(t *testing.T) { - result, p := String('"')(Pointer{`"hello"`, 0}) + result, p := runParser(`"hello"`, String('"')) require.Equal(t, `hello`, result) require.Equal(t, "", p.Get()) }) t.Run("test non match", func(t *testing.T) { - result, p := String('"')(Pointer{`1`, 0}) - require.Equal(t, NewError(0, `Expected "`), result) + _, p := runParser(`1`, String('"')) + require.Equal(t, `"`, p.Error.Expected) require.Equal(t, `1`, p.Get()) }) t.Run("test unterminated string", func(t *testing.T) { - result, p := String('"')(Pointer{`"hello `, 0}) - require.Equal(t, NewError(0, `Unterminated string`), result) + _, p := runParser(`"hello `, String('"')) + require.Equal(t, `"`, p.Error.Expected) require.Equal(t, `"hello `, p.Get()) }) t.Run("test escaping", func(t *testing.T) { - result, p := String('"')(Pointer{`"hello \"world\""`, 0}) + result, p := runParser(`"hello \"world\""`, String('"')) require.Equal(t, `hello "world"`, result) require.Equal(t, ``, p.Get()) }) } + +func TestWS(t *testing.T) { + t.Run("consumes all whitespace", func(t *testing.T) { + result, p := runParser(" asdf", WS) + require.Equal(t, nil, result) + require.Equal(t, "asdf", p.Get()) + require.False(t, p.Errored()) + }) + + t.Run("never errors", func(t *testing.T) { + result, p := runParser("asdf", WS) + require.Equal(t, nil, result) + require.Equal(t, "asdf", p.Get()) + require.False(t, p.Errored()) + }) +} + +func runParser(input string, parser Parser) (interface{}, *State) { + ps := InputString(input) + result := parser(ps) + return result, ps +} diff --git a/pointer.go b/pointer.go deleted file mode 100644 index 7727833..0000000 --- a/pointer.go +++ /dev/null @@ -1,17 +0,0 @@ -package parsec - -type Pointer struct { - input string - pos int -} - -func (p Pointer) Advance(i int) Pointer { - return Pointer{p.input, p.pos + i} -} - -func (p Pointer) Get() string { - if p.pos > len(p.input) { - return "" - } - return p.input[p.pos:] -} diff --git a/pointer_test.go b/pointer_test.go deleted file mode 100644 index 6b432f0..0000000 --- a/pointer_test.go +++ /dev/null @@ -1,25 +0,0 @@ -package parsec - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func TestPointer(t *testing.T) { - p := Pointer{"fooo", 0} - - t.Run("Advances", func(t *testing.T) { - p2 := p.Advance(2) - require.Equal(t, Pointer{"fooo", 2}, p2) - require.Equal(t, Pointer{"fooo", 0}, p) - require.Equal(t, Pointer{"fooo", 3}, p2.Advance(1)) - }) - - t.Run("Get", func(t *testing.T) { - require.Equal(t, "fooo", p.Get()) - require.Equal(t, "ooo", p.Advance(1).Get()) - require.Equal(t, "", p.Advance(4).Get()) - require.Equal(t, "", p.Advance(10).Get()) - }) -} diff --git a/state.go b/state.go new file mode 100644 index 0000000..f8df3f1 --- /dev/null +++ b/state.go @@ -0,0 +1,45 @@ +package parsec + +import "fmt" + +type Error struct { + pos int + Expected string +} + +func (e Error) Pos() int { return e.pos } +func (e Error) Error() string { return fmt.Sprintf("offset %d: Expected %s", e.pos, e.Expected) } + +type State struct { + Input string + Pos int + Error Error +} + +func (s *State) Advance(i int) { + s.Pos += i +} + +func (s *State) Get() string { + if s.Pos > len(s.Input) { + return "" + } + return s.Input[s.Pos:] +} + +func (s *State) ErrorHere(expected string) { + s.Error.pos = s.Pos + s.Error.Expected = expected +} + +func (s *State) ClearError() { + s.Error.Expected = "" +} + +func (s *State) Errored() bool { + return s.Error.Expected != "" +} + +func InputString(input string) *State { + return &State{Input: input} +} diff --git a/state_test.go b/state_test.go new file mode 100644 index 0000000..fe04a58 --- /dev/null +++ b/state_test.go @@ -0,0 +1,45 @@ +package parsec + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestState_Advance(t *testing.T) { + ps := InputString("fooo") + require.Equal(t, 0, ps.Pos) + ps.Advance(2) + require.Equal(t, 2, ps.Pos) + ps.Advance(1) + require.Equal(t, 3, ps.Pos) +} + +func TestState_Get(t *testing.T) { + ps := InputString("fooo") + require.Equal(t, "fooo", ps.Get()) + ps.Advance(1) + require.Equal(t, "ooo", ps.Get()) + ps.Advance(4) + require.Equal(t, "", ps.Get()) + ps.Advance(10) + require.Equal(t, "", ps.Get()) +} + +func TestState_Errors(t *testing.T) { + ps := InputString("fooo") + + ps.ErrorHere("hello") + require.Equal(t, "offset 0: Expected hello", ps.Error.Error()) + require.Equal(t, 0, ps.Error.Pos()) + require.True(t, ps.Errored()) + + ps.ClearError() + require.False(t, ps.Errored()) + + ps.Advance(2) + ps.ErrorHere("hello2") + require.Equal(t, "offset 2: Expected hello2", ps.Error.Error()) + require.Equal(t, 2, ps.Error.Pos()) + require.True(t, ps.Errored()) +}