diff --git a/combinator.go b/combinator.go index 6197d6b..3cb8c6d 100644 --- a/combinator.go +++ b/combinator.go @@ -2,14 +2,13 @@ package parsec import ( "bytes" - "fmt" ) -func Nil(ps *State) interface{} { +func Nil(ps *State) *Node { return nil } -func Never(ps *State) interface{} { +func Never(ps *State) *Node { ps.ErrorHere("not anything") return nil } @@ -21,8 +20,8 @@ func And(parsers ...Parserish) Parser { parserfied := ParsifyAll(parsers...) - return func(ps *State) interface{} { - var nodes = make([]interface{}, 0, len(parserfied)) + return func(ps *State) *Node { + var nodes = make([]*Node, 0, len(parserfied)) startpos := ps.Pos for _, parser := range parserfied { node := parser(ps) @@ -34,7 +33,7 @@ func And(parsers ...Parserish) Parser { nodes = append(nodes, node) } } - return nodes + return &Node{Children: nodes} } } @@ -45,7 +44,7 @@ func Any(parsers ...Parserish) Parser { parserfied := ParsifyAll(parsers...) - return func(ps *State) interface{} { + return func(ps *State) *Node { longestError := Error{} startpos := ps.Pos for _, parser := range parserfied { @@ -90,9 +89,9 @@ func manyImpl(min int, op Parserish, until Parserish, sep ...Parserish) Parser { sepParser = Parsify(sep[0]) } - return func(ps *State) interface{} { - var node interface{} - nodes := make([]interface{}, 0, 20) + return func(ps *State) *Node { + var node *Node + nodes := make([]*Node, 0, 20) startpos := ps.Pos for { tempPos := ps.Pos @@ -126,14 +125,14 @@ func manyImpl(min int, op Parserish, until Parserish, sep ...Parserish) Parser { break } } - return nodes + return &Node{Children: nodes} } } func Maybe(parser Parserish) Parser { parserfied := Parsify(parser) - return func(ps *State) interface{} { + return func(ps *State) *Node { node := parserfied(ps) if ps.Errored() { ps.ClearError() @@ -144,10 +143,10 @@ func Maybe(parser Parserish) Parser { } } -func Map(parser Parserish, f func(n interface{}) interface{}) Parser { +func Map(parser Parserish, f func(n *Node) *Node) Parser { p := Parsify(parser) - return func(ps *State) interface{} { + return func(ps *State) *Node { node := p(ps) if ps.Errored() { return nil @@ -156,22 +155,24 @@ func Map(parser Parserish, f func(n interface{}) interface{}) Parser { } } -func flatten(n interface{}) interface{} { - if s, ok := n.(string); ok { - return s +func flatten(n *Node) string { + if n.Token != "" { + return n.Token } - if nodes, ok := n.([]interface{}); ok { + if len(n.Children) > 0 { sbuf := &bytes.Buffer{} - for _, node := range nodes { - sbuf.WriteString(flatten(node).(string)) + for _, node := range n.Children { + sbuf.WriteString(flatten(node)) } return sbuf.String() } - panic(fmt.Errorf("Dont know how to flatten %t", n)) + return "" } func Merge(parser Parserish) Parser { - return Map(parser, flatten) + return Map(parser, func(n *Node) *Node { + return &Node{Token: flatten(n)} + }) } diff --git a/combinator_test.go b/combinator_test.go index 31fad1b..0cd5ea5 100644 --- a/combinator_test.go +++ b/combinator_test.go @@ -9,7 +9,7 @@ import ( func TestNil(t *testing.T) { node, p2 := runParser("hello world", Nil) - require.Equal(t, nil, node) + require.Nil(t, node) require.Equal(t, 0, p2.Pos) require.False(t, p2.Errored()) } @@ -17,7 +17,7 @@ func TestNil(t *testing.T) { func TestNever(t *testing.T) { node, p2 := runParser("hello world", Never) - require.Equal(t, nil, node) + require.Nil(t, node) require.Equal(t, 0, p2.Pos) require.True(t, p2.Errored()) } @@ -27,7 +27,7 @@ func TestAnd(t *testing.T) { t.Run("matches sequence", func(t *testing.T) { node, p2 := runParser("hello world", parser) - require.Equal(t, []interface{}{"hello", "world"}, node) + assertSequence(t, node, "hello", "world") require.Equal(t, "", p2.Get()) }) @@ -46,13 +46,13 @@ func TestAnd(t *testing.T) { func TestMaybe(t *testing.T) { t.Run("matches sequence", func(t *testing.T) { node, p2 := runParser("hello world", Maybe("hello")) - require.Equal(t, "hello", node) + require.Equal(t, "hello", node.Token) require.Equal(t, " world", p2.Get()) }) t.Run("returns no errors", func(t *testing.T) { node, p3 := runParser("hello world", Maybe("world")) - require.Equal(t, nil, node) + require.Nil(t, node) require.False(t, p3.Errored()) require.Equal(t, 0, p3.Pos) }) @@ -61,7 +61,7 @@ func TestMaybe(t *testing.T) { func TestAny(t *testing.T) { t.Run("Matches any", func(t *testing.T) { node, p2 := runParser("hello world!", Any("hello", "world")) - require.Equal(t, "hello", node) + require.Equal(t, "hello", node.Token) require.Equal(t, 5, p2.Pos) }) @@ -78,7 +78,7 @@ func TestAny(t *testing.T) { t.Run("Accepts nil matches", func(t *testing.T) { node, p2 := runParser("hello world!", Any(Exact("ffffff"), WS)) - require.Equal(t, nil, node) + require.Nil(t, node) require.Equal(t, 0, p2.Pos) }) @@ -91,19 +91,19 @@ func TestKleene(t *testing.T) { t.Run("Matches sequence with sep", func(t *testing.T) { node, p2 := runParser("a,b,c,d,e,", Kleene(Chars("a-g"), ",")) require.False(t, p2.Errored()) - require.Equal(t, []interface{}{"a", "b", "c", "d", "e"}, node) + assertSequence(t, node, "a", "b", "c", "d", "e") require.Equal(t, 10, p2.Pos) }) t.Run("Matches sequence without sep", func(t *testing.T) { node, p2 := runParser("a,b,c,d,e,", Kleene(Any(Chars("a-g"), ","))) - require.Equal(t, []interface{}{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node) + assertSequence(t, node, "a", ",", "b", ",", "c", ",", "d", ",", "e", ",") require.Equal(t, 10, p2.Pos) }) t.Run("Stops on error", func(t *testing.T) { node, p2 := runParser("a,b,c,d,e,", Kleene(Chars("a-c"), ",")) - require.Equal(t, []interface{}{"a", "b", "c"}, node) + assertSequence(t, node, "a", "b", "c") require.Equal(t, 6, p2.Pos) require.Equal(t, "d,e,", p2.Get()) }) @@ -112,19 +112,19 @@ func TestKleene(t *testing.T) { func TestMany(t *testing.T) { t.Run("Matches sequence with sep", func(t *testing.T) { node, p2 := runParser("a,b,c,d,e,", Many(Chars("a-g"), Exact(","))) - require.Equal(t, []interface{}{"a", "b", "c", "d", "e"}, node) + assertSequence(t, node, "a", "b", "c", "d", "e") require.Equal(t, 10, p2.Pos) }) t.Run("Matches sequence without sep", func(t *testing.T) { node, p2 := runParser("a,b,c,d,e,", Many(Any(Chars("abcdefg"), Exact(",")))) - require.Equal(t, []interface{}{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node) + assertSequence(t, node, "a", ",", "b", ",", "c", ",", "d", ",", "e", ",") require.Equal(t, 10, p2.Pos) }) t.Run("Stops on error", func(t *testing.T) { node, p2 := runParser("a,b,c,d,e,", Many(Chars("abc"), Exact(","))) - require.Equal(t, []interface{}{"a", "b", "c"}, node) + assertSequence(t, node, "a", "b", "c") require.Equal(t, 6, p2.Pos) require.Equal(t, "d,e,", p2.Get()) }) @@ -139,13 +139,13 @@ func TestMany(t *testing.T) { func TestKleeneUntil(t *testing.T) { t.Run("Matches sequence with sep", func(t *testing.T) { node, p2 := runParser("a,b,c,d,e,fg", KleeneUntil(Chars("abcde"), "d", ",")) - require.Equal(t, []interface{}{"a", "b", "c"}, node) + assertSequence(t, node, "a", "b", "c") require.Equal(t, "d,e,fg", p2.Get()) }) t.Run("Breaks if separator does not match", func(t *testing.T) { node, p2 := runParser("a,b,c,d,e,fg", KleeneUntil(Chars("abcdefg", 1, 1), "y", ",")) - require.Equal(t, []interface{}{"a", "b", "c", "d", "e", "f"}, node) + assertSequence(t, node, "a", "b", "c", "d", "e", "f") require.Equal(t, "g", p2.Get()) }) } @@ -153,7 +153,7 @@ func TestKleeneUntil(t *testing.T) { func TestManyUntil(t *testing.T) { t.Run("Matches sequence until", func(t *testing.T) { node, p2 := runParser("a,b,c,d,e,", ManyUntil(Chars("abcdefg"), "d", ",")) - require.Equal(t, []interface{}{"a", "b", "c"}, node) + assertSequence(t, node, "a", "b", "c") require.Equal(t, 6, p2.Pos) }) @@ -170,13 +170,13 @@ type htmlTag struct { } func TestMap(t *testing.T) { - parser := Map(And("<", Chars("a-zA-Z0-9"), ">"), func(n interface{}) interface{} { - return htmlTag{n.([]interface{})[1].(string)} + parser := Map(And("<", Chars("a-zA-Z0-9"), ">"), func(n *Node) *Node { + return &Node{Result: htmlTag{n.Children[1].Token}} }) t.Run("sucess", func(t *testing.T) { result, _ := runParser("", parser) - require.Equal(t, htmlTag{"html"}, result) + require.Equal(t, htmlTag{"html"}, result.Result) }) t.Run("error", func(t *testing.T) { @@ -193,7 +193,7 @@ func TestMerge(t *testing.T) { t.Run("sucess", func(t *testing.T) { result, _ := runParser("((()))", parser) - require.Equal(t, "((()))", result) + require.Equal(t, "((()))", result.Token) }) t.Run("error", func(t *testing.T) { @@ -201,14 +201,20 @@ func TestMerge(t *testing.T) { require.Equal(t, "offset 5: Expected )", ps.Error.Error()) require.Equal(t, 0, ps.Pos) }) - - require.Panics(t, func() { - flatten(1) - }) } func assertNilParser(t *testing.T, parser Parser) { node, p2 := runParser("fff", parser) - require.Equal(t, nil, node) + require.Nil(t, node) require.Equal(t, 0, p2.Pos) } + +func assertSequence(t *testing.T, node *Node, expected ...string) { + actual := []string{} + + for _, child := range node.Children { + actual = append(actual, child.Token) + } + + require.Equal(t, expected, actual) +} diff --git a/html/html.go b/html/html.go index 847af96..8b19c23 100644 --- a/html/html.go +++ b/html/html.go @@ -16,22 +16,28 @@ var ( tag Parser identifier = Merge(And(Chars("a-z", 1, 1), Chars("a-zA-Z0-9", 0))) - text = NotChars("<>") + text = Map(NotChars("<>"), func(n *Node) *Node { + return &Node{Result: n.Token} + }) element = Any(text, &tag) - elements = Kleene(element) - //attr := And(identifier, equal, String()) + elements = Map(Kleene(element), func(n *Node) *Node { + ret := []interface{}{} + for _, child := range n.Children { + ret = append(ret, child.Result) + } + return &Node{Result: ret} + }) + attr = And(WS, identifier, WS, "=", WS, Any(String('"'), String('\''))) - attrs = Map(Kleene(attr, WS), func(node interface{}) interface{} { - nodes := node.([]interface{}) + attrs = Map(Kleene(attr, WS), func(node *Node) *Node { attr := map[string]string{} - for _, attrNode := range nodes { - attrNodes := attrNode.([]interface{}) - attr[attrNodes[0].(string)] = attrNodes[2].(string) + for _, attrNode := range node.Children { + attr[attrNode.Children[0].Token] = attrNode.Children[2].Token } - return attr + return &Node{Result: attr} }) tstart = And("<", identifier, attrs, ">") @@ -39,14 +45,13 @@ var ( ) func init() { - tag = Map(And(tstart, elements, tend), func(node interface{}) interface{} { - nodes := node.([]interface{}) - openTag := nodes[0].([]interface{}) - return Tag{ - Name: openTag[1].(string), - Attributes: openTag[2].(map[string]string), - Body: nodes[1].([]interface{}), - } + tag = Map(And(tstart, elements, tend), func(node *Node) *Node { + openTag := node.Children[0] + return &Node{Result: Tag{ + Name: openTag.Children[1].Token, + Attributes: openTag.Children[2].Result.(map[string]string), + Body: node.Children[1].Result.([]interface{}), + }} }) } diff --git a/json/json.go b/json/json.go index 60c5a3f..fbba21c 100644 --- a/json/json.go +++ b/json/json.go @@ -9,48 +9,47 @@ import ( var ( value Parser - array = Map(And(WS, "[", Kleene(&value, And(WS, ",")), "]"), func(n interface{}) interface{} { - return n.([]interface{})[1].([]interface{}) + _array = Map(And(WS, "[", Kleene(&value, And(WS, ",")), "]"), func(n *Node) *Node { + ret := []interface{}{} + for _, child := range n.Children[1].Children { + ret = append(ret, child.Result) + } + return &Node{Result: ret} }) properties = Kleene(And(WS, String('"'), WS, ":", WS, &value), ",") - object = Map(And(WS, "{", WS, properties, WS, "}"), func(n interface{}) interface{} { + _object = Map(And(WS, "{", WS, properties, WS, "}"), func(n *Node) *Node { ret := map[string]interface{}{} - for _, prop := range n.([]interface{})[1].([]interface{}) { - vals := prop.([]interface{}) - if len(vals) == 3 { - ret[vals[0].(string)] = vals[2] - } else { - ret[vals[0].(string)] = nil - } + for _, prop := range n.Children[1].Children { + ret[prop.Children[0].Token] = prop.Children[2].Result } - return ret + return &Node{Result: ret} }) - _null = Map(And(WS, "null"), func(n interface{}) interface{} { - return nil + _null = Map(And(WS, "null"), func(n *Node) *Node { + return &Node{Result: nil} }) - _true = Map(And(WS, "true"), func(n interface{}) interface{} { - return true + _true = Map(And(WS, "true"), func(n *Node) *Node { + return &Node{Result: true} }) - _false = Map(And(WS, "false"), func(n interface{}) interface{} { - return false + _false = Map(And(WS, "false"), func(n *Node) *Node { + return &Node{Result: false} }) - Y = Map(And(&value, WS), func(n interface{}) interface{} { - nodes := n.([]interface{}) - if len(nodes) > 0 { - return nodes[0] - } - return nil + _string = Map(String('"'), func(n *Node) *Node { + return &Node{Result: n.Token} + }) + + Y = Map(And(&value, WS), func(n *Node) *Node { + return &Node{Result: n.Children[0].Result} }) ) func init() { - value = Any(_null, _true, _false, String('"'), array, object) + value = Any(_null, _true, _false, _string, _array, _object) } func Unmarshal(input string) (interface{}, error) { diff --git a/parser.go b/parser.go index 98f59ce..30a3a01 100644 --- a/parser.go +++ b/parser.go @@ -7,7 +7,13 @@ import ( "unicode/utf8" ) -type Parser func(*State) interface{} +type Node struct { + Token string + Children []*Node + Result interface{} +} + +type Parser func(*State) *Node // Parserish types are any type that can be turned into a Parser by Parsify // These currently include *Parser and string literals. @@ -27,13 +33,13 @@ type Parserish interface{} func Parsify(p Parserish) Parser { switch p := p.(type) { - case func(*State) interface{}: + case func(*State) *Node: return Parser(p) case Parser: return p case *Parser: // Todo: Maybe capture this stack and on nil show it? Is there a good error library to do this? - return func(ptr *State) interface{} { + return func(ptr *State) *Node { return (*p)(ptr) } case string: @@ -54,17 +60,17 @@ func ParsifyAll(parsers ...Parserish) []Parser { func ParseString(parser Parserish, input string) (result interface{}, remaining string, err error) { p := Parsify(parser) ps := &State{input, 0, Error{}} - result = p(ps) + ret := p(ps) if ps.Error.Expected != "" { return nil, ps.Get(), ps.Error } - return result, ps.Get(), nil + return ret.Result, ps.Get(), nil } func Exact(match string) Parser { - return func(ps *State) interface{} { + return func(ps *State) *Node { if !strings.HasPrefix(ps.Get(), match) { ps.ErrorHere(match) return nil @@ -72,7 +78,7 @@ func Exact(match string) Parser { ps.Advance(len(match)) - return match + return &Node{Token: match} } } @@ -131,7 +137,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser { min, max := parseRepetition(1, -1, repetition...) matches, ranges := parseMatcher(matcher) - return func(ps *State) interface{} { + return func(ps *State) *Node { matched := 0 for ps.Pos+matched < len(ps.Input) { if max != -1 && matched >= max { @@ -163,19 +169,19 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser { result := ps.Input[ps.Pos : ps.Pos+matched] ps.Advance(matched) - return result + return &Node{Token: result} } } var ws = Chars("\t\n\v\f\r \x85\xA0", 0) -func WS(ps *State) interface{} { +func WS(ps *State) *Node { ws(ps) return nil } func String(quote rune) Parser { - return func(ps *State) interface{} { + return func(ps *State) *Node { var r rune var w int var matched int @@ -200,7 +206,7 @@ func String(quote rune) Parser { if r == quote { ps.Advance(matched) - return result.String() + return &Node{Token: result.String()} } result.WriteRune(r) } diff --git a/parser_test.go b/parser_test.go index f6122ab..acb569e 100644 --- a/parser_test.go +++ b/parser_test.go @@ -9,19 +9,19 @@ import ( func TestParsify(t *testing.T) { t.Run("strings", func(t *testing.T) { - require.Equal(t, "ff", Parsify("ff")(InputString("ffooo"))) + require.Equal(t, "ff", Parsify("ff")(InputString("ffooo")).Token) }) t.Run("parsers", func(t *testing.T) { - require.Equal(t, "ff", Parsify(Chars("f"))(InputString("ffooo"))) + require.Equal(t, "ff", Parsify(Chars("f"))(InputString("ffooo")).Token) }) t.Run("parser funcs", func(t *testing.T) { - node := Parsify(func(p *State) interface{} { - return "hello" + node := Parsify(func(p *State) *Node { + return &Node{Token: "hello"} })(InputString("ffooo")) - require.Equal(t, "hello", node) + require.Equal(t, "hello", node.Token) }) t.Run("*parsers", func(t *testing.T) { @@ -30,7 +30,7 @@ func TestParsify(t *testing.T) { parser = Chars("f") node := parserfied(InputString("ffooo")) - require.Equal(t, "ff", node) + require.Equal(t, "ff", node.Token) }) require.Panics(t, func() { @@ -42,16 +42,16 @@ func TestParsifyAll(t *testing.T) { parsers := ParsifyAll("ff", "gg") result := parsers[0](InputString("ffooo")) - require.Equal(t, "ff", result) + require.Equal(t, "ff", result.Token) result = parsers[1](InputString("ffooo")) - require.Equal(t, nil, result) + require.Nil(t, result) } func TestExact(t *testing.T) { t.Run("success", func(t *testing.T) { node, ps := runParser("foobar", Exact("fo")) - require.Equal(t, "fo", node) + require.Equal(t, "fo", node.Token) require.Equal(t, "obar", ps.Get()) }) @@ -65,21 +65,21 @@ func TestExact(t *testing.T) { func TestChars(t *testing.T) { t.Run("full match", func(t *testing.T) { node, ps := runParser("foobar", Chars("a-z")) - require.Equal(t, "foobar", node) + require.Equal(t, "foobar", node.Token) require.Equal(t, "", ps.Get()) require.False(t, ps.Errored()) }) t.Run("partial match", func(t *testing.T) { node, ps := runParser("a1b2c3d4efg", Chars("1-4d-a")) - require.Equal(t, "a1b2c3d4", node) + require.Equal(t, "a1b2c3d4", node.Token) require.Equal(t, "efg", ps.Get()) require.False(t, ps.Errored()) }) t.Run("limited match", func(t *testing.T) { node, ps := runParser("a1b2c3d4efg", Chars("1-4d-a", 1, 2)) - require.Equal(t, "a1", node) + require.Equal(t, "a1", node.Token) require.Equal(t, "b2c3d4efg", ps.Get()) require.False(t, ps.Errored()) }) @@ -98,14 +98,14 @@ func TestChars(t *testing.T) { t.Run("test exact matches", func(t *testing.T) { node, ps := runParser("aaff", Chars("abcd")) - require.Equal(t, "aa", node) + require.Equal(t, "aa", node.Token) require.Equal(t, 2, ps.Pos) require.False(t, ps.Errored()) }) t.Run("test not matches", func(t *testing.T) { node, ps := runParser("aaff", NotChars("ff")) - require.Equal(t, "aa", node) + require.Equal(t, "aa", node.Token) require.Equal(t, 2, ps.Pos) require.False(t, ps.Errored()) }) @@ -116,26 +116,27 @@ func TestChars(t *testing.T) { } func TestParseString(t *testing.T) { + Y := Map("hello", func(n *Node) *Node { return &Node{Result: n.Token} }) t.Run("partial match", func(t *testing.T) { - result, remaining, err := ParseString("hello", "hello world") + result, remaining, err := ParseString(Y, "hello world") require.Equal(t, "hello", result) require.Equal(t, " world", remaining) require.NoError(t, err) }) t.Run("error", func(t *testing.T) { - result, remaining, err := ParseString("world", "hello world") - require.Equal(t, nil, result) - require.Equal(t, "hello world", remaining) + result, remaining, err := ParseString(Y, "world") + require.Nil(t, result) + require.Equal(t, "world", remaining) require.Error(t, err) - require.Equal(t, "offset 0: Expected world", err.Error()) + require.Equal(t, "offset 0: Expected hello", err.Error()) }) } func TestString(t *testing.T) { t.Run("test basic match", func(t *testing.T) { result, p := runParser(`"hello"`, String('"')) - require.Equal(t, `hello`, result) + require.Equal(t, `hello`, result.Token) require.Equal(t, "", p.Get()) }) @@ -153,7 +154,7 @@ func TestString(t *testing.T) { t.Run("test escaping", func(t *testing.T) { result, p := runParser(`"hello \"world\""`, String('"')) - require.Equal(t, `hello "world"`, result) + require.Equal(t, `hello "world"`, result.Token) require.Equal(t, ``, p.Get()) }) } @@ -161,20 +162,20 @@ func TestString(t *testing.T) { func TestWS(t *testing.T) { t.Run("consumes all whitespace", func(t *testing.T) { result, p := runParser(" asdf", WS) - require.Equal(t, nil, result) + require.Nil(t, result) require.Equal(t, "asdf", p.Get()) require.False(t, p.Errored()) }) t.Run("never errors", func(t *testing.T) { result, p := runParser("asdf", WS) - require.Equal(t, nil, result) + require.Nil(t, result) require.Equal(t, "asdf", p.Get()) require.False(t, p.Errored()) }) } -func runParser(input string, parser Parser) (interface{}, *State) { +func runParser(input string, parser Parser) (*Node, *State) { ps := InputString(input) result := parser(ps) return result, ps