diff --git a/combinator.go b/combinator.go index 2b6b8a3..1f1317c 100644 --- a/combinator.go +++ b/combinator.go @@ -1,5 +1,10 @@ package parsec +import ( + "bytes" + "fmt" +) + func Nil(p Pointer) (Node, Pointer) { return nil, p } @@ -29,7 +34,7 @@ func And(parsers ...Parserish) Parser { } nodes = append(nodes, node) } - return NewSequence(p.pos, nodes...), newP + return nodes, newP } } @@ -109,6 +114,53 @@ func manyImpl(min int, op Parserish, until Parserish, sep ...Parserish) Parser { break } } - return NewSequence(p.pos, nodes...), newP + return nodes, newP } } + +func Maybe(parser Parserish) Parser { + realParser := Parsify(parser) + + return func(p Pointer) (Node, Pointer) { + node, newP := realParser(p) + if IsError(node) { + return nil, p + } + return node, newP + } +} + +func Map(parser Parserish, f func(n Node) Node) Parser { + p := Parsify(parser) + + return func(ptr Pointer) (Node, Pointer) { + node, newPtr := p(ptr) + if IsError(node) { + return node, ptr + } + + return f(node), newPtr + } +} + +func flatten(n Node) string { + if s, ok := n.(string); ok { + return s + } + + if nodes, ok := n.([]Node); ok { + sbuf := &bytes.Buffer{} + for _, node := range nodes { + sbuf.WriteString(flatten(node)) + } + return sbuf.String() + } + + panic(fmt.Errorf("Dont know how to flatten %t", n)) +} + +func Merge(parser Parserish) Parser { + return Map(parser, func(n Node) Node { + return flatten(n) + }) +} diff --git a/combinator_test.go b/combinator_test.go index 2bec734..efee7ba 100644 --- a/combinator_test.go +++ b/combinator_test.go @@ -19,7 +19,7 @@ func TestAnd(t *testing.T) { t.Run("matches sequence", func(t *testing.T) { node, p2 := And("hello", WS, "world")(p) - require.Equal(t, NewSequence(0, NewToken(0, "hello"), NewToken(6, "world")), node) + require.Equal(t, []Node{"hello", "world"}, node) require.Equal(t, "", p2.Get()) }) @@ -34,12 +34,28 @@ func TestAnd(t *testing.T) { }) } +func TestMaybe(t *testing.T) { + p := Pointer{"hello world", 0} + + t.Run("matches sequence", func(t *testing.T) { + node, p2 := Maybe("hello")(p) + require.Equal(t, "hello", node) + require.Equal(t, " world", p2.Get()) + }) + + t.Run("returns no errors", func(t *testing.T) { + e, p3 := Maybe("world")(p) + require.Equal(t, nil, e) + require.Equal(t, 0, p3.pos) + }) +} + func TestAny(t *testing.T) { p := Pointer{"hello world!", 0} t.Run("Matches any", func(t *testing.T) { node, p2 := Any("hello", "world")(p) - require.Equal(t, NewToken(0, "hello"), node) + require.Equal(t, "hello", node) require.Equal(t, 5, p2.pos) }) @@ -69,40 +85,19 @@ func TestKleene(t *testing.T) { t.Run("Matches sequence with sep", func(t *testing.T) { node, p2 := Kleene(CharRun("abcdefg"), Exact(","))(p) - require.Equal(t, NewSequence(0, - NewToken(0, "a"), - NewToken(2, "b"), - NewToken(4, "c"), - NewToken(6, "d"), - NewToken(8, "e"), - ), node) + require.Equal(t, []Node{"a", "b", "c", "d", "e"}, node) require.Equal(t, 10, p2.pos) }) t.Run("Matches sequence without sep", func(t *testing.T) { node, p2 := Kleene(Any(CharRun("abcdefg"), Exact(",")))(p) - require.Equal(t, NewSequence(0, - NewToken(0, "a"), - NewToken(1, ","), - NewToken(2, "b"), - NewToken(3, ","), - NewToken(4, "c"), - NewToken(5, ","), - NewToken(6, "d"), - NewToken(7, ","), - NewToken(8, "e"), - NewToken(9, ","), - ), node) + require.Equal(t, []Node{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node) require.Equal(t, 10, p2.pos) }) t.Run("Stops on error", func(t *testing.T) { node, p2 := Kleene(CharRun("abc"), Exact(","))(p) - require.Equal(t, NewSequence(0, - NewToken(0, "a"), - NewToken(2, "b"), - NewToken(4, "c"), - ), node) + require.Equal(t, []Node{"a", "b", "c"}, node) require.Equal(t, 6, p2.pos) require.Equal(t, "d,e,", p2.Get()) }) @@ -113,40 +108,19 @@ func TestMany(t *testing.T) { t.Run("Matches sequence with sep", func(t *testing.T) { node, p2 := Many(CharRun("abcdefg"), Exact(","))(p) - require.Equal(t, NewSequence(0, - NewToken(0, "a"), - NewToken(2, "b"), - NewToken(4, "c"), - NewToken(6, "d"), - NewToken(8, "e"), - ), node) + require.Equal(t, []Node{"a", "b", "c", "d", "e"}, node) require.Equal(t, 10, p2.pos) }) t.Run("Matches sequence without sep", func(t *testing.T) { node, p2 := Many(Any(CharRun("abcdefg"), Exact(",")))(p) - require.Equal(t, NewSequence(0, - NewToken(0, "a"), - NewToken(1, ","), - NewToken(2, "b"), - NewToken(3, ","), - NewToken(4, "c"), - NewToken(5, ","), - NewToken(6, "d"), - NewToken(7, ","), - NewToken(8, "e"), - NewToken(9, ","), - ), node) + require.Equal(t, []Node{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node) require.Equal(t, 10, p2.pos) }) t.Run("Stops on error", func(t *testing.T) { node, p2 := Many(CharRun("abc"), Exact(","))(p) - require.Equal(t, NewSequence(0, - NewToken(0, "a"), - NewToken(2, "b"), - NewToken(4, "c"), - ), node) + require.Equal(t, []Node{"a", "b", "c"}, node) require.Equal(t, 6, p2.pos) require.Equal(t, "d,e,", p2.Get()) }) @@ -164,24 +138,13 @@ func TestKleeneUntil(t *testing.T) { t.Run("Matches sequence with sep", func(t *testing.T) { node, p2 := KleeneUntil(CharRun("abcde"), CharRun("d"), Exact(","))(p) - require.Equal(t, NewSequence(0, - NewToken(0, "a"), - NewToken(2, "b"), - NewToken(4, "c"), - ), node) + require.Equal(t, []Node{"a", "b", "c"}, node) require.Equal(t, 6, p2.pos) }) t.Run("Breaks if separator does not match", func(t *testing.T) { node, p2 := KleeneUntil(Char("abcdefg"), Char("y"), Exact(","))(p) - require.Equal(t, NewSequence(0, - NewToken(0, "a"), - NewToken(2, "b"), - NewToken(4, "c"), - NewToken(6, "d"), - NewToken(8, "e"), - NewToken(10, "f"), - ), node) + require.Equal(t, []Node{"a", "b", "c", "d", "e", "f"}, node) require.Equal(t, 11, p2.pos) }) } @@ -191,11 +154,7 @@ func TestManyUntil(t *testing.T) { t.Run("Matches sequence until", func(t *testing.T) { node, p2 := ManyUntil(CharRun("abcdefg"), Char("d"), Exact(","))(p) - require.Equal(t, NewSequence(0, - NewToken(0, "a"), - NewToken(2, "b"), - NewToken(4, "c"), - ), node) + require.Equal(t, []Node{"a", "b", "c"}, node) require.Equal(t, 6, p2.pos) }) @@ -207,6 +166,48 @@ func TestManyUntil(t *testing.T) { }) } +type htmlTag struct { + Name string +} + +func TestMap(t *testing.T) { + parser := Map(And("<", Range("a-zA-Z0-9"), ">"), func(n Node) Node { + return htmlTag{n.([]Node)[1].(string)} + }) + + t.Run("sucess", func(t *testing.T) { + result, _ := parser(Pointer{"", 0}) + require.Equal(t, htmlTag{"html"}, result) + }) + + t.Run("error", func(t *testing.T) { + result, ptr := parser(Pointer{""), result) + require.Equal(t, 0, ptr.pos) + }) +} + +func TestMerge(t *testing.T) { + var bracer Parser + bracer = And("(", Maybe(&bracer), ")") + parser := Merge(bracer) + + t.Run("sucess", func(t *testing.T) { + result, _ := parser(Pointer{"((()))", 0}) + require.Equal(t, "((()))", result) + }) + + t.Run("error", func(t *testing.T) { + result, ptr := parser(Pointer{"((())", 0}) + require.Equal(t, NewError(5, "Expected )"), result) + require.Equal(t, 0, ptr.pos) + }) + + require.Panics(t, func() { + flatten(1) + }) +} + func assertNilParser(t *testing.T, parser Parser) { p := Pointer{"fff", 0} node, p2 := parser(p) diff --git a/examples/html.go b/examples/html.go deleted file mode 100644 index d1a290a..0000000 --- a/examples/html.go +++ /dev/null @@ -1,34 +0,0 @@ -package main - -import ( - "fmt" - - . "github.com/vektah/goparsify" -) - -func html(p Pointer) (Node, Pointer) { - identifier := And(Range("a-z", 1, 1), Range("a-zA-Z0-9")) - text := CharRunUntil("<>") - - var tag Parser - - element := Any(text, &tag) - elements := Kleene(element) - //attr := And(identifier, equal, String()) - attr := And(identifier, "=", `"test"`) - attrws := And(attr, WS) - attrs := Kleene(attrws) - tstart := And("<", identifier, attrs, ">") - tend := And("") - tag = And(tstart, elements, tend) - - return element(p) -} - -func main() { - result, _, err := ParseString(html, "

hello world

") - if err != nil { - panic(err) - } - fmt.Printf("%#v\n", result) -} diff --git a/html/README.md b/html/README.md new file mode 100644 index 0000000..2e72f20 --- /dev/null +++ b/html/README.md @@ -0,0 +1,4 @@ +example html parser +=== + +This is a **very** rudimentary html parser that should be used as an example only. diff --git a/html/html.go b/html/html.go new file mode 100644 index 0000000..5ceb5e9 --- /dev/null +++ b/html/html.go @@ -0,0 +1,52 @@ +package html + +import . "github.com/vektah/goparsify" + +func Parse(input string) (result Node, remaining string, err error) { + return ParseString(tag, input) +} + +type Tag struct { + Name string + Attributes map[string]string + Body []Node +} + +var ( + tag Parser + + identifier = Merge(And(Range("a-z", 1, 1), Range("a-zA-Z0-9", 0))) + text = CharRunUntil("<>") + + element = Any(text, &tag) + elements = Kleene(element) + //attr := And(identifier, equal, String()) + attr = And(identifier, WS, "=", WS, `"test"`) + attrs = Map(Kleene(attr, WS), func(node Node) Node { + nodes := node.([]Node) + attr := map[string]string{} + + for _, attrNode := range nodes { + attrNodes := attrNode.([]Node) + attr[attrNodes[0].(string)] = attrNodes[2].(string) + } + + return attr + }) + + tstart = And("<", identifier, attrs, ">") + tend = And("") +) + +func init() { + tag = Map(And(tstart, elements, tend), func(node Node) Node { + nodes := node.([]Node) + openTag := nodes[0].([]Node) + return Tag{ + Name: openTag[1].(string), + Attributes: openTag[2].(map[string]string), + Body: nodes[1].([]Node), + } + + }) +} diff --git a/html/html_test.go b/html/html_test.go new file mode 100644 index 0000000..6dca6d4 --- /dev/null +++ b/html/html_test.go @@ -0,0 +1,17 @@ +package html + +import ( + "testing" + + "github.com/stretchr/testify/require" + . "github.com/vektah/goparsify" +) + +func TestParse(t *testing.T) { + result, _, err := Parse("hello world") + require.NoError(t, err) + require.Equal(t, Tag{Name: "body", Attributes: map[string]string{}, Body: []Node{ + "hello ", + Tag{Name: "b", Attributes: map[string]string{}, Body: []Node{"world"}}, + }}, result) +} diff --git a/nodes.go b/nodes.go index 410fa7e..29dd58f 100644 --- a/nodes.go +++ b/nodes.go @@ -3,18 +3,6 @@ package parsec import "fmt" type Node interface { - Pos() int -} - -type Token struct { - pos int - Value string -} - -func (e Token) Pos() int { return e.pos } - -func NewToken(pos int, value string) Token { - return Token{pos, value} } type Error struct { @@ -29,18 +17,7 @@ func NewError(pos int, message string) Error { return Error{pos, message} } -func IsError(n Node) bool { +func IsError(n interface{}) bool { _, isErr := n.(Error) return isErr } - -type Sequence struct { - pos int - Nodes []Node -} - -func (e Sequence) Pos() int { return e.pos } - -func NewSequence(pos int, n ...Node) Sequence { - return Sequence{pos, n} -} diff --git a/parser.go b/parser.go index 82b5d56..3c5d752 100644 --- a/parser.go +++ b/parser.go @@ -67,7 +67,7 @@ func Exact(match string) Parser { return NewError(p.pos, "Expected "+match), p } - return NewToken(p.pos, match), p.Advance(len(match)) + return match, p.Advance(len(match)) } } @@ -79,7 +79,7 @@ func Char(match string) Parser { return NewError(p.pos, "Expected one of "+string(match)), p } - return NewToken(p.pos, string(r)), p.Advance(w) + return string(r), p.Advance(w) } } @@ -98,7 +98,7 @@ func CharRun(match string) Parser { return NewError(p.pos, "Expected some of "+match), p } - return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched) + return p.input[p.pos : p.pos+matched], p.Advance(matched) } } @@ -117,7 +117,7 @@ func CharRunUntil(match string) Parser { return NewError(p.pos, "Expected some of "+match), p } - return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched) + return p.input[p.pos : p.pos+matched], p.Advance(matched) } } @@ -177,7 +177,7 @@ func Range(r string, repetition ...int) Parser { return NewError(p.pos+matched, fmt.Sprintf("Expected at least %d more of %s", min-matched, r)), p } - return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched) + return p.input[p.pos : p.pos+matched], p.Advance(matched) } } diff --git a/parser_test.go b/parser_test.go index 83a95f6..a560c8c 100644 --- a/parser_test.go +++ b/parser_test.go @@ -11,19 +11,19 @@ func TestParsify(t *testing.T) { t.Run("strings", func(t *testing.T) { node, _ := Parsify("ff")(p) - require.Equal(t, NewToken(0, "ff"), node) + require.Equal(t, "ff", node) }) t.Run("parsers", func(t *testing.T) { node, _ := Parsify(CharRun("f"))(p) - require.Equal(t, NewToken(0, "ff"), node) + require.Equal(t, "ff", node) }) t.Run("parser funcs", func(t *testing.T) { node, _ := Parsify(func(p Pointer) (Node, Pointer) { - return NewToken(0, "hello"), p + return "hello", p })(p) - require.Equal(t, NewToken(0, "hello"), node) + require.Equal(t, "hello", node) }) t.Run("*parsers", func(t *testing.T) { @@ -32,7 +32,7 @@ func TestParsify(t *testing.T) { parser = CharRun("f") node, _ := parserfied(p) - require.Equal(t, NewToken(0, "ff"), node) + require.Equal(t, "ff", node) }) require.Panics(t, func() { @@ -44,7 +44,7 @@ func TestParsifyAll(t *testing.T) { parsers := ParsifyAll("ff", "gg") result, _ := parsers[0](Pointer{"ffooo", 0}) - require.Equal(t, NewToken(0, "ff"), result) + require.Equal(t, "ff", result) result, _ = parsers[1](Pointer{"ffooo", 0}) require.Equal(t, NewError(0, "Expected gg"), result) @@ -55,7 +55,7 @@ func TestExact(t *testing.T) { t.Run("success", func(t *testing.T) { node, p2 := Exact("fo")(p) - require.Equal(t, NewToken(0, "fo"), node) + require.Equal(t, "fo", node) require.Equal(t, p.Advance(2), p2) }) @@ -71,7 +71,7 @@ func TestChar(t *testing.T) { t.Run("success", func(t *testing.T) { node, p2 := Char("fo")(p) - require.Equal(t, NewToken(0, "f"), node) + require.Equal(t, "f", node) require.Equal(t, p.Advance(1), p2) }) @@ -87,7 +87,7 @@ func TestCharRun(t *testing.T) { t.Run("success", func(t *testing.T) { node, p2 := CharRun("fo")(p) - require.Equal(t, NewToken(0, "foo"), node) + require.Equal(t, "foo", node) require.Equal(t, p.Advance(3), p2) }) @@ -103,7 +103,7 @@ func TestCharUntil(t *testing.T) { t.Run("success", func(t *testing.T) { node, p2 := CharRunUntil("z")(p) - require.Equal(t, NewToken(0, "foobar"), node) + require.Equal(t, "foobar", node) require.Equal(t, p.Advance(6), p2) }) @@ -125,19 +125,19 @@ func TestWS(t *testing.T) { func TestRange(t *testing.T) { t.Run("full match", func(t *testing.T) { node, p := Range("a-z")(Pointer{"foobar", 0}) - require.Equal(t, NewToken(0, "foobar"), node) + require.Equal(t, "foobar", node) require.Equal(t, "", p.Get()) }) t.Run("partial match", func(t *testing.T) { node, p := Range("1-4d-a")(Pointer{"a1b2c3d4efg", 0}) - require.Equal(t, NewToken(0, "a1b2c3d4"), node) + require.Equal(t, "a1b2c3d4", node) require.Equal(t, "efg", p.Get()) }) t.Run("limited match", func(t *testing.T) { node, p := Range("1-4d-a", 1, 2)(Pointer{"a1b2c3d4efg", 0}) - require.Equal(t, NewToken(0, "a1"), node) + require.Equal(t, "a1", node) require.Equal(t, "b2c3d4efg", p.Get()) }) @@ -165,7 +165,7 @@ func TestRange(t *testing.T) { func TestParseString(t *testing.T) { t.Run("partial match", func(t *testing.T) { result, remaining, err := ParseString("hello", "hello world") - require.Equal(t, NewToken(0, "hello"), result) + require.Equal(t, "hello", result) require.Equal(t, " world", remaining) require.NoError(t, err) })