summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Adam Scarr <adam@vektah.net> 2017-08-06 17:02:39 +1000
committer Adam Scarr <adam@vektah.net> 2017-08-06 17:02:39 +1000
commit a65a9325aaebd1499a8e523463cc023124f8536a (patch)
tree 2fb31800b67a3914a7204a28319a7aeb0ac649c7
parent 8b343d6360d0edc065b9b62ab5e708e907b45a92 (diff)
Get the HTML parser working
-rw-r--r-- combinator.go 56
-rw-r--r-- combinator_test.go 137
-rw-r--r-- examples/html.go 34
-rw-r--r-- html/README.md 4
-rw-r--r-- html/html.go 52
-rw-r--r-- html/html_test.go 17
-rw-r--r-- nodes.go 25
-rw-r--r-- parser.go 10
-rw-r--r-- parser_test.go 28
9 files changed, 216 insertions, 147 deletions
diff --git a/combinator.go b/combinator.go
index 2b6b8a3..1f1317c 100644
--- a/combinator.go
+++ b/combinator.go
@@ -1,5 +1,10 @@
package parsec
+import (
+ "bytes"
+ "fmt"
+)
+
func Nil(p Pointer) (Node, Pointer) {
return nil, p
}
@@ -29,7 +34,7 @@ func And(parsers ...Parserish) Parser {
}
nodes = append(nodes, node)
}
- return NewSequence(p.pos, nodes...), newP
+ return nodes, newP
}
}
@@ -109,6 +114,53 @@ func manyImpl(min int, op Parserish, until Parserish, sep ...Parserish) Parser {
break
}
}
- return NewSequence(p.pos, nodes...), newP
+ return nodes, newP
+ }
+}
+
+func Maybe(parser Parserish) Parser {
+ realParser := Parsify(parser)
+
+ return func(p Pointer) (Node, Pointer) {
+ node, newP := realParser(p)
+ if IsError(node) {
+ return nil, p
+ }
+ return node, newP
+ }
+}
+
+func Map(parser Parserish, f func(n Node) Node) Parser {
+ p := Parsify(parser)
+
+ return func(ptr Pointer) (Node, Pointer) {
+ node, newPtr := p(ptr)
+ if IsError(node) {
+ return node, ptr
+ }
+
+ return f(node), newPtr
+ }
+}
+
+func flatten(n Node) string {
+ if s, ok := n.(string); ok {
+ return s
}
+
+ if nodes, ok := n.([]Node); ok {
+ sbuf := &bytes.Buffer{}
+ for _, node := range nodes {
+ sbuf.WriteString(flatten(node))
+ }
+ return sbuf.String()
+ }
+
+ panic(fmt.Errorf("Dont know how to flatten %t", n))
+}
+
+func Merge(parser Parserish) Parser {
+ return Map(parser, func(n Node) Node {
+ return flatten(n)
+ })
}
diff --git a/combinator_test.go b/combinator_test.go
index 2bec734..efee7ba 100644
--- a/combinator_test.go
+++ b/combinator_test.go
@@ -19,7 +19,7 @@ func TestAnd(t *testing.T) {
t.Run("matches sequence", func(t *testing.T) {
node, p2 := And("hello", WS, "world")(p)
- require.Equal(t, NewSequence(0, NewToken(0, "hello"), NewToken(6, "world")), node)
+ require.Equal(t, []Node{"hello", "world"}, node)
require.Equal(t, "", p2.Get())
})
@@ -34,12 +34,28 @@ func TestAnd(t *testing.T) {
})
}
+func TestMaybe(t *testing.T) {
+ p := Pointer{"hello world", 0}
+
+ t.Run("matches sequence", func(t *testing.T) {
+ node, p2 := Maybe("hello")(p)
+ require.Equal(t, "hello", node)
+ require.Equal(t, " world", p2.Get())
+ })
+
+ t.Run("returns no errors", func(t *testing.T) {
+ e, p3 := Maybe("world")(p)
+ require.Equal(t, nil, e)
+ require.Equal(t, 0, p3.pos)
+ })
+}
+
func TestAny(t *testing.T) {
p := Pointer{"hello world!", 0}
t.Run("Matches any", func(t *testing.T) {
node, p2 := Any("hello", "world")(p)
- require.Equal(t, NewToken(0, "hello"), node)
+ require.Equal(t, "hello", node)
require.Equal(t, 5, p2.pos)
})
@@ -69,40 +85,19 @@ func TestKleene(t *testing.T) {
t.Run("Matches sequence with sep", func(t *testing.T) {
node, p2 := Kleene(CharRun("abcdefg"), Exact(","))(p)
- require.Equal(t, NewSequence(0,
- NewToken(0, "a"),
- NewToken(2, "b"),
- NewToken(4, "c"),
- NewToken(6, "d"),
- NewToken(8, "e"),
- ), node)
+ require.Equal(t, []Node{"a", "b", "c", "d", "e"}, node)
require.Equal(t, 10, p2.pos)
})
t.Run("Matches sequence without sep", func(t *testing.T) {
node, p2 := Kleene(Any(CharRun("abcdefg"), Exact(",")))(p)
- require.Equal(t, NewSequence(0,
- NewToken(0, "a"),
- NewToken(1, ","),
- NewToken(2, "b"),
- NewToken(3, ","),
- NewToken(4, "c"),
- NewToken(5, ","),
- NewToken(6, "d"),
- NewToken(7, ","),
- NewToken(8, "e"),
- NewToken(9, ","),
- ), node)
+ require.Equal(t, []Node{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node)
require.Equal(t, 10, p2.pos)
})
t.Run("Stops on error", func(t *testing.T) {
node, p2 := Kleene(CharRun("abc"), Exact(","))(p)
- require.Equal(t, NewSequence(0,
- NewToken(0, "a"),
- NewToken(2, "b"),
- NewToken(4, "c"),
- ), node)
+ require.Equal(t, []Node{"a", "b", "c"}, node)
require.Equal(t, 6, p2.pos)
require.Equal(t, "d,e,", p2.Get())
})
@@ -113,40 +108,19 @@ func TestMany(t *testing.T) {
t.Run("Matches sequence with sep", func(t *testing.T) {
node, p2 := Many(CharRun("abcdefg"), Exact(","))(p)
- require.Equal(t, NewSequence(0,
- NewToken(0, "a"),
- NewToken(2, "b"),
- NewToken(4, "c"),
- NewToken(6, "d"),
- NewToken(8, "e"),
- ), node)
+ require.Equal(t, []Node{"a", "b", "c", "d", "e"}, node)
require.Equal(t, 10, p2.pos)
})
t.Run("Matches sequence without sep", func(t *testing.T) {
node, p2 := Many(Any(CharRun("abcdefg"), Exact(",")))(p)
- require.Equal(t, NewSequence(0,
- NewToken(0, "a"),
- NewToken(1, ","),
- NewToken(2, "b"),
- NewToken(3, ","),
- NewToken(4, "c"),
- NewToken(5, ","),
- NewToken(6, "d"),
- NewToken(7, ","),
- NewToken(8, "e"),
- NewToken(9, ","),
- ), node)
+ require.Equal(t, []Node{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node)
require.Equal(t, 10, p2.pos)
})
t.Run("Stops on error", func(t *testing.T) {
node, p2 := Many(CharRun("abc"), Exact(","))(p)
- require.Equal(t, NewSequence(0,
- NewToken(0, "a"),
- NewToken(2, "b"),
- NewToken(4, "c"),
- ), node)
+ require.Equal(t, []Node{"a", "b", "c"}, node)
require.Equal(t, 6, p2.pos)
require.Equal(t, "d,e,", p2.Get())
})
@@ -164,24 +138,13 @@ func TestKleeneUntil(t *testing.T) {
t.Run("Matches sequence with sep", func(t *testing.T) {
node, p2 := KleeneUntil(CharRun("abcde"), CharRun("d"), Exact(","))(p)
- require.Equal(t, NewSequence(0,
- NewToken(0, "a"),
- NewToken(2, "b"),
- NewToken(4, "c"),
- ), node)
+ require.Equal(t, []Node{"a", "b", "c"}, node)
require.Equal(t, 6, p2.pos)
})
t.Run("Breaks if separator does not match", func(t *testing.T) {
node, p2 := KleeneUntil(Char("abcdefg"), Char("y"), Exact(","))(p)
- require.Equal(t, NewSequence(0,
- NewToken(0, "a"),
- NewToken(2, "b"),
- NewToken(4, "c"),
- NewToken(6, "d"),
- NewToken(8, "e"),
- NewToken(10, "f"),
- ), node)
+ require.Equal(t, []Node{"a", "b", "c", "d", "e", "f"}, node)
require.Equal(t, 11, p2.pos)
})
}
@@ -191,11 +154,7 @@ func TestManyUntil(t *testing.T) {
t.Run("Matches sequence until", func(t *testing.T) {
node, p2 := ManyUntil(CharRun("abcdefg"), Char("d"), Exact(","))(p)
- require.Equal(t, NewSequence(0,
- NewToken(0, "a"),
- NewToken(2, "b"),
- NewToken(4, "c"),
- ), node)
+ require.Equal(t, []Node{"a", "b", "c"}, node)
require.Equal(t, 6, p2.pos)
})
@@ -207,6 +166,48 @@ func TestManyUntil(t *testing.T) {
})
}
+type htmlTag struct {
+ Name string
+}
+
+func TestMap(t *testing.T) {
+ parser := Map(And("<", Range("a-zA-Z0-9"), ">"), func(n Node) Node {
+ return htmlTag{n.([]Node)[1].(string)}
+ })
+
+ t.Run("sucess", func(t *testing.T) {
+ result, _ := parser(Pointer{"<html>", 0})
+ require.Equal(t, htmlTag{"html"}, result)
+ })
+
+ t.Run("error", func(t *testing.T) {
+ result, ptr := parser(Pointer{"<html", 0})
+ require.Equal(t, NewError(5, "Expected >"), result)
+ require.Equal(t, 0, ptr.pos)
+ })
+}
+
+func TestMerge(t *testing.T) {
+ var bracer Parser
+ bracer = And("(", Maybe(&bracer), ")")
+ parser := Merge(bracer)
+
+ t.Run("sucess", func(t *testing.T) {
+ result, _ := parser(Pointer{"((()))", 0})
+ require.Equal(t, "((()))", result)
+ })
+
+ t.Run("error", func(t *testing.T) {
+ result, ptr := parser(Pointer{"((())", 0})
+ require.Equal(t, NewError(5, "Expected )"), result)
+ require.Equal(t, 0, ptr.pos)
+ })
+
+ require.Panics(t, func() {
+ flatten(1)
+ })
+}
+
func assertNilParser(t *testing.T, parser Parser) {
p := Pointer{"fff", 0}
node, p2 := parser(p)
diff --git a/examples/html.go b/examples/html.go
deleted file mode 100644
index d1a290a..0000000
--- a/examples/html.go
+++ /dev/null
@@ -1,34 +0,0 @@
-package main
-
-import (
- "fmt"
-
- . "github.com/vektah/goparsify"
-)
-
-func html(p Pointer) (Node, Pointer) {
- identifier := And(Range("a-z", 1, 1), Range("a-zA-Z0-9"))
- text := CharRunUntil("<>")
-
- var tag Parser
-
- element := Any(text, &tag)
- elements := Kleene(element)
- //attr := And(identifier, equal, String())
- attr := And(identifier, "=", `"test"`)
- attrws := And(attr, WS)
- attrs := Kleene(attrws)
- tstart := And("<", identifier, attrs, ">")
- tend := And("</", identifier, ">")
- tag = And(tstart, elements, tend)
-
- return element(p)
-}
-
-func main() {
- result, _, err := ParseString(html, "<h1>hello world</h1>")
- if err != nil {
- panic(err)
- }
- fmt.Printf("%#v\n", result)
-}
diff --git a/html/README.md b/html/README.md
new file mode 100644
index 0000000..2e72f20
--- /dev/null
+++ b/html/README.md
@@ -0,0 +1,4 @@
+example html parser
+===
+
+This is a **very** rudimentary html parser that should be used as an example only.
diff --git a/html/html.go b/html/html.go
new file mode 100644
index 0000000..5ceb5e9
--- /dev/null
+++ b/html/html.go
@@ -0,0 +1,52 @@
+package html
+
+import . "github.com/vektah/goparsify"
+
+func Parse(input string) (result Node, remaining string, err error) {
+ return ParseString(tag, input)
+}
+
+type Tag struct {
+ Name string
+ Attributes map[string]string
+ Body []Node
+}
+
+var (
+ tag Parser
+
+ identifier = Merge(And(Range("a-z", 1, 1), Range("a-zA-Z0-9", 0)))
+ text = CharRunUntil("<>")
+
+ element = Any(text, &tag)
+ elements = Kleene(element)
+ //attr := And(identifier, equal, String())
+ attr = And(identifier, WS, "=", WS, `"test"`)
+ attrs = Map(Kleene(attr, WS), func(node Node) Node {
+ nodes := node.([]Node)
+ attr := map[string]string{}
+
+ for _, attrNode := range nodes {
+ attrNodes := attrNode.([]Node)
+ attr[attrNodes[0].(string)] = attrNodes[2].(string)
+ }
+
+ return attr
+ })
+
+ tstart = And("<", identifier, attrs, ">")
+ tend = And("</", identifier, ">")
+)
+
+func init() {
+ tag = Map(And(tstart, elements, tend), func(node Node) Node {
+ nodes := node.([]Node)
+ openTag := nodes[0].([]Node)
+ return Tag{
+ Name: openTag[1].(string),
+ Attributes: openTag[2].(map[string]string),
+ Body: nodes[1].([]Node),
+ }
+
+ })
+}
diff --git a/html/html_test.go b/html/html_test.go
new file mode 100644
index 0000000..6dca6d4
--- /dev/null
+++ b/html/html_test.go
@@ -0,0 +1,17 @@
+package html
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ . "github.com/vektah/goparsify"
+)
+
+func TestParse(t *testing.T) {
+ result, _, err := Parse("<body>hello <b>world</b></body>")
+ require.NoError(t, err)
+ require.Equal(t, Tag{Name: "body", Attributes: map[string]string{}, Body: []Node{
+ "hello ",
+ Tag{Name: "b", Attributes: map[string]string{}, Body: []Node{"world"}},
+ }}, result)
+}
diff --git a/nodes.go b/nodes.go
index 410fa7e..29dd58f 100644
--- a/nodes.go
+++ b/nodes.go
@@ -3,18 +3,6 @@ package parsec
import "fmt"
type Node interface {
- Pos() int
-}
-
-type Token struct {
- pos int
- Value string
-}
-
-func (e Token) Pos() int { return e.pos }
-
-func NewToken(pos int, value string) Token {
- return Token{pos, value}
}
type Error struct {
@@ -29,18 +17,7 @@ func NewError(pos int, message string) Error {
return Error{pos, message}
}
-func IsError(n Node) bool {
+func IsError(n interface{}) bool {
_, isErr := n.(Error)
return isErr
}
-
-type Sequence struct {
- pos int
- Nodes []Node
-}
-
-func (e Sequence) Pos() int { return e.pos }
-
-func NewSequence(pos int, n ...Node) Sequence {
- return Sequence{pos, n}
-}
diff --git a/parser.go b/parser.go
index 82b5d56..3c5d752 100644
--- a/parser.go
+++ b/parser.go
@@ -67,7 +67,7 @@ func Exact(match string) Parser {
return NewError(p.pos, "Expected "+match), p
}
- return NewToken(p.pos, match), p.Advance(len(match))
+ return match, p.Advance(len(match))
}
}
@@ -79,7 +79,7 @@ func Char(match string) Parser {
return NewError(p.pos, "Expected one of "+string(match)), p
}
- return NewToken(p.pos, string(r)), p.Advance(w)
+ return string(r), p.Advance(w)
}
}
@@ -98,7 +98,7 @@ func CharRun(match string) Parser {
return NewError(p.pos, "Expected some of "+match), p
}
- return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched)
+ return p.input[p.pos : p.pos+matched], p.Advance(matched)
}
}
@@ -117,7 +117,7 @@ func CharRunUntil(match string) Parser {
return NewError(p.pos, "Expected some of "+match), p
}
- return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched)
+ return p.input[p.pos : p.pos+matched], p.Advance(matched)
}
}
@@ -177,7 +177,7 @@ func Range(r string, repetition ...int) Parser {
return NewError(p.pos+matched, fmt.Sprintf("Expected at least %d more of %s", min-matched, r)), p
}
- return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched)
+ return p.input[p.pos : p.pos+matched], p.Advance(matched)
}
}
diff --git a/parser_test.go b/parser_test.go
index 83a95f6..a560c8c 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -11,19 +11,19 @@ func TestParsify(t *testing.T) {
t.Run("strings", func(t *testing.T) {
node, _ := Parsify("ff")(p)
- require.Equal(t, NewToken(0, "ff"), node)
+ require.Equal(t, "ff", node)
})
t.Run("parsers", func(t *testing.T) {
node, _ := Parsify(CharRun("f"))(p)
- require.Equal(t, NewToken(0, "ff"), node)
+ require.Equal(t, "ff", node)
})
t.Run("parser funcs", func(t *testing.T) {
node, _ := Parsify(func(p Pointer) (Node, Pointer) {
- return NewToken(0, "hello"), p
+ return "hello", p
})(p)
- require.Equal(t, NewToken(0, "hello"), node)
+ require.Equal(t, "hello", node)
})
t.Run("*parsers", func(t *testing.T) {
@@ -32,7 +32,7 @@ func TestParsify(t *testing.T) {
parser = CharRun("f")
node, _ := parserfied(p)
- require.Equal(t, NewToken(0, "ff"), node)
+ require.Equal(t, "ff", node)
})
require.Panics(t, func() {
@@ -44,7 +44,7 @@ func TestParsifyAll(t *testing.T) {
parsers := ParsifyAll("ff", "gg")
result, _ := parsers[0](Pointer{"ffooo", 0})
- require.Equal(t, NewToken(0, "ff"), result)
+ require.Equal(t, "ff", result)
result, _ = parsers[1](Pointer{"ffooo", 0})
require.Equal(t, NewError(0, "Expected gg"), result)
@@ -55,7 +55,7 @@ func TestExact(t *testing.T) {
t.Run("success", func(t *testing.T) {
node, p2 := Exact("fo")(p)
- require.Equal(t, NewToken(0, "fo"), node)
+ require.Equal(t, "fo", node)
require.Equal(t, p.Advance(2), p2)
})
@@ -71,7 +71,7 @@ func TestChar(t *testing.T) {
t.Run("success", func(t *testing.T) {
node, p2 := Char("fo")(p)
- require.Equal(t, NewToken(0, "f"), node)
+ require.Equal(t, "f", node)
require.Equal(t, p.Advance(1), p2)
})
@@ -87,7 +87,7 @@ func TestCharRun(t *testing.T) {
t.Run("success", func(t *testing.T) {
node, p2 := CharRun("fo")(p)
- require.Equal(t, NewToken(0, "foo"), node)
+ require.Equal(t, "foo", node)
require.Equal(t, p.Advance(3), p2)
})
@@ -103,7 +103,7 @@ func TestCharUntil(t *testing.T) {
t.Run("success", func(t *testing.T) {
node, p2 := CharRunUntil("z")(p)
- require.Equal(t, NewToken(0, "foobar"), node)
+ require.Equal(t, "foobar", node)
require.Equal(t, p.Advance(6), p2)
})
@@ -125,19 +125,19 @@ func TestWS(t *testing.T) {
func TestRange(t *testing.T) {
t.Run("full match", func(t *testing.T) {
node, p := Range("a-z")(Pointer{"foobar", 0})
- require.Equal(t, NewToken(0, "foobar"), node)
+ require.Equal(t, "foobar", node)
require.Equal(t, "", p.Get())
})
t.Run("partial match", func(t *testing.T) {
node, p := Range("1-4d-a")(Pointer{"a1b2c3d4efg", 0})
- require.Equal(t, NewToken(0, "a1b2c3d4"), node)
+ require.Equal(t, "a1b2c3d4", node)
require.Equal(t, "efg", p.Get())
})
t.Run("limited match", func(t *testing.T) {
node, p := Range("1-4d-a", 1, 2)(Pointer{"a1b2c3d4efg", 0})
- require.Equal(t, NewToken(0, "a1"), node)
+ require.Equal(t, "a1", node)
require.Equal(t, "b2c3d4efg", p.Get())
})
@@ -165,7 +165,7 @@ func TestRange(t *testing.T) {
func TestParseString(t *testing.T) {
t.Run("partial match", func(t *testing.T) {
result, remaining, err := ParseString("hello", "hello world")
- require.Equal(t, NewToken(0, "hello"), result)
+ require.Equal(t, "hello", result)
require.Equal(t, " world", remaining)
require.NoError(t, err)
})