goparsify/html/html.go

60 lines
1.3 KiB
Go
Raw Normal View History

2017-08-06 09:02:39 +02:00
package html
2017-08-07 13:20:30 +02:00
import (
. "github.com/vektah/goparsify"
)
2017-08-06 09:02:39 +02:00
2017-08-06 15:32:10 +02:00
func Parse(input string) (result interface{}, remaining string, err error) {
2017-08-06 09:02:39 +02:00
return ParseString(tag, input)
}
// Tag is the value produced for one parsed element: its name, its
// attributes, and the content found between the opening and closing tags.
type Tag struct {
	// Name is the identifier taken from the opening tag.
	Name string

	// Attributes maps each attribute name to its string value.
	Attributes map[string]string

	// Body holds the element's children in source order. Each entry is the
	// result of either the text parser or a nested tag parser.
	Body []interface{}
}
var (
tag Parser
2017-08-07 13:20:30 +02:00
identifier = NoAutoWS(Merge(And(WS(), Chars("a-zA-Z", 1), Chars("a-zA-Z0-9", 0))))
2017-08-07 13:45:12 +02:00
text = Map(NotChars("<>"), func(n Node) Node {
return Node{Result: n.Token}
2017-08-07 10:25:23 +02:00
})
2017-08-06 09:02:39 +02:00
element = Any(text, &tag)
2017-08-07 13:45:12 +02:00
elements = Map(Kleene(element), func(n Node) Node {
2017-08-07 10:25:23 +02:00
ret := []interface{}{}
2017-08-08 15:11:47 +02:00
for _, child := range n.Child {
2017-08-07 10:25:23 +02:00
ret = append(ret, child.Result)
}
2017-08-07 13:45:12 +02:00
return Node{Result: ret}
2017-08-07 10:25:23 +02:00
})
2017-08-08 11:56:14 +02:00
attr = And(identifier, "=", StringLit(`"'`))
2017-08-07 13:45:12 +02:00
attrs = Map(Kleene(attr), func(node Node) Node {
2017-08-06 09:02:39 +02:00
attr := map[string]string{}
2017-08-08 15:11:47 +02:00
for _, attrNode := range node.Child {
attr[attrNode.Child[0].Token] = attrNode.Child[2].Result.(string)
2017-08-06 09:02:39 +02:00
}
2017-08-07 13:45:12 +02:00
return Node{Result: attr}
2017-08-06 09:02:39 +02:00
})
tstart = And("<", identifier, attrs, ">")
tend = And("</", identifier, ">")
)
func init() {
2017-08-07 13:45:12 +02:00
tag = Map(And(tstart, elements, tend), func(node Node) Node {
2017-08-08 15:11:47 +02:00
openTag := node.Child[0]
2017-08-07 13:45:12 +02:00
return Node{Result: Tag{
2017-08-08 15:11:47 +02:00
Name: openTag.Child[1].Token,
Attributes: openTag.Child[2].Result.(map[string]string),
Body: node.Child[1].Result.([]interface{}),
2017-08-07 10:25:23 +02:00
}}
2017-08-06 09:02:39 +02:00
})
}