goparsify/html/html.go

57 lines
1.2 KiB
Go
Raw Normal View History

2017-08-06 09:02:39 +02:00
package html
2017-08-07 13:20:30 +02:00
import (
. "github.com/vektah/goparsify"
)
2017-08-06 09:02:39 +02:00
2017-08-10 16:08:08 +02:00
func parse(input string) (result interface{}, err error) {
2017-08-09 13:18:14 +02:00
return Run(tag, input)
2017-08-06 09:02:39 +02:00
}
2017-08-10 16:08:08 +02:00
// htmlTag is the value produced for one parsed element: its name, its
// attributes, and the parsed content found between the opening and closing
// tags.
type htmlTag struct {
	// Name is the identifier token taken from the opening tag.
	Name string
	// Attributes maps each attribute name to the token of its string literal.
	Attributes map[string]string
	// Body holds the results of the child elements in source order; within
	// this grammar each entry is either a text token or a nested htmlTag.
	Body []interface{}
}
var (
tag Parser
2017-08-09 13:41:57 +02:00
identifier = Regex("[a-zA-Z][a-zA-Z0-9]*")
2017-08-13 09:30:10 +02:00
text = NotChars("<>").Map(func(n *Result) { n.Result = n.Token })
2017-08-06 09:02:39 +02:00
element = Any(text, &tag)
2017-08-13 09:30:10 +02:00
elements = Some(element).Map(func(n *Result) {
2017-08-07 10:25:23 +02:00
ret := []interface{}{}
2017-08-08 15:11:47 +02:00
for _, child := range n.Child {
2017-08-07 10:25:23 +02:00
ret = append(ret, child.Result)
}
2017-08-13 09:30:10 +02:00
n.Result = ret
2017-08-07 10:25:23 +02:00
})
2017-08-09 11:35:15 +02:00
attr = Seq(identifier, "=", StringLit(`"'`))
2017-08-13 09:30:10 +02:00
attrs = Some(attr).Map(func(node *Result) {
2017-08-06 09:02:39 +02:00
attr := map[string]string{}
2017-08-08 15:11:47 +02:00
for _, attrNode := range node.Child {
attr[attrNode.Child[0].Token] = attrNode.Child[2].Token
2017-08-06 09:02:39 +02:00
}
2017-08-13 09:30:10 +02:00
node.Result = attr
2017-08-06 09:02:39 +02:00
})
2017-08-10 14:10:30 +02:00
tstart = Seq("<", identifier, Cut(), attrs, ">")
tend = Seq("</", Cut(), identifier, ">")
2017-08-06 09:02:39 +02:00
)
func init() {
2017-08-13 09:30:10 +02:00
tag = Seq(tstart, Cut(), elements, tend).Map(func(node *Result) {
2017-08-08 15:11:47 +02:00
openTag := node.Child[0]
2017-08-13 09:30:10 +02:00
node.Result = htmlTag{
2017-08-10 13:58:14 +02:00
Name: openTag.Child[1].Token,
2017-08-10 13:04:14 +02:00
Attributes: openTag.Child[3].Result.(map[string]string),
2017-08-10 13:58:14 +02:00
Body: node.Child[2].Result.([]interface{}),
2017-08-13 09:30:10 +02:00
}
2017-08-06 09:02:39 +02:00
})
}