goparsify/html/html.go

60 lines
1.3 KiB
Go
Raw Normal View History

2017-08-06 09:02:39 +02:00
package html
2017-08-07 13:20:30 +02:00
import (
. "github.com/vektah/goparsify"
)
2017-08-06 09:02:39 +02:00
2017-08-09 13:18:14 +02:00
func Parse(input string) (result interface{}, err error) {
return Run(tag, input)
2017-08-06 09:02:39 +02:00
}
// Tag is the value built for one parsed element: its name, its attributes,
// and the content found between its opening and closing tags.
type Tag struct {
	// Name is the identifier token taken from the opening tag.
	Name string
	// Attributes maps each attribute name to its string value.
	Attributes map[string]string
	// Body holds the results of the element's children in source order.
	// Within this package a text run contributes a string and a nested
	// element contributes a Tag.
	Body []interface{}
}
var (
tag Parser
2017-08-09 13:41:57 +02:00
identifier = Regex("[a-zA-Z][a-zA-Z0-9]*")
2017-08-09 13:18:14 +02:00
text = Map(NotChars("<>"), func(n Result) Result {
return Result{Result: n.Token}
2017-08-07 10:25:23 +02:00
})
2017-08-06 09:02:39 +02:00
element = Any(text, &tag)
2017-08-09 13:18:14 +02:00
elements = Map(Some(element), func(n Result) Result {
2017-08-07 10:25:23 +02:00
ret := []interface{}{}
2017-08-08 15:11:47 +02:00
for _, child := range n.Child {
2017-08-07 10:25:23 +02:00
ret = append(ret, child.Result)
}
2017-08-09 13:18:14 +02:00
return Result{Result: ret}
2017-08-07 10:25:23 +02:00
})
2017-08-09 11:35:15 +02:00
attr = Seq(identifier, "=", StringLit(`"'`))
2017-08-09 13:18:14 +02:00
attrs = Map(Some(attr), func(node Result) Result {
2017-08-06 09:02:39 +02:00
attr := map[string]string{}
2017-08-08 15:11:47 +02:00
for _, attrNode := range node.Child {
attr[attrNode.Child[0].Token] = attrNode.Child[2].Result.(string)
2017-08-06 09:02:39 +02:00
}
2017-08-09 13:18:14 +02:00
return Result{Result: attr}
2017-08-06 09:02:39 +02:00
})
2017-08-10 13:04:14 +02:00
tstart = Seq("<", Cut, identifier, attrs, ">")
tend = Seq("</", Cut, identifier, ">")
2017-08-06 09:02:39 +02:00
)
func init() {
2017-08-09 13:18:14 +02:00
tag = Map(Seq(tstart, elements, tend), func(node Result) Result {
2017-08-08 15:11:47 +02:00
openTag := node.Child[0]
2017-08-09 13:18:14 +02:00
return Result{Result: Tag{
2017-08-10 13:04:14 +02:00
Name: openTag.Child[2].Token,
Attributes: openTag.Child[3].Result.(map[string]string),
2017-08-08 15:11:47 +02:00
Body: node.Child[1].Result.([]interface{}),
2017-08-07 10:25:23 +02:00
}}
2017-08-06 09:02:39 +02:00
})
}