Add godoc
This commit is contained in:
parent
8b2f10f238
commit
47badae641
@ -6,6 +6,8 @@ jobs:
|
|||||||
working_directory: /go/src/github.com/vektah/goparsify
|
working_directory: /go/src/github.com/vektah/goparsify
|
||||||
steps:
|
steps:
|
||||||
- checkout
|
- checkout
|
||||||
- run: go get -u github.com/golang/dep/cmd/dep
|
- run: go get -u github.com/golang/dep/cmd/dep github.com/alecthomas/gometalinter && gometalinter --install
|
||||||
- run: dep ensure --vendor-only
|
- run: dep ensure --vendor-only
|
||||||
|
- run: go vet ./...
|
||||||
- run: go test -v ./...
|
- run: go test -v ./...
|
||||||
|
- run: gometalinter . --disable gocyclo
|
||||||
|
28
calc/calc.go
28
calc/calc.go
@ -1,7 +1,6 @@
|
|||||||
package calc
|
package calc
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
. "github.com/vektah/goparsify"
|
. "github.com/vektah/goparsify"
|
||||||
@ -13,22 +12,22 @@ var (
|
|||||||
sumOp = Chars("+-", 1, 1)
|
sumOp = Chars("+-", 1, 1)
|
||||||
prodOp = Chars("/*", 1, 1)
|
prodOp = Chars("/*", 1, 1)
|
||||||
|
|
||||||
groupExpr = Map(Seq("(", sum, ")"), func(n Node) Node {
|
groupExpr = Map(Seq("(", sum, ")"), func(n Result) Result {
|
||||||
return Node{Result: n.Child[1].Result}
|
return Result{Result: n.Child[1].Result}
|
||||||
})
|
})
|
||||||
|
|
||||||
number = Map(NumberLit(), func(n Node) Node {
|
number = Map(NumberLit(), func(n Result) Result {
|
||||||
switch i := n.Result.(type) {
|
switch i := n.Result.(type) {
|
||||||
case int64:
|
case int64:
|
||||||
return Node{Result: float64(i)}
|
return Result{Result: float64(i)}
|
||||||
case float64:
|
case float64:
|
||||||
return Node{Result: i}
|
return Result{Result: i}
|
||||||
default:
|
default:
|
||||||
panic(fmt.Errorf("unknown value %#v", i))
|
panic(fmt.Errorf("unknown value %#v", i))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
sum = Map(Seq(prod, Some(Seq(sumOp, prod))), func(n Node) Node {
|
sum = Map(Seq(prod, Some(Seq(sumOp, prod))), func(n Result) Result {
|
||||||
i := n.Child[0].Result.(float64)
|
i := n.Child[0].Result.(float64)
|
||||||
|
|
||||||
for _, op := range n.Child[1].Child {
|
for _, op := range n.Child[1].Child {
|
||||||
@ -40,10 +39,10 @@ var (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return Node{Result: i}
|
return Result{Result: i}
|
||||||
})
|
})
|
||||||
|
|
||||||
prod = Map(Seq(&value, Some(Seq(prodOp, &value))), func(n Node) Node {
|
prod = Map(Seq(&value, Some(Seq(prodOp, &value))), func(n Result) Result {
|
||||||
i := n.Child[0].Result.(float64)
|
i := n.Child[0].Result.(float64)
|
||||||
|
|
||||||
for _, op := range n.Child[1].Child {
|
for _, op := range n.Child[1].Child {
|
||||||
@ -55,10 +54,10 @@ var (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return Node{Result: i}
|
return Result{Result: i}
|
||||||
})
|
})
|
||||||
|
|
||||||
Y = Maybe(sum)
|
y = Maybe(sum)
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
@ -66,15 +65,10 @@ func init() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func Calc(input string) (float64, error) {
|
func Calc(input string) (float64, error) {
|
||||||
result, remaining, err := ParseString(Y, input)
|
result, err := Run(y, input)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if remaining != "" {
|
|
||||||
return result.(float64), errors.New("left unparsed: " + remaining)
|
|
||||||
}
|
|
||||||
|
|
||||||
return result.(float64), nil
|
return result.(float64), nil
|
||||||
}
|
}
|
||||||
|
@ -4,11 +4,12 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Seq matches all of the given parsers in order and returns their nodes as .Child[n]
|
||||||
func Seq(parsers ...Parserish) Parser {
|
func Seq(parsers ...Parserish) Parser {
|
||||||
parserfied := ParsifyAll(parsers...)
|
parserfied := ParsifyAll(parsers...)
|
||||||
|
|
||||||
return NewParser("Seq()", func(ps *State) Node {
|
return NewParser("Seq()", func(ps *State) Result {
|
||||||
result := Node{Child: make([]Node, len(parserfied))}
|
result := Result{Child: make([]Result, len(parserfied))}
|
||||||
startpos := ps.Pos
|
startpos := ps.Pos
|
||||||
for i, parser := range parserfied {
|
for i, parser := range parserfied {
|
||||||
result.Child[i] = parser(ps)
|
result.Child[i] = parser(ps)
|
||||||
@ -21,9 +22,10 @@ func Seq(parsers ...Parserish) Parser {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NoAutoWS disables automatically ignoring whitespace between tokens for all parsers underneath
|
||||||
func NoAutoWS(parser Parserish) Parser {
|
func NoAutoWS(parser Parserish) Parser {
|
||||||
parserfied := Parsify(parser)
|
parserfied := Parsify(parser)
|
||||||
return func(ps *State) Node {
|
return func(ps *State) Result {
|
||||||
ps.NoAutoWS = true
|
ps.NoAutoWS = true
|
||||||
|
|
||||||
ret := parserfied(ps)
|
ret := parserfied(ps)
|
||||||
@ -33,10 +35,11 @@ func NoAutoWS(parser Parserish) Parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Any matches the first successful parser and returns its node
|
||||||
func Any(parsers ...Parserish) Parser {
|
func Any(parsers ...Parserish) Parser {
|
||||||
parserfied := ParsifyAll(parsers...)
|
parserfied := ParsifyAll(parsers...)
|
||||||
|
|
||||||
return NewParser("Any()", func(ps *State) Node {
|
return NewParser("Any()", func(ps *State) Result {
|
||||||
longestError := Error{}
|
longestError := Error{}
|
||||||
startpos := ps.Pos
|
startpos := ps.Pos
|
||||||
for _, parser := range parserfied {
|
for _, parser := range parserfied {
|
||||||
@ -45,7 +48,7 @@ func Any(parsers ...Parserish) Parser {
|
|||||||
if ps.Error.pos > longestError.pos {
|
if ps.Error.pos > longestError.pos {
|
||||||
longestError = ps.Error
|
longestError = ps.Error
|
||||||
}
|
}
|
||||||
ps.ClearError()
|
ps.Recover()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
return node
|
return node
|
||||||
@ -53,16 +56,22 @@ func Any(parsers ...Parserish) Parser {
|
|||||||
|
|
||||||
ps.Error = longestError
|
ps.Error = longestError
|
||||||
ps.Pos = startpos
|
ps.Pos = startpos
|
||||||
return Node{}
|
return Result{}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func Some(opScan Parserish, sepScan ...Parserish) Parser {
|
// Some matches zero or more parsers and returns the value as .Child[n]
|
||||||
return NewParser("Some()", manyImpl(0, opScan, sepScan...))
|
// an optional separator can be provided and that value will be consumed
|
||||||
|
// but not returned. Only one separator can be provided.
|
||||||
|
func Some(parser Parserish, separator ...Parserish) Parser {
|
||||||
|
return NewParser("Some()", manyImpl(0, parser, separator...))
|
||||||
}
|
}
|
||||||
|
|
||||||
func Many(opScan Parserish, sepScan ...Parserish) Parser {
|
// Many matches one or more parsers and returns the value as .Child[n]
|
||||||
return NewParser("Many()", manyImpl(1, opScan, sepScan...))
|
// an optional separator can be provided and that value will be consumed
|
||||||
|
// but not returned. Only one separator can be provided.
|
||||||
|
func Many(parser Parserish, separator ...Parserish) Parser {
|
||||||
|
return NewParser("Many()", manyImpl(1, parser, separator...))
|
||||||
}
|
}
|
||||||
|
|
||||||
func manyImpl(min int, op Parserish, sep ...Parserish) Parser {
|
func manyImpl(min int, op Parserish, sep ...Parserish) Parser {
|
||||||
@ -72,8 +81,8 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser {
|
|||||||
sepParser = Parsify(sep[0])
|
sepParser = Parsify(sep[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
return func(ps *State) Node {
|
return func(ps *State) Result {
|
||||||
var result Node
|
var result Result
|
||||||
startpos := ps.Pos
|
startpos := ps.Pos
|
||||||
for {
|
for {
|
||||||
node := opParser(ps)
|
node := opParser(ps)
|
||||||
@ -82,7 +91,7 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser {
|
|||||||
ps.Pos = startpos
|
ps.Pos = startpos
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
ps.ClearError()
|
ps.Recover()
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
result.Child = append(result.Child, node)
|
result.Child = append(result.Child, node)
|
||||||
@ -90,7 +99,7 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser {
|
|||||||
if sepParser != nil {
|
if sepParser != nil {
|
||||||
sepParser(ps)
|
sepParser(ps)
|
||||||
if ps.Errored() {
|
if ps.Errored() {
|
||||||
ps.ClearError()
|
ps.Recover()
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -98,23 +107,27 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Maybe will match 0 or 1 of the parser
|
||||||
func Maybe(parser Parserish) Parser {
|
func Maybe(parser Parserish) Parser {
|
||||||
parserfied := Parsify(parser)
|
parserfied := Parsify(parser)
|
||||||
|
|
||||||
return NewParser("Maybe()", func(ps *State) Node {
|
return NewParser("Maybe()", func(ps *State) Result {
|
||||||
node := parserfied(ps)
|
node := parserfied(ps)
|
||||||
if ps.Errored() {
|
if ps.Errored() {
|
||||||
ps.ClearError()
|
ps.Recover()
|
||||||
}
|
}
|
||||||
|
|
||||||
return node
|
return node
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bind will set the node .Result when the given parser matches
|
||||||
|
// This is useful for giving a value to keywords and constant literals
|
||||||
|
// like true and false. See the json parser for an example.
|
||||||
func Bind(parser Parserish, val interface{}) Parser {
|
func Bind(parser Parserish, val interface{}) Parser {
|
||||||
p := Parsify(parser)
|
p := Parsify(parser)
|
||||||
|
|
||||||
return func(ps *State) Node {
|
return func(ps *State) Result {
|
||||||
node := p(ps)
|
node := p(ps)
|
||||||
if ps.Errored() {
|
if ps.Errored() {
|
||||||
return node
|
return node
|
||||||
@ -124,10 +137,12 @@ func Bind(parser Parserish, val interface{}) Parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func Map(parser Parserish, f func(n Node) Node) Parser {
|
// Map applies the callback if the parser matches. This is used to set the Result
|
||||||
|
// based on the matched result.
|
||||||
|
func Map(parser Parserish, f func(n Result) Result) Parser {
|
||||||
p := Parsify(parser)
|
p := Parsify(parser)
|
||||||
|
|
||||||
return NewParser("Map()", func(ps *State) Node {
|
return NewParser("Map()", func(ps *State) Result {
|
||||||
node := p(ps)
|
node := p(ps)
|
||||||
if ps.Errored() {
|
if ps.Errored() {
|
||||||
return node
|
return node
|
||||||
@ -136,7 +151,7 @@ func Map(parser Parserish, f func(n Node) Node) Parser {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func flatten(n Node) string {
|
func flatten(n Result) string {
|
||||||
if n.Token != "" {
|
if n.Token != "" {
|
||||||
return n.Token
|
return n.Token
|
||||||
}
|
}
|
||||||
@ -152,8 +167,9 @@ func flatten(n Node) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Merge all child Tokens together recursively
|
||||||
func Merge(parser Parserish) Parser {
|
func Merge(parser Parserish) Parser {
|
||||||
return NewParser("Merge()", Map(parser, func(n Node) Node {
|
return NewParser("Merge()", Map(parser, func(n Result) Result {
|
||||||
return Node{Token: flatten(n)}
|
return Result{Token: flatten(n)}
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,7 @@ func TestSeq(t *testing.T) {
|
|||||||
|
|
||||||
t.Run("returns errors", func(t *testing.T) {
|
t.Run("returns errors", func(t *testing.T) {
|
||||||
_, p2 := runParser("hello there", parser)
|
_, p2 := runParser("hello there", parser)
|
||||||
require.Equal(t, "world", p2.Error.Expected)
|
require.Equal(t, "world", p2.Error.expected)
|
||||||
require.Equal(t, 6, p2.Error.pos)
|
require.Equal(t, 6, p2.Error.pos)
|
||||||
require.Equal(t, 0, p2.Pos)
|
require.Equal(t, 0, p2.Pos)
|
||||||
})
|
})
|
||||||
@ -32,7 +32,7 @@ func TestMaybe(t *testing.T) {
|
|||||||
|
|
||||||
t.Run("returns no errors", func(t *testing.T) {
|
t.Run("returns no errors", func(t *testing.T) {
|
||||||
node, p3 := runParser("hello world", Maybe("world"))
|
node, p3 := runParser("hello world", Maybe("world"))
|
||||||
require.Equal(t, Node{}, node)
|
require.Equal(t, Result{}, node)
|
||||||
require.False(t, p3.Errored())
|
require.False(t, p3.Errored())
|
||||||
require.Equal(t, 0, p3.Pos)
|
require.Equal(t, 0, p3.Pos)
|
||||||
})
|
})
|
||||||
@ -51,14 +51,14 @@ func TestAny(t *testing.T) {
|
|||||||
Seq("hello", "world", "."),
|
Seq("hello", "world", "."),
|
||||||
Seq("hello", "brother"),
|
Seq("hello", "brother"),
|
||||||
))
|
))
|
||||||
require.Equal(t, "offset 11: Expected .", p2.Error.Error())
|
require.Equal(t, "offset 11: expected .", p2.Error.Error())
|
||||||
require.Equal(t, 11, p2.Error.Pos())
|
require.Equal(t, 11, p2.Error.Pos())
|
||||||
require.Equal(t, 0, p2.Pos)
|
require.Equal(t, 0, p2.Pos)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("Accepts nil matches", func(t *testing.T) {
|
t.Run("Accepts nil matches", func(t *testing.T) {
|
||||||
node, p2 := runParser("hello world!", Any(Exact("ffffff")))
|
node, p2 := runParser("hello world!", Any(Exact("ffffff")))
|
||||||
require.Equal(t, Node{}, node)
|
require.Equal(t, Result{}, node)
|
||||||
require.Equal(t, 0, p2.Pos)
|
require.Equal(t, 0, p2.Pos)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -113,7 +113,7 @@ func TestMany(t *testing.T) {
|
|||||||
|
|
||||||
t.Run("Returns error if nothing matches", func(t *testing.T) {
|
t.Run("Returns error if nothing matches", func(t *testing.T) {
|
||||||
_, p2 := runParser("a,b,c,d,e,", Many(Chars("def"), Exact(",")))
|
_, p2 := runParser("a,b,c,d,e,", Many(Chars("def"), Exact(",")))
|
||||||
require.Equal(t, "offset 0: Expected def", p2.Error.Error())
|
require.Equal(t, "offset 0: expected def", p2.Error.Error())
|
||||||
require.Equal(t, "a,b,c,d,e,", p2.Get())
|
require.Equal(t, "a,b,c,d,e,", p2.Get())
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -123,8 +123,8 @@ type htmlTag struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestMap(t *testing.T) {
|
func TestMap(t *testing.T) {
|
||||||
parser := Map(Seq("<", Chars("a-zA-Z0-9"), ">"), func(n Node) Node {
|
parser := Map(Seq("<", Chars("a-zA-Z0-9"), ">"), func(n Result) Result {
|
||||||
return Node{Result: htmlTag{n.Child[1].Token}}
|
return Result{Result: htmlTag{n.Child[1].Token}}
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("sucess", func(t *testing.T) {
|
t.Run("sucess", func(t *testing.T) {
|
||||||
@ -134,7 +134,7 @@ func TestMap(t *testing.T) {
|
|||||||
|
|
||||||
t.Run("error", func(t *testing.T) {
|
t.Run("error", func(t *testing.T) {
|
||||||
_, ps := runParser("<html", parser)
|
_, ps := runParser("<html", parser)
|
||||||
require.Equal(t, "offset 5: Expected >", ps.Error.Error())
|
require.Equal(t, "offset 5: expected >", ps.Error.Error())
|
||||||
require.Equal(t, 0, ps.Pos)
|
require.Equal(t, 0, ps.Pos)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -151,12 +151,12 @@ func TestMerge(t *testing.T) {
|
|||||||
|
|
||||||
t.Run("error", func(t *testing.T) {
|
t.Run("error", func(t *testing.T) {
|
||||||
_, ps := runParser("((())", parser)
|
_, ps := runParser("((())", parser)
|
||||||
require.Equal(t, "offset 5: Expected )", ps.Error.Error())
|
require.Equal(t, "offset 5: expected )", ps.Error.Error())
|
||||||
require.Equal(t, 0, ps.Pos)
|
require.Equal(t, 0, ps.Pos)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func assertSequence(t *testing.T, node Node, expected ...string) {
|
func assertSequence(t *testing.T, node Result, expected ...string) {
|
||||||
require.NotNil(t, node)
|
require.NotNil(t, node)
|
||||||
actual := []string{}
|
actual := []string{}
|
||||||
|
|
||||||
|
@ -2,10 +2,12 @@
|
|||||||
|
|
||||||
package goparsify
|
package goparsify
|
||||||
|
|
||||||
|
// NewParser should be called around the creation of every Parser.
|
||||||
|
// It does nothing normally and should incur no runtime overhead, but when building with -tags debug
|
||||||
|
// it will instrument every parser to collect valuable timing information displayable with DumpDebugStats.
|
||||||
func NewParser(description string, p Parser) Parser {
|
func NewParser(description string, p Parser) Parser {
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
func DumpDebugStats() {
|
// DumpDebugStats will print out the current timings for each parser if built with -tags debug
|
||||||
|
func DumpDebugStats() {}
|
||||||
}
|
|
||||||
|
12
debugon.go
12
debugon.go
@ -10,9 +10,9 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
var parsers []*DebugParser
|
var parsers []*debugParser
|
||||||
|
|
||||||
type DebugParser struct {
|
type debugParser struct {
|
||||||
Description string
|
Description string
|
||||||
Caller string
|
Caller string
|
||||||
Next Parser
|
Next Parser
|
||||||
@ -20,7 +20,7 @@ type DebugParser struct {
|
|||||||
Calls int
|
Calls int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (dp *DebugParser) Parse(ps *State) Node {
|
func (dp *debugParser) Parse(ps *State) Result {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
|
||||||
ret := dp.Next(ps)
|
ret := dp.Next(ps)
|
||||||
@ -42,6 +42,9 @@ func getPackageName(f *runtime.Func) string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewParser should be called around the creation of every Parser.
|
||||||
|
// It does nothing normally and should incur no runtime overhead, but when building with -tags debug
|
||||||
|
// it will instrument every parser to collect valuable timing information displayable with DumpDebugStats.
|
||||||
func NewParser(description string, p Parser) Parser {
|
func NewParser(description string, p Parser) Parser {
|
||||||
fpcs := make([]uintptr, 1)
|
fpcs := make([]uintptr, 1)
|
||||||
caller := ""
|
caller := ""
|
||||||
@ -61,7 +64,7 @@ func NewParser(description string, p Parser) Parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dp := &DebugParser{
|
dp := &debugParser{
|
||||||
Description: description,
|
Description: description,
|
||||||
Next: p,
|
Next: p,
|
||||||
Caller: caller,
|
Caller: caller,
|
||||||
@ -71,6 +74,7 @@ func NewParser(description string, p Parser) Parser {
|
|||||||
return dp.Parse
|
return dp.Parse
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DumpDebugStats will print out the current timings for each parser if built with -tags debug
|
||||||
func DumpDebugStats() {
|
func DumpDebugStats() {
|
||||||
sort.Slice(parsers, func(i, j int) bool {
|
sort.Slice(parsers, func(i, j int) bool {
|
||||||
return parsers[i].Time >= parsers[j].Time
|
return parsers[i].Time >= parsers[j].Time
|
||||||
|
20
html/html.go
20
html/html.go
@ -4,8 +4,8 @@ import (
|
|||||||
. "github.com/vektah/goparsify"
|
. "github.com/vektah/goparsify"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Parse(input string) (result interface{}, remaining string, err error) {
|
func Parse(input string) (result interface{}, err error) {
|
||||||
return ParseString(tag, input)
|
return Run(tag, input)
|
||||||
}
|
}
|
||||||
|
|
||||||
type Tag struct {
|
type Tag struct {
|
||||||
@ -18,28 +18,28 @@ var (
|
|||||||
tag Parser
|
tag Parser
|
||||||
|
|
||||||
identifier = NoAutoWS(Merge(Seq(WS(), Chars("a-zA-Z", 1), Chars("a-zA-Z0-9", 0))))
|
identifier = NoAutoWS(Merge(Seq(WS(), Chars("a-zA-Z", 1), Chars("a-zA-Z0-9", 0))))
|
||||||
text = Map(NotChars("<>"), func(n Node) Node {
|
text = Map(NotChars("<>"), func(n Result) Result {
|
||||||
return Node{Result: n.Token}
|
return Result{Result: n.Token}
|
||||||
})
|
})
|
||||||
|
|
||||||
element = Any(text, &tag)
|
element = Any(text, &tag)
|
||||||
elements = Map(Some(element), func(n Node) Node {
|
elements = Map(Some(element), func(n Result) Result {
|
||||||
ret := []interface{}{}
|
ret := []interface{}{}
|
||||||
for _, child := range n.Child {
|
for _, child := range n.Child {
|
||||||
ret = append(ret, child.Result)
|
ret = append(ret, child.Result)
|
||||||
}
|
}
|
||||||
return Node{Result: ret}
|
return Result{Result: ret}
|
||||||
})
|
})
|
||||||
|
|
||||||
attr = Seq(identifier, "=", StringLit(`"'`))
|
attr = Seq(identifier, "=", StringLit(`"'`))
|
||||||
attrs = Map(Some(attr), func(node Node) Node {
|
attrs = Map(Some(attr), func(node Result) Result {
|
||||||
attr := map[string]string{}
|
attr := map[string]string{}
|
||||||
|
|
||||||
for _, attrNode := range node.Child {
|
for _, attrNode := range node.Child {
|
||||||
attr[attrNode.Child[0].Token] = attrNode.Child[2].Result.(string)
|
attr[attrNode.Child[0].Token] = attrNode.Child[2].Result.(string)
|
||||||
}
|
}
|
||||||
|
|
||||||
return Node{Result: attr}
|
return Result{Result: attr}
|
||||||
})
|
})
|
||||||
|
|
||||||
tstart = Seq("<", identifier, attrs, ">")
|
tstart = Seq("<", identifier, attrs, ">")
|
||||||
@ -47,9 +47,9 @@ var (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
tag = Map(Seq(tstart, elements, tend), func(node Node) Node {
|
tag = Map(Seq(tstart, elements, tend), func(node Result) Result {
|
||||||
openTag := node.Child[0]
|
openTag := node.Child[0]
|
||||||
return Node{Result: Tag{
|
return Result{Result: Tag{
|
||||||
Name: openTag.Child[1].Token,
|
Name: openTag.Child[1].Token,
|
||||||
Attributes: openTag.Child[2].Result.(map[string]string),
|
Attributes: openTag.Child[2].Result.(map[string]string),
|
||||||
Body: node.Child[1].Result.([]interface{}),
|
Body: node.Child[1].Result.([]interface{}),
|
||||||
|
@ -7,7 +7,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestParse(t *testing.T) {
|
func TestParse(t *testing.T) {
|
||||||
result, _, err := Parse(`<body>hello <p color="blue">world</p></body>`)
|
result, err := Parse(`<body>hello <p color="blue">world</p></body>`)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Equal(t, Tag{Name: "body", Attributes: map[string]string{}, Body: []interface{}{
|
require.Equal(t, Tag{Name: "body", Attributes: map[string]string{}, Body: []interface{}{
|
||||||
"hello ",
|
"hello ",
|
||||||
|
21
json/json.go
21
json/json.go
@ -1,6 +1,5 @@
|
|||||||
package json
|
package json
|
||||||
|
|
||||||
import "errors"
|
|
||||||
import . "github.com/vektah/goparsify"
|
import . "github.com/vektah/goparsify"
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -12,22 +11,22 @@ var (
|
|||||||
_number = NumberLit()
|
_number = NumberLit()
|
||||||
_properties = Some(Seq(StringLit(`"`), ":", &_value), ",")
|
_properties = Some(Seq(StringLit(`"`), ":", &_value), ",")
|
||||||
|
|
||||||
_array = Map(Seq("[", Some(&_value, ","), "]"), func(n Node) Node {
|
_array = Map(Seq("[", Some(&_value, ","), "]"), func(n Result) Result {
|
||||||
ret := []interface{}{}
|
ret := []interface{}{}
|
||||||
for _, child := range n.Child[1].Child {
|
for _, child := range n.Child[1].Child {
|
||||||
ret = append(ret, child.Result)
|
ret = append(ret, child.Result)
|
||||||
}
|
}
|
||||||
return Node{Result: ret}
|
return Result{Result: ret}
|
||||||
})
|
})
|
||||||
|
|
||||||
_object = Map(Seq("{", _properties, "}"), func(n Node) Node {
|
_object = Map(Seq("{", _properties, "}"), func(n Result) Result {
|
||||||
ret := map[string]interface{}{}
|
ret := map[string]interface{}{}
|
||||||
|
|
||||||
for _, prop := range n.Child[1].Child {
|
for _, prop := range n.Child[1].Child {
|
||||||
ret[prop.Child[0].Result.(string)] = prop.Child[2].Result
|
ret[prop.Child[0].Result.(string)] = prop.Child[2].Result
|
||||||
}
|
}
|
||||||
|
|
||||||
return Node{Result: ret}
|
return Result{Result: ret}
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -36,15 +35,5 @@ func init() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func Unmarshal(input string) (interface{}, error) {
|
func Unmarshal(input string) (interface{}, error) {
|
||||||
result, remaining, err := ParseString(_value, input)
|
return Run(_value, input)
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return result, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if remaining != "" {
|
|
||||||
return result, errors.New("left unparsed: " + remaining)
|
|
||||||
}
|
|
||||||
|
|
||||||
return result, err
|
|
||||||
}
|
}
|
||||||
|
@ -22,7 +22,7 @@ func main() {
|
|||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
pprof.StartCPUProfile(f)
|
_ = pprof.StartCPUProfile(f)
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
pprof.StopCPUProfile()
|
pprof.StopCPUProfile()
|
||||||
@ -32,7 +32,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
max := 1000
|
max := 100000
|
||||||
if *memprofile != "" {
|
if *memprofile != "" {
|
||||||
runtime.MemProfileRate = 1
|
runtime.MemProfileRate = 1
|
||||||
max = 1000
|
max = 1000
|
||||||
@ -42,8 +42,8 @@ func main() {
|
|||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
pprof.WriteHeapProfile(f)
|
_ = pprof.WriteHeapProfile(f)
|
||||||
f.Close()
|
_ = f.Close()
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
40
literals.go
40
literals.go
@ -6,22 +6,21 @@ import (
|
|||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// StringLit matches a quoted string and returns it in .Result. It may contain:
|
||||||
|
// - unicode
|
||||||
|
// - escaped characters, eg \" or \n
|
||||||
|
// - unicode sequences, eg \uBEEF
|
||||||
func StringLit(allowedQuotes string) Parser {
|
func StringLit(allowedQuotes string) Parser {
|
||||||
return NewParser("string literal", func(ps *State) Node {
|
return NewParser("string literal", func(ps *State) Result {
|
||||||
ps.AutoWS()
|
ps.AutoWS()
|
||||||
|
|
||||||
for i := 0; i < len(allowedQuotes); i++ {
|
|
||||||
if ps.Input[ps.Pos] == allowedQuotes[i] {
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) {
|
if !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) {
|
||||||
ps.ErrorHere(allowedQuotes)
|
ps.ErrorHere(allowedQuotes)
|
||||||
return Node{}
|
return Result{}
|
||||||
}
|
}
|
||||||
quote := ps.Input[ps.Pos]
|
quote := ps.Input[ps.Pos]
|
||||||
|
|
||||||
var end int = ps.Pos + 1
|
var end = ps.Pos + 1
|
||||||
|
|
||||||
inputLen := len(ps.Input)
|
inputLen := len(ps.Input)
|
||||||
var buf *bytes.Buffer
|
var buf *bytes.Buffer
|
||||||
@ -31,7 +30,7 @@ func StringLit(allowedQuotes string) Parser {
|
|||||||
case '\\':
|
case '\\':
|
||||||
if end+1 >= inputLen {
|
if end+1 >= inputLen {
|
||||||
ps.ErrorHere(string(quote))
|
ps.ErrorHere(string(quote))
|
||||||
return Node{}
|
return Result{}
|
||||||
}
|
}
|
||||||
|
|
||||||
if buf == nil {
|
if buf == nil {
|
||||||
@ -41,16 +40,16 @@ func StringLit(allowedQuotes string) Parser {
|
|||||||
c := ps.Input[end+1]
|
c := ps.Input[end+1]
|
||||||
if c == 'u' {
|
if c == 'u' {
|
||||||
if end+6 >= inputLen {
|
if end+6 >= inputLen {
|
||||||
ps.Error.Expected = "[a-f0-9]{4}"
|
ps.Error.expected = "[a-f0-9]{4}"
|
||||||
ps.Error.pos = end + 2
|
ps.Error.pos = end + 2
|
||||||
return Node{}
|
return Result{}
|
||||||
}
|
}
|
||||||
|
|
||||||
r, ok := unhex(ps.Input[end+2 : end+6])
|
r, ok := unhex(ps.Input[end+2 : end+6])
|
||||||
if !ok {
|
if !ok {
|
||||||
ps.Error.Expected = "[a-f0-9]"
|
ps.Error.expected = "[a-f0-9]"
|
||||||
ps.Error.pos = end + 2
|
ps.Error.pos = end + 2
|
||||||
return Node{}
|
return Result{}
|
||||||
}
|
}
|
||||||
buf.WriteRune(r)
|
buf.WriteRune(r)
|
||||||
end += 6
|
end += 6
|
||||||
@ -62,10 +61,10 @@ func StringLit(allowedQuotes string) Parser {
|
|||||||
if buf == nil {
|
if buf == nil {
|
||||||
result := ps.Input[ps.Pos+1 : end]
|
result := ps.Input[ps.Pos+1 : end]
|
||||||
ps.Pos = end + 1
|
ps.Pos = end + 1
|
||||||
return Node{Result: result}
|
return Result{Result: result}
|
||||||
}
|
}
|
||||||
ps.Pos = end + 1
|
ps.Pos = end + 1
|
||||||
return Node{Result: buf.String()}
|
return Result{Result: buf.String()}
|
||||||
default:
|
default:
|
||||||
if buf == nil {
|
if buf == nil {
|
||||||
if ps.Input[end] < 127 {
|
if ps.Input[end] < 127 {
|
||||||
@ -83,12 +82,13 @@ func StringLit(allowedQuotes string) Parser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ps.ErrorHere(string(quote))
|
ps.ErrorHere(string(quote))
|
||||||
return Node{}
|
return Result{}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NumberLit matches a floating point or integer number and returns it as a int64 or float64 in .Result
|
||||||
func NumberLit() Parser {
|
func NumberLit() Parser {
|
||||||
return NewParser("number literal", func(ps *State) Node {
|
return NewParser("number literal", func(ps *State) Result {
|
||||||
ps.AutoWS()
|
ps.AutoWS()
|
||||||
end := ps.Pos
|
end := ps.Pos
|
||||||
float := false
|
float := false
|
||||||
@ -126,7 +126,7 @@ func NumberLit() Parser {
|
|||||||
|
|
||||||
if end == ps.Pos {
|
if end == ps.Pos {
|
||||||
ps.ErrorHere("number")
|
ps.ErrorHere("number")
|
||||||
return Node{}
|
return Result{}
|
||||||
}
|
}
|
||||||
|
|
||||||
var result interface{}
|
var result interface{}
|
||||||
@ -138,10 +138,10 @@ func NumberLit() Parser {
|
|||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ps.ErrorHere("number")
|
ps.ErrorHere("number")
|
||||||
return Node{}
|
return Result{}
|
||||||
}
|
}
|
||||||
ps.Pos = end
|
ps.Pos = end
|
||||||
return Node{Result: result}
|
return Result{Result: result}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,25 +28,25 @@ func TestStringLit(t *testing.T) {
|
|||||||
|
|
||||||
t.Run("test non match", func(t *testing.T) {
|
t.Run("test non match", func(t *testing.T) {
|
||||||
_, p := runParser(`1`, parser)
|
_, p := runParser(`1`, parser)
|
||||||
require.Equal(t, `"'`, p.Error.Expected)
|
require.Equal(t, `"'`, p.Error.expected)
|
||||||
require.Equal(t, `1`, p.Get())
|
require.Equal(t, `1`, p.Get())
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("test unterminated string", func(t *testing.T) {
|
t.Run("test unterminated string", func(t *testing.T) {
|
||||||
_, p := runParser(`"hello `, parser)
|
_, p := runParser(`"hello `, parser)
|
||||||
require.Equal(t, `"`, p.Error.Expected)
|
require.Equal(t, `"`, p.Error.expected)
|
||||||
require.Equal(t, `"hello `, p.Get())
|
require.Equal(t, `"hello `, p.Get())
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("test unmatched quotes", func(t *testing.T) {
|
t.Run("test unmatched quotes", func(t *testing.T) {
|
||||||
_, p := runParser(`"hello '`, parser)
|
_, p := runParser(`"hello '`, parser)
|
||||||
require.Equal(t, `"`, p.Error.Expected)
|
require.Equal(t, `"`, p.Error.expected)
|
||||||
require.Equal(t, 0, p.Pos)
|
require.Equal(t, 0, p.Pos)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("test unterminated escape", func(t *testing.T) {
|
t.Run("test unterminated escape", func(t *testing.T) {
|
||||||
_, p := runParser(`"hello \`, parser)
|
_, p := runParser(`"hello \`, parser)
|
||||||
require.Equal(t, `"`, p.Error.Expected)
|
require.Equal(t, `"`, p.Error.expected)
|
||||||
require.Equal(t, 0, p.Pos)
|
require.Equal(t, 0, p.Pos)
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -64,20 +64,20 @@ func TestStringLit(t *testing.T) {
|
|||||||
|
|
||||||
t.Run("test escaped unicode", func(t *testing.T) {
|
t.Run("test escaped unicode", func(t *testing.T) {
|
||||||
result, p := runParser(`"hello \ubeef cake"`, parser)
|
result, p := runParser(`"hello \ubeef cake"`, parser)
|
||||||
require.Equal(t, "", p.Error.Expected)
|
require.Equal(t, "", p.Error.expected)
|
||||||
require.Equal(t, "hello \uBEEF cake", result.Result)
|
require.Equal(t, "hello \uBEEF cake", result.Result)
|
||||||
require.Equal(t, ``, p.Get())
|
require.Equal(t, ``, p.Get())
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("test invalid escaped unicode", func(t *testing.T) {
|
t.Run("test invalid escaped unicode", func(t *testing.T) {
|
||||||
_, p := runParser(`"hello \ucake"`, parser)
|
_, p := runParser(`"hello \ucake"`, parser)
|
||||||
require.Equal(t, "offset 9: Expected [a-f0-9]", p.Error.Error())
|
require.Equal(t, "offset 9: expected [a-f0-9]", p.Error.Error())
|
||||||
require.Equal(t, 0, p.Pos)
|
require.Equal(t, 0, p.Pos)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("test incomplete escaped unicode", func(t *testing.T) {
|
t.Run("test incomplete escaped unicode", func(t *testing.T) {
|
||||||
_, p := runParser(`"hello \uca"`, parser)
|
_, p := runParser(`"hello \uca"`, parser)
|
||||||
require.Equal(t, "offset 9: Expected [a-f0-9]{4}", p.Error.Error())
|
require.Equal(t, "offset 9: expected [a-f0-9]{4}", p.Error.Error())
|
||||||
require.Equal(t, 0, p.Pos)
|
require.Equal(t, 0, p.Pos)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -164,13 +164,13 @@ func TestNumberLit(t *testing.T) {
|
|||||||
|
|
||||||
t.Run("non matching string", func(t *testing.T) {
|
t.Run("non matching string", func(t *testing.T) {
|
||||||
_, p := runParser("foo", parser)
|
_, p := runParser("foo", parser)
|
||||||
require.Equal(t, "offset 0: Expected number", p.Error.Error())
|
require.Equal(t, "offset 0: expected number", p.Error.Error())
|
||||||
require.Equal(t, 0, p.Pos)
|
require.Equal(t, 0, p.Pos)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("invalid number", func(t *testing.T) {
|
t.Run("invalid number", func(t *testing.T) {
|
||||||
_, p := runParser("-.", parser)
|
_, p := runParser("-.", parser)
|
||||||
require.Equal(t, "offset 0: Expected number", p.Error.Error())
|
require.Equal(t, "offset 0: expected number", p.Error.Error())
|
||||||
require.Equal(t, 0, p.Pos)
|
require.Equal(t, 0, p.Pos)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
87
parser.go
87
parser.go
@ -1,18 +1,28 @@
|
|||||||
package goparsify
|
package goparsify
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Node struct {
|
// Result is the output of a parser. Usually only one of its fields will be set and should be thought of
|
||||||
|
// more as a union type. having it avoids interface{} littered all through the parsing code and makes
|
||||||
|
// it easy to do the two most common operations, getting a token and finding a child.
|
||||||
|
type Result struct {
|
||||||
Token string
|
Token string
|
||||||
Child []Node
|
Child []Result
|
||||||
Result interface{}
|
Result interface{}
|
||||||
}
|
}
|
||||||
|
|
||||||
type Parser func(*State) Node
|
// Parser is the workhorse of parsify. A parser takes a State and returns a result, consuming some
|
||||||
|
// of the State in the process.
|
||||||
|
// Given state is shared there are a few rules that should be followed:
|
||||||
|
// - A parser that errors must set state.Error
|
||||||
|
// - A parser that errors must not change state.Pos
|
||||||
|
// - A parser that consumed some input should advance state.Pos
|
||||||
|
type Parser func(*State) Result
|
||||||
|
|
||||||
// Parserish types are any type that can be turned into a Parser by Parsify
|
// Parserish types are any type that can be turned into a Parser by Parsify
|
||||||
// These currently include *Parser and string literals.
|
// These currently include *Parser and string literals.
|
||||||
@ -30,17 +40,22 @@ type Parser func(*State) Node
|
|||||||
// ```
|
// ```
|
||||||
type Parserish interface{}
|
type Parserish interface{}
|
||||||
|
|
||||||
|
// Parsify takes a Parserish and makes a Parser out of it. It should be called by
|
||||||
|
// any Parser that accepts a Parser as an argument. It should never be called during parsing;
|
||||||
|
// instead call it during parser creation so there is no runtime cost.
|
||||||
|
//
|
||||||
|
// See Parserish for details.
|
||||||
func Parsify(p Parserish) Parser {
|
func Parsify(p Parserish) Parser {
|
||||||
switch p := p.(type) {
|
switch p := p.(type) {
|
||||||
case nil:
|
case nil:
|
||||||
return nil
|
return nil
|
||||||
case func(*State) Node:
|
case func(*State) Result:
|
||||||
return NewParser("anonymous func", p)
|
return NewParser("anonymous func", p)
|
||||||
case Parser:
|
case Parser:
|
||||||
return p
|
return p
|
||||||
case *Parser:
|
case *Parser:
|
||||||
// Todo: Maybe capture this stack and on nil show it? Is there a good error library to do this?
|
// Todo: Maybe capture this stack and on nil show it? Is there a good error library to do this?
|
||||||
return func(ptr *State) Node {
|
return func(ptr *State) Result {
|
||||||
return (*p)(ptr)
|
return (*p)(ptr)
|
||||||
}
|
}
|
||||||
case string:
|
case string:
|
||||||
@ -50,6 +65,7 @@ func Parsify(p Parserish) Parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ParsifyAll calls Parsify on all parsers
|
||||||
func ParsifyAll(parsers ...Parserish) []Parser {
|
func ParsifyAll(parsers ...Parserish) []Parser {
|
||||||
ret := make([]Parser, len(parsers))
|
ret := make([]Parser, len(parsers))
|
||||||
for i, parser := range parsers {
|
for i, parser := range parsers {
|
||||||
@ -58,53 +74,61 @@ func ParsifyAll(parsers ...Parserish) []Parser {
|
|||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WS will consume whitespace, it should only be needed when AutoWS is turned off
|
||||||
func WS() Parser {
|
func WS() Parser {
|
||||||
return NewParser("AutoWS", func(ps *State) Node {
|
return NewParser("AutoWS", func(ps *State) Result {
|
||||||
ps.WS()
|
ps.WS()
|
||||||
return Node{}
|
return Result{}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseString(parser Parserish, input string) (result interface{}, remaining string, err error) {
|
// Run applies some input to a parser and returns the result, failing if the input isn't fully consumed.
|
||||||
|
// It is a convenience method for the most common way to invoke a parser.
|
||||||
|
func Run(parser Parserish, input string) (result interface{}, err error) {
|
||||||
p := Parsify(parser)
|
p := Parsify(parser)
|
||||||
ps := InputString(input)
|
ps := NewState(input)
|
||||||
|
|
||||||
ret := p(ps)
|
ret := p(ps)
|
||||||
ps.AutoWS()
|
ps.AutoWS()
|
||||||
|
|
||||||
if ps.Error.Expected != "" {
|
if ps.Error.expected != "" {
|
||||||
return nil, ps.Get(), ps.Error
|
return ret.Result, ps.Error
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret.Result, ps.Get(), nil
|
if ps.Get() != "" {
|
||||||
|
return ret.Result, errors.New("left unparsed: " + ps.Get())
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret.Result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Exact will fully match the exact string supplied, or error. The match will be stored in .Token
|
||||||
func Exact(match string) Parser {
|
func Exact(match string) Parser {
|
||||||
if len(match) == 1 {
|
if len(match) == 1 {
|
||||||
matchByte := match[0]
|
matchByte := match[0]
|
||||||
return NewParser(match, func(ps *State) Node {
|
return NewParser(match, func(ps *State) Result {
|
||||||
ps.AutoWS()
|
ps.AutoWS()
|
||||||
if ps.Pos >= len(ps.Input) || ps.Input[ps.Pos] != matchByte {
|
if ps.Pos >= len(ps.Input) || ps.Input[ps.Pos] != matchByte {
|
||||||
ps.ErrorHere(match)
|
ps.ErrorHere(match)
|
||||||
return Node{}
|
return Result{}
|
||||||
}
|
}
|
||||||
|
|
||||||
ps.Advance(1)
|
ps.Advance(1)
|
||||||
|
|
||||||
return Node{Token: match}
|
return Result{Token: match}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
return NewParser(match, func(ps *State) Node {
|
return NewParser(match, func(ps *State) Result {
|
||||||
ps.AutoWS()
|
ps.AutoWS()
|
||||||
if !strings.HasPrefix(ps.Get(), match) {
|
if !strings.HasPrefix(ps.Get(), match) {
|
||||||
ps.ErrorHere(match)
|
ps.ErrorHere(match)
|
||||||
return Node{}
|
return Result{}
|
||||||
}
|
}
|
||||||
|
|
||||||
ps.Advance(len(match))
|
ps.Advance(len(match))
|
||||||
|
|
||||||
return Node{Token: match}
|
return Result{Token: match}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -125,9 +149,9 @@ func parseRepetition(defaultMin, defaultMax int, repetition ...int) (min int, ma
|
|||||||
}
|
}
|
||||||
|
|
||||||
// parseMatcher turns a string in the format a-f01234A-F into:
|
// parseMatcher turns a string in the format a-f01234A-F into:
|
||||||
// - a set string of matches string(01234)
|
// - an alphabet of matches string(01234)
|
||||||
// - a set of ranges [][]rune{{'a', 'f'}, {'A', 'F'}}
|
// - a set of ranges [][]rune{{'a', 'f'}, {'A', 'F'}}
|
||||||
func parseMatcher(matcher string) (matches string, ranges [][]rune) {
|
func parseMatcher(matcher string) (alphabet string, ranges [][]rune) {
|
||||||
runes := []rune(matcher)
|
runes := []rune(matcher)
|
||||||
|
|
||||||
for i := 0; i < len(runes); i++ {
|
for i := 0; i < len(runes); i++ {
|
||||||
@ -141,29 +165,36 @@ func parseMatcher(matcher string) (matches string, ranges [][]rune) {
|
|||||||
ranges = append(ranges, []rune{end, start})
|
ranges = append(ranges, []rune{end, start})
|
||||||
}
|
}
|
||||||
} else if i+1 < len(runes) && runes[i] == '\\' {
|
} else if i+1 < len(runes) && runes[i] == '\\' {
|
||||||
matches += string(runes[i+1])
|
alphabet += string(runes[i+1])
|
||||||
} else {
|
} else {
|
||||||
matches += string(runes[i])
|
alphabet += string(runes[i])
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return matches, ranges
|
return alphabet, ranges
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Chars is the swiss army knife of character matches. It can match:
|
||||||
|
// - ranges: Chars("a-z") will match one or more lowercase letters
|
||||||
|
// - alphabets: Chars("abcd") will match one or more of the letters abcd in any order
|
||||||
|
// - min and max: Chars("a-z0-9", 4, 6) will match 4-6 lowercase alphanumeric characters
|
||||||
|
// the above can be combined in any order
|
||||||
func Chars(matcher string, repetition ...int) Parser {
|
func Chars(matcher string, repetition ...int) Parser {
|
||||||
return NewParser("["+matcher+"]", charsImpl(matcher, false, repetition...))
|
return NewParser("["+matcher+"]", charsImpl(matcher, false, repetition...))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NotChars accepts the full range of input from Chars, but it will stop when any
|
||||||
|
// character matches.
|
||||||
func NotChars(matcher string, repetition ...int) Parser {
|
func NotChars(matcher string, repetition ...int) Parser {
|
||||||
return NewParser("!["+matcher+"]", charsImpl(matcher, true, repetition...))
|
return NewParser("!["+matcher+"]", charsImpl(matcher, true, repetition...))
|
||||||
}
|
}
|
||||||
|
|
||||||
func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
|
func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
|
||||||
min, max := parseRepetition(1, -1, repetition...)
|
min, max := parseRepetition(1, -1, repetition...)
|
||||||
matches, ranges := parseMatcher(matcher)
|
alphabet, ranges := parseMatcher(matcher)
|
||||||
|
|
||||||
return func(ps *State) Node {
|
return func(ps *State) Result {
|
||||||
ps.AutoWS()
|
ps.AutoWS()
|
||||||
matched := 0
|
matched := 0
|
||||||
for ps.Pos+matched < len(ps.Input) {
|
for ps.Pos+matched < len(ps.Input) {
|
||||||
@ -173,7 +204,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
|
|||||||
|
|
||||||
r, w := utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:])
|
r, w := utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:])
|
||||||
|
|
||||||
anyMatched := strings.ContainsRune(matches, r)
|
anyMatched := strings.ContainsRune(alphabet, r)
|
||||||
if !anyMatched {
|
if !anyMatched {
|
||||||
for _, rng := range ranges {
|
for _, rng := range ranges {
|
||||||
if r >= rng[0] && r <= rng[1] {
|
if r >= rng[0] && r <= rng[1] {
|
||||||
@ -191,11 +222,11 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
|
|||||||
|
|
||||||
if matched < min {
|
if matched < min {
|
||||||
ps.ErrorHere(matcher)
|
ps.ErrorHere(matcher)
|
||||||
return Node{}
|
return Result{}
|
||||||
}
|
}
|
||||||
|
|
||||||
result := ps.Input[ps.Pos : ps.Pos+matched]
|
result := ps.Input[ps.Pos : ps.Pos+matched]
|
||||||
ps.Advance(matched)
|
ps.Advance(matched)
|
||||||
return Node{Token: result}
|
return Result{Token: result}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -9,17 +9,17 @@ import (
|
|||||||
func TestParsify(t *testing.T) {
|
func TestParsify(t *testing.T) {
|
||||||
|
|
||||||
t.Run("strings", func(t *testing.T) {
|
t.Run("strings", func(t *testing.T) {
|
||||||
require.Equal(t, "ff", Parsify("ff")(InputString("ffooo")).Token)
|
require.Equal(t, "ff", Parsify("ff")(NewState("ffooo")).Token)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("parsers", func(t *testing.T) {
|
t.Run("parsers", func(t *testing.T) {
|
||||||
require.Equal(t, "ff", Parsify(Chars("f"))(InputString("ffooo")).Token)
|
require.Equal(t, "ff", Parsify(Chars("f"))(NewState("ffooo")).Token)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("parser funcs", func(t *testing.T) {
|
t.Run("parser funcs", func(t *testing.T) {
|
||||||
node := Parsify(func(p *State) Node {
|
node := Parsify(func(p *State) Result {
|
||||||
return Node{Token: "hello"}
|
return Result{Token: "hello"}
|
||||||
})(InputString("ffooo"))
|
})(NewState("ffooo"))
|
||||||
|
|
||||||
require.Equal(t, "hello", node.Token)
|
require.Equal(t, "hello", node.Token)
|
||||||
})
|
})
|
||||||
@ -29,7 +29,7 @@ func TestParsify(t *testing.T) {
|
|||||||
parserfied := Parsify(&parser)
|
parserfied := Parsify(&parser)
|
||||||
parser = Chars("f")
|
parser = Chars("f")
|
||||||
|
|
||||||
node := parserfied(InputString("ffooo"))
|
node := parserfied(NewState("ffooo"))
|
||||||
require.Equal(t, "ff", node.Token)
|
require.Equal(t, "ff", node.Token)
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -41,10 +41,10 @@ func TestParsify(t *testing.T) {
|
|||||||
func TestParsifyAll(t *testing.T) {
|
func TestParsifyAll(t *testing.T) {
|
||||||
parsers := ParsifyAll("ff", "gg")
|
parsers := ParsifyAll("ff", "gg")
|
||||||
|
|
||||||
result := parsers[0](InputString("ffooo"))
|
result := parsers[0](NewState("ffooo"))
|
||||||
require.Equal(t, "ff", result.Token)
|
require.Equal(t, "ff", result.Token)
|
||||||
|
|
||||||
result = parsers[1](InputString("ffooo"))
|
result = parsers[1](NewState("ffooo"))
|
||||||
require.Equal(t, "", result.Token)
|
require.Equal(t, "", result.Token)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -63,19 +63,19 @@ func TestExact(t *testing.T) {
|
|||||||
|
|
||||||
t.Run("error", func(t *testing.T) {
|
t.Run("error", func(t *testing.T) {
|
||||||
_, ps := runParser("foobar", Exact("bar"))
|
_, ps := runParser("foobar", Exact("bar"))
|
||||||
require.Equal(t, "bar", ps.Error.Expected)
|
require.Equal(t, "bar", ps.Error.expected)
|
||||||
require.Equal(t, 0, ps.Pos)
|
require.Equal(t, 0, ps.Pos)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("error char", func(t *testing.T) {
|
t.Run("error char", func(t *testing.T) {
|
||||||
_, ps := runParser("foobar", Exact("o"))
|
_, ps := runParser("foobar", Exact("o"))
|
||||||
require.Equal(t, "o", ps.Error.Expected)
|
require.Equal(t, "o", ps.Error.expected)
|
||||||
require.Equal(t, 0, ps.Pos)
|
require.Equal(t, 0, ps.Pos)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("eof char", func(t *testing.T) {
|
t.Run("eof char", func(t *testing.T) {
|
||||||
_, ps := runParser("", Exact("o"))
|
_, ps := runParser("", Exact("o"))
|
||||||
require.Equal(t, "o", ps.Error.Expected)
|
require.Equal(t, "o", ps.Error.expected)
|
||||||
require.Equal(t, 0, ps.Pos)
|
require.Equal(t, 0, ps.Pos)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -104,13 +104,13 @@ func TestChars(t *testing.T) {
|
|||||||
|
|
||||||
t.Run("no match", func(t *testing.T) {
|
t.Run("no match", func(t *testing.T) {
|
||||||
_, ps := runParser("ffffff", Chars("0-9"))
|
_, ps := runParser("ffffff", Chars("0-9"))
|
||||||
require.Equal(t, "offset 0: Expected 0-9", ps.Error.Error())
|
require.Equal(t, "offset 0: expected 0-9", ps.Error.Error())
|
||||||
require.Equal(t, 0, ps.Pos)
|
require.Equal(t, 0, ps.Pos)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("no match with min", func(t *testing.T) {
|
t.Run("no match with min", func(t *testing.T) {
|
||||||
_, ps := runParser("ffffff", Chars("0-9", 4))
|
_, ps := runParser("ffffff", Chars("0-9", 4))
|
||||||
require.Equal(t, "0-9", ps.Error.Expected)
|
require.Equal(t, "0-9", ps.Error.expected)
|
||||||
require.Equal(t, 0, ps.Pos)
|
require.Equal(t, 0, ps.Pos)
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -134,26 +134,31 @@ func TestChars(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestParseString(t *testing.T) {
|
func TestParseString(t *testing.T) {
|
||||||
Y := Map("hello", func(n Node) Node { return Node{Result: n.Token} })
|
Y := Map("hello", func(n Result) Result { return Result{Result: n.Token} })
|
||||||
t.Run("partial match", func(t *testing.T) {
|
|
||||||
result, remaining, err := ParseString(Y, "hello world")
|
t.Run("full match", func(t *testing.T) {
|
||||||
|
result, err := Run(Y, "hello")
|
||||||
require.Equal(t, "hello", result)
|
require.Equal(t, "hello", result)
|
||||||
require.Equal(t, "world", remaining)
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("error", func(t *testing.T) {
|
t.Run("partial match", func(t *testing.T) {
|
||||||
result, remaining, err := ParseString(Y, "world")
|
result, err := Run(Y, "hello world")
|
||||||
require.Nil(t, result)
|
require.Equal(t, "hello", result)
|
||||||
require.Equal(t, "world", remaining)
|
|
||||||
require.Error(t, err)
|
require.Error(t, err)
|
||||||
require.Equal(t, "offset 0: Expected hello", err.Error())
|
require.Equal(t, "left unparsed: world", err.Error())
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("error", func(t *testing.T) {
|
||||||
|
result, err := Run(Y, "world")
|
||||||
|
require.Nil(t, result)
|
||||||
|
require.Error(t, err)
|
||||||
|
require.Equal(t, "offset 0: expected hello", err.Error())
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func runParser(input string, parser Parser) (Result, *State) {
|
||||||
func runParser(input string, parser Parser) (Node, *State) {
|
ps := NewState(input)
|
||||||
ps := InputString(input)
|
|
||||||
result := parser(ps)
|
result := parser(ps)
|
||||||
return result, ps
|
return result, ps
|
||||||
}
|
}
|
||||||
|
110
state.go
110
state.go
@ -4,63 +4,38 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Error represents a parse error. These will often be set, the parser will back up a little and
|
||||||
|
// find another viable path. In general when combining errors the longest error should be returned.
|
||||||
type Error struct {
|
type Error struct {
|
||||||
pos int
|
pos int
|
||||||
Expected string
|
expected string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e Error) Pos() int { return e.pos }
|
// Pos is the offset into the document the error was found
|
||||||
func (e Error) Error() string { return fmt.Sprintf("offset %d: Expected %s", e.pos, e.Expected) }
|
func (e Error) Pos() int { return e.pos }
|
||||||
|
|
||||||
|
// Error satisfies the golang error interface
|
||||||
|
func (e Error) Error() string { return fmt.Sprintf("offset %d: expected %s", e.pos, e.expected) }
|
||||||
|
|
||||||
|
// WSFunc matches a byte and returns true if it is whitespace
|
||||||
type WSFunc func(c byte) bool
|
type WSFunc func(c byte) bool
|
||||||
|
|
||||||
|
// State is the current parse state. It is entirely public because parsers are expected to mutate it during the parse.
|
||||||
type State struct {
|
type State struct {
|
||||||
Input string
|
// The full input string
|
||||||
Pos int
|
Input string
|
||||||
Error Error
|
// An offset into the string, pointing to the current tip
|
||||||
|
Pos int
|
||||||
|
// Error is a secondary return channel from parsers, but used so heavily
|
||||||
|
// in backtracking that it has been inlined to avoid allocations.
|
||||||
|
Error Error
|
||||||
|
// Called to determine what to ignore when WS is called, or when AutoWS fires
|
||||||
WSFunc WSFunc
|
WSFunc WSFunc
|
||||||
NoAutoWS bool
|
NoAutoWS bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *State) Advance(i int) {
|
// NewState creates a new State from a string
|
||||||
s.Pos += i
|
func NewState(input string) *State {
|
||||||
}
|
|
||||||
|
|
||||||
// AutoWS consumes all whitespace
|
|
||||||
func (s *State) AutoWS() {
|
|
||||||
if s.NoAutoWS {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
s.WS()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *State) WS() {
|
|
||||||
for s.Pos < len(s.Input) && s.WSFunc(s.Input[s.Pos]) {
|
|
||||||
s.Pos++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *State) Get() string {
|
|
||||||
if s.Pos > len(s.Input) {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
return s.Input[s.Pos:]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *State) ErrorHere(expected string) {
|
|
||||||
s.Error.pos = s.Pos
|
|
||||||
s.Error.Expected = expected
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *State) ClearError() {
|
|
||||||
s.Error.Expected = ""
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *State) Errored() bool {
|
|
||||||
return s.Error.Expected != ""
|
|
||||||
}
|
|
||||||
|
|
||||||
func InputString(input string) *State {
|
|
||||||
return &State{
|
return &State{
|
||||||
Input: input,
|
Input: input,
|
||||||
WSFunc: func(b byte) bool {
|
WSFunc: func(b byte) bool {
|
||||||
@ -72,3 +47,48 @@ func InputString(input string) *State {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Advance the Pos along by i bytes
|
||||||
|
func (s *State) Advance(i int) {
|
||||||
|
s.Pos += i
|
||||||
|
}
|
||||||
|
|
||||||
|
// AutoWS consumes all whitespace and advances Pos but can be disabled by the NoAutoWS() parser.
|
||||||
|
func (s *State) AutoWS() {
|
||||||
|
if s.NoAutoWS {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.WS()
|
||||||
|
}
|
||||||
|
|
||||||
|
// WS consumes all whitespace and advances Pos.
|
||||||
|
func (s *State) WS() {
|
||||||
|
for s.Pos < len(s.Input) && s.WSFunc(s.Input[s.Pos]) {
|
||||||
|
s.Pos++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the remaining input.
|
||||||
|
func (s *State) Get() string {
|
||||||
|
if s.Pos > len(s.Input) {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return s.Input[s.Pos:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// ErrorHere raises an error at the current position.
|
||||||
|
func (s *State) ErrorHere(expected string) {
|
||||||
|
s.Error.pos = s.Pos
|
||||||
|
s.Error.expected = expected
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover from the current error. Often called by combinators that can match
|
||||||
|
// when one of their children succeed, but others have failed.
|
||||||
|
func (s *State) Recover() {
|
||||||
|
s.Error.expected = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Errored returns true if the current parser has failed.
|
||||||
|
func (s *State) Errored() bool {
|
||||||
|
return s.Error.expected != ""
|
||||||
|
}
|
||||||
|
@ -7,7 +7,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestState_Advance(t *testing.T) {
|
func TestState_Advance(t *testing.T) {
|
||||||
ps := InputString("fooo")
|
ps := NewState("fooo")
|
||||||
require.Equal(t, 0, ps.Pos)
|
require.Equal(t, 0, ps.Pos)
|
||||||
ps.Advance(2)
|
ps.Advance(2)
|
||||||
require.Equal(t, 2, ps.Pos)
|
require.Equal(t, 2, ps.Pos)
|
||||||
@ -16,7 +16,7 @@ func TestState_Advance(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestState_Get(t *testing.T) {
|
func TestState_Get(t *testing.T) {
|
||||||
ps := InputString("fooo")
|
ps := NewState("fooo")
|
||||||
require.Equal(t, "fooo", ps.Get())
|
require.Equal(t, "fooo", ps.Get())
|
||||||
ps.Advance(1)
|
ps.Advance(1)
|
||||||
require.Equal(t, "ooo", ps.Get())
|
require.Equal(t, "ooo", ps.Get())
|
||||||
@ -27,19 +27,19 @@ func TestState_Get(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestState_Errors(t *testing.T) {
|
func TestState_Errors(t *testing.T) {
|
||||||
ps := InputString("fooo")
|
ps := NewState("fooo")
|
||||||
|
|
||||||
ps.ErrorHere("hello")
|
ps.ErrorHere("hello")
|
||||||
require.Equal(t, "offset 0: Expected hello", ps.Error.Error())
|
require.Equal(t, "offset 0: expected hello", ps.Error.Error())
|
||||||
require.Equal(t, 0, ps.Error.Pos())
|
require.Equal(t, 0, ps.Error.Pos())
|
||||||
require.True(t, ps.Errored())
|
require.True(t, ps.Errored())
|
||||||
|
|
||||||
ps.ClearError()
|
ps.Recover()
|
||||||
require.False(t, ps.Errored())
|
require.False(t, ps.Errored())
|
||||||
|
|
||||||
ps.Advance(2)
|
ps.Advance(2)
|
||||||
ps.ErrorHere("hello2")
|
ps.ErrorHere("hello2")
|
||||||
require.Equal(t, "offset 2: Expected hello2", ps.Error.Error())
|
require.Equal(t, "offset 2: expected hello2", ps.Error.Error())
|
||||||
require.Equal(t, 2, ps.Error.Pos())
|
require.Equal(t, 2, ps.Error.Pos())
|
||||||
require.True(t, ps.Errored())
|
require.True(t, ps.Errored())
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user