Eliminate a bunch of allocations

This commit is contained in:
Adam Scarr 2017-08-06 23:32:10 +10:00
parent 9d7779e8ca
commit 666ea93dba
16 changed files with 459 additions and 459 deletions

View File

@ -5,12 +5,13 @@ import (
"fmt"
)
func Nil(p Pointer) (Node, Pointer) {
return nil, p
func Nil(ps *State) interface{} {
return nil
}
func Never(p Pointer) (Node, Pointer) {
return Error{p.pos, "Never matches"}, p
func Never(ps *State) interface{} {
ps.ErrorHere("not anything")
return nil
}
func And(parsers ...Parserish) Parser {
@ -18,23 +19,22 @@ func And(parsers ...Parserish) Parser {
return Nil
}
ps := ParsifyAll(parsers...)
parserfied := ParsifyAll(parsers...)
return func(p Pointer) (Node, Pointer) {
var nodes = make([]Node, 0, len(ps))
var node Node
newP := p
for _, parser := range ps {
node, newP = parser(newP)
if node == nil {
continue
}
if IsError(node) {
return node, p
return func(ps *State) interface{} {
var nodes = make([]interface{}, 0, len(parserfied))
startpos := ps.Pos
for _, parser := range parserfied {
node := parser(ps)
if ps.Errored() {
ps.Pos = startpos
return nil
}
if node != nil {
nodes = append(nodes, node)
}
return nodes, newP
}
return nodes
}
}
@ -43,27 +43,26 @@ func Any(parsers ...Parserish) Parser {
return Nil
}
ps := ParsifyAll(parsers...)
parserfied := ParsifyAll(parsers...)
return func(p Pointer) (Node, Pointer) {
errors := []Error{}
for _, parser := range ps {
node, newP := parser(p)
if err, isErr := node.(Error); isErr {
errors = append(errors, err)
return func(ps *State) interface{} {
longestError := Error{}
startpos := ps.Pos
for _, parser := range parserfied {
node := parser(ps)
if ps.Errored() {
if ps.Error.pos > longestError.pos {
longestError = ps.Error
}
ps.ClearError()
continue
}
return node, newP
return node
}
longestError := errors[0]
for _, e := range errors[1:] {
if e.pos > longestError.pos {
longestError = e
}
}
return longestError, p
ps.Error = longestError
ps.Pos = startpos
return nil
}
}
@ -91,67 +90,81 @@ func manyImpl(min int, op Parserish, until Parserish, sep ...Parserish) Parser {
sepParser = Parsify(sep[0])
}
return func(p Pointer) (Node, Pointer) {
var node Node
nodes := make([]Node, 0)
newP := p
return func(ps *State) interface{} {
var node interface{}
nodes := make([]interface{}, 0, 20)
startpos := ps.Pos
for {
if node, _ := untilParser(newP); !IsError(node) {
tempPos := ps.Pos
node = untilParser(ps)
if !ps.Errored() {
ps.Pos = tempPos
if len(nodes) < min {
return NewError(newP.pos, "Unexpected input"), p
ps.Pos = startpos
ps.ErrorHere("something else")
return nil
}
break
}
ps.ClearError()
node = opParser(ps)
if ps.Errored() {
if len(nodes) < min {
ps.Pos = startpos
return nil
}
ps.ClearError()
break
}
if node, newP = opParser(newP); IsError(node) {
if len(nodes) < min {
return node, p
}
break
}
nodes = append(nodes, node)
if node, newP = sepParser(newP); IsError(node) {
node = sepParser(ps)
if ps.Errored() {
ps.ClearError()
break
}
}
return nodes, newP
return nodes
}
}
func Maybe(parser Parserish) Parser {
realParser := Parsify(parser)
parserfied := Parsify(parser)
return func(p Pointer) (Node, Pointer) {
node, newP := realParser(p)
if IsError(node) {
return nil, p
return func(ps *State) interface{} {
node := parserfied(ps)
if ps.Errored() {
ps.ClearError()
return nil
}
return node, newP
return node
}
}
func Map(parser Parserish, f func(n Node) Node) Parser {
func Map(parser Parserish, f func(n interface{}) interface{}) Parser {
p := Parsify(parser)
return func(ptr Pointer) (Node, Pointer) {
node, newPtr := p(ptr)
if IsError(node) {
return node, ptr
return func(ps *State) interface{} {
node := p(ps)
if ps.Errored() {
return nil
}
return f(node), newPtr
return f(node)
}
}
func flatten(n Node) string {
func flatten(n interface{}) interface{} {
if s, ok := n.(string); ok {
return s
}
if nodes, ok := n.([]Node); ok {
if nodes, ok := n.([]interface{}); ok {
sbuf := &bytes.Buffer{}
for _, node := range nodes {
sbuf.WriteString(flatten(node))
sbuf.WriteString(flatten(node).(string))
}
return sbuf.String()
}
@ -160,7 +173,5 @@ func flatten(n Node) string {
}
func Merge(parser Parserish) Parser {
return Map(parser, func(n Node) Node {
return flatten(n)
})
return Map(parser, flatten)
}

View File

@ -7,26 +7,35 @@ import (
)
func TestNil(t *testing.T) {
p := Pointer{"hello world", 0}
node, p2 := runParser("hello world", Nil)
node, p2 := Nil(p)
require.Equal(t, nil, node)
require.Equal(t, p, p2)
require.Equal(t, 0, p2.Pos)
require.False(t, p2.Errored())
}
// TestNever checks that the Never parser yields no node, leaves the position
// at the start of the input and flags the state as errored.
func TestNever(t *testing.T) {
	result, state := runParser("hello world", Never)

	require.Equal(t, nil, result)
	require.Equal(t, 0, state.Pos)
	require.True(t, state.Errored())
}
func TestAnd(t *testing.T) {
p := Pointer{"hello world", 0}
parser := And("hello", WS, "world")
t.Run("matches sequence", func(t *testing.T) {
node, p2 := And("hello", WS, "world")(p)
require.Equal(t, []Node{"hello", "world"}, node)
node, p2 := runParser("hello world", parser)
require.Equal(t, []interface{}{"hello", "world"}, node)
require.Equal(t, "", p2.Get())
})
t.Run("returns errors", func(t *testing.T) {
e, p3 := And("hello", WS, "there")(p)
require.Equal(t, NewError(6, "Expected there"), e)
require.Equal(t, 0, p3.pos)
_, p2 := runParser("hello there", parser)
require.Equal(t, "world", p2.Error.Expected)
require.Equal(t, 6, p2.Error.pos)
require.Equal(t, 0, p2.Pos)
})
t.Run("No parsers", func(t *testing.T) {
@ -35,44 +44,42 @@ func TestAnd(t *testing.T) {
}
func TestMaybe(t *testing.T) {
p := Pointer{"hello world", 0}
t.Run("matches sequence", func(t *testing.T) {
node, p2 := Maybe("hello")(p)
node, p2 := runParser("hello world", Maybe("hello"))
require.Equal(t, "hello", node)
require.Equal(t, " world", p2.Get())
})
t.Run("returns no errors", func(t *testing.T) {
e, p3 := Maybe("world")(p)
require.Equal(t, nil, e)
require.Equal(t, 0, p3.pos)
node, p3 := runParser("hello world", Maybe("world"))
require.Equal(t, nil, node)
require.False(t, p3.Errored())
require.Equal(t, 0, p3.Pos)
})
}
func TestAny(t *testing.T) {
p := Pointer{"hello world!", 0}
t.Run("Matches any", func(t *testing.T) {
node, p2 := Any("hello", "world")(p)
node, p2 := runParser("hello world!", Any("hello", "world"))
require.Equal(t, "hello", node)
require.Equal(t, 5, p2.pos)
require.Equal(t, 5, p2.Pos)
})
t.Run("Returns longest error", func(t *testing.T) {
err, p2 := Any(
Exact("nope"),
And(Exact("hello"), WS, Exact("world"), Exact(".")),
And(Exact("hello"), WS, Exact("brother")),
)(p)
require.Equal(t, NewError(11, "Expected ."), err)
require.Equal(t, 0, p2.pos)
_, p2 := runParser("hello world!", Any(
"nope",
And("hello", WS, "world", "."),
And("hello", WS, "brother"),
))
require.Equal(t, "offset 11: Expected .", p2.Error.Error())
require.Equal(t, 11, p2.Error.Pos())
require.Equal(t, 0, p2.Pos)
})
t.Run("Accepts nil matches", func(t *testing.T) {
node, p2 := Any(Exact("ffffff"), WS)(p)
node, p2 := runParser("hello world!", Any(Exact("ffffff"), WS))
require.Equal(t, nil, node)
require.Equal(t, 0, p2.pos)
require.Equal(t, 0, p2.Pos)
})
t.Run("No parsers", func(t *testing.T) {
@ -81,87 +88,79 @@ func TestAny(t *testing.T) {
}
func TestKleene(t *testing.T) {
p := Pointer{"a,b,c,d,e,", 0}
t.Run("Matches sequence with sep", func(t *testing.T) {
node, p2 := Kleene(CharRun("abcdefg"), Exact(","))(p)
require.Equal(t, []Node{"a", "b", "c", "d", "e"}, node)
require.Equal(t, 10, p2.pos)
node, p2 := runParser("a,b,c,d,e,", Kleene(Chars("a-g"), ","))
require.False(t, p2.Errored())
require.Equal(t, []interface{}{"a", "b", "c", "d", "e"}, node)
require.Equal(t, 10, p2.Pos)
})
t.Run("Matches sequence without sep", func(t *testing.T) {
node, p2 := Kleene(Any(CharRun("abcdefg"), Exact(",")))(p)
require.Equal(t, []Node{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node)
require.Equal(t, 10, p2.pos)
node, p2 := runParser("a,b,c,d,e,", Kleene(Any(Chars("a-g"), ",")))
require.Equal(t, []interface{}{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node)
require.Equal(t, 10, p2.Pos)
})
t.Run("Stops on error", func(t *testing.T) {
node, p2 := Kleene(CharRun("abc"), Exact(","))(p)
require.Equal(t, []Node{"a", "b", "c"}, node)
require.Equal(t, 6, p2.pos)
node, p2 := runParser("a,b,c,d,e,", Kleene(Chars("a-c"), ","))
require.Equal(t, []interface{}{"a", "b", "c"}, node)
require.Equal(t, 6, p2.Pos)
require.Equal(t, "d,e,", p2.Get())
})
}
func TestMany(t *testing.T) {
p := Pointer{"a,b,c,d,e,", 0}
t.Run("Matches sequence with sep", func(t *testing.T) {
node, p2 := Many(CharRun("abcdefg"), Exact(","))(p)
require.Equal(t, []Node{"a", "b", "c", "d", "e"}, node)
require.Equal(t, 10, p2.pos)
node, p2 := runParser("a,b,c,d,e,", Many(Chars("a-g"), Exact(",")))
require.Equal(t, []interface{}{"a", "b", "c", "d", "e"}, node)
require.Equal(t, 10, p2.Pos)
})
t.Run("Matches sequence without sep", func(t *testing.T) {
node, p2 := Many(Any(CharRun("abcdefg"), Exact(",")))(p)
require.Equal(t, []Node{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node)
require.Equal(t, 10, p2.pos)
node, p2 := runParser("a,b,c,d,e,", Many(Any(Chars("abcdefg"), Exact(","))))
require.Equal(t, []interface{}{"a", ",", "b", ",", "c", ",", "d", ",", "e", ","}, node)
require.Equal(t, 10, p2.Pos)
})
t.Run("Stops on error", func(t *testing.T) {
node, p2 := Many(CharRun("abc"), Exact(","))(p)
require.Equal(t, []Node{"a", "b", "c"}, node)
require.Equal(t, 6, p2.pos)
node, p2 := runParser("a,b,c,d,e,", Many(Chars("abc"), Exact(",")))
require.Equal(t, []interface{}{"a", "b", "c"}, node)
require.Equal(t, 6, p2.Pos)
require.Equal(t, "d,e,", p2.Get())
})
t.Run("Returns error if nothing matches", func(t *testing.T) {
node, p2 := Many(CharRun("def"), Exact(","))(p)
require.Equal(t, NewError(0, "Expected some of def"), node)
require.Equal(t, 0, p2.pos)
_, p2 := runParser("a,b,c,d,e,", Many(Chars("def"), Exact(",")))
require.Equal(t, "offset 0: Expected def", p2.Error.Error())
require.Equal(t, "a,b,c,d,e,", p2.Get())
})
}
func TestKleeneUntil(t *testing.T) {
p := Pointer{"a,b,c,d,e,fg", 0}
t.Run("Matches sequence with sep", func(t *testing.T) {
node, p2 := KleeneUntil(CharRun("abcde"), CharRun("d"), Exact(","))(p)
require.Equal(t, []Node{"a", "b", "c"}, node)
require.Equal(t, 6, p2.pos)
node, p2 := runParser("a,b,c,d,e,fg", KleeneUntil(Chars("abcde"), "d", ","))
require.Equal(t, []interface{}{"a", "b", "c"}, node)
require.Equal(t, "d,e,fg", p2.Get())
})
t.Run("Breaks if separator does not match", func(t *testing.T) {
node, p2 := KleeneUntil(Char("abcdefg"), Char("y"), Exact(","))(p)
require.Equal(t, []Node{"a", "b", "c", "d", "e", "f"}, node)
require.Equal(t, 11, p2.pos)
node, p2 := runParser("a,b,c,d,e,fg", KleeneUntil(Chars("abcdefg", 1, 1), "y", ","))
require.Equal(t, []interface{}{"a", "b", "c", "d", "e", "f"}, node)
require.Equal(t, "g", p2.Get())
})
}
func TestManyUntil(t *testing.T) {
p := Pointer{"a,b,c,d,e,", 0}
t.Run("Matches sequence until", func(t *testing.T) {
node, p2 := ManyUntil(CharRun("abcdefg"), Char("d"), Exact(","))(p)
require.Equal(t, []Node{"a", "b", "c"}, node)
require.Equal(t, 6, p2.pos)
node, p2 := runParser("a,b,c,d,e,", ManyUntil(Chars("abcdefg"), "d", ","))
require.Equal(t, []interface{}{"a", "b", "c"}, node)
require.Equal(t, 6, p2.Pos)
})
t.Run("Returns error until matches early", func(t *testing.T) {
node, p2 := ManyUntil(CharRun("abc"), Exact("a"), Exact(","))(p)
require.Equal(t, NewError(0, "Unexpected input"), node)
require.Equal(t, 0, p2.pos)
_, p2 := runParser("a,b,c,d,e,", ManyUntil(Chars("abc"), "a", ","))
require.Equal(t, "offset 0: Expected something else", p2.Error.Error())
require.Equal(t, 0, p2.Pos)
require.Equal(t, "a,b,c,d,e,", p2.Get())
})
}
@ -171,19 +170,19 @@ type htmlTag struct {
}
func TestMap(t *testing.T) {
parser := Map(And("<", Range("a-zA-Z0-9"), ">"), func(n Node) Node {
return htmlTag{n.([]Node)[1].(string)}
parser := Map(And("<", Chars("a-zA-Z0-9"), ">"), func(n interface{}) interface{} {
return htmlTag{n.([]interface{})[1].(string)}
})
t.Run("sucess", func(t *testing.T) {
result, _ := parser(Pointer{"<html>", 0})
result, _ := runParser("<html>", parser)
require.Equal(t, htmlTag{"html"}, result)
})
t.Run("error", func(t *testing.T) {
result, ptr := parser(Pointer{"<html", 0})
require.Equal(t, NewError(5, "Expected >"), result)
require.Equal(t, 0, ptr.pos)
_, ps := runParser("<html", parser)
require.Equal(t, "offset 5: Expected >", ps.Error.Error())
require.Equal(t, 0, ps.Pos)
})
}
@ -193,14 +192,14 @@ func TestMerge(t *testing.T) {
parser := Merge(bracer)
t.Run("sucess", func(t *testing.T) {
result, _ := parser(Pointer{"((()))", 0})
result, _ := runParser("((()))", parser)
require.Equal(t, "((()))", result)
})
t.Run("error", func(t *testing.T) {
result, ptr := parser(Pointer{"((())", 0})
require.Equal(t, NewError(5, "Expected )"), result)
require.Equal(t, 0, ptr.pos)
_, ps := runParser("((())", parser)
require.Equal(t, "offset 5: Expected )", ps.Error.Error())
require.Equal(t, 0, ps.Pos)
})
require.Panics(t, func() {
@ -209,8 +208,7 @@ func TestMerge(t *testing.T) {
}
func assertNilParser(t *testing.T, parser Parser) {
p := Pointer{"fff", 0}
node, p2 := parser(p)
node, p2 := runParser("fff", parser)
require.Equal(t, nil, node)
require.Equal(t, p, p2)
require.Equal(t, 0, p2.Pos)
}

View File

@ -2,32 +2,32 @@ package html
import . "github.com/vektah/goparsify"
func Parse(input string) (result Node, remaining string, err error) {
func Parse(input string) (result interface{}, remaining string, err error) {
return ParseString(tag, input)
}
type Tag struct {
Name string
Attributes map[string]string
Body []Node
Body []interface{}
}
var (
tag Parser
identifier = Merge(And(Range("a-z", 1, 1), Range("a-zA-Z0-9", 0)))
text = CharRunUntil("<>")
identifier = Merge(And(Chars("a-z", 1, 1), Chars("a-zA-Z0-9", 0)))
text = NotChars("<>")
element = Any(text, &tag)
elements = Kleene(element)
//attr := And(identifier, equal, String())
attr = And(WS, identifier, WS, "=", WS, Any(String('"'), String('\'')))
attrs = Map(Kleene(attr, WS), func(node Node) Node {
nodes := node.([]Node)
attrs = Map(Kleene(attr, WS), func(node interface{}) interface{} {
nodes := node.([]interface{})
attr := map[string]string{}
for _, attrNode := range nodes {
attrNodes := attrNode.([]Node)
attrNodes := attrNode.([]interface{})
attr[attrNodes[0].(string)] = attrNodes[2].(string)
}
@ -39,13 +39,13 @@ var (
)
func init() {
tag = Map(And(tstart, elements, tend), func(node Node) Node {
nodes := node.([]Node)
openTag := nodes[0].([]Node)
tag = Map(And(tstart, elements, tend), func(node interface{}) interface{} {
nodes := node.([]interface{})
openTag := nodes[0].([]interface{})
return Tag{
Name: openTag[1].(string),
Attributes: openTag[2].(map[string]string),
Body: nodes[1].([]Node),
Body: nodes[1].([]interface{}),
}
})

View File

@ -4,14 +4,13 @@ import (
"testing"
"github.com/stretchr/testify/require"
. "github.com/vektah/goparsify"
)
func TestParse(t *testing.T) {
result, _, err := Parse(`<body>hello <p color="blue">world</p></body>`)
require.NoError(t, err)
require.Equal(t, Tag{Name: "body", Attributes: map[string]string{}, Body: []Node{
require.Equal(t, Tag{Name: "body", Attributes: map[string]string{}, Body: []interface{}{
"hello ",
Tag{Name: "p", Attributes: map[string]string{"color": "blue"}, Body: []Node{"world"}},
Tag{Name: "p", Attributes: map[string]string{"color": "blue"}, Body: []interface{}{"world"}},
}}, result)
}

View File

@ -9,15 +9,15 @@ import (
var (
value Parser
array = Map(And(WS, "[", Kleene(&value, And(WS, ",")), "]"), func(n Node) Node {
return n.([]Node)[1].([]Node)
array = Map(And(WS, "[", Kleene(&value, And(WS, ",")), "]"), func(n interface{}) interface{} {
return n.([]interface{})[1].([]interface{})
})
properties = Kleene(And(WS, String('"'), WS, ":", WS, &value), ",")
object = Map(And(WS, "{", WS, properties, WS, "}"), func(n Node) Node {
object = Map(And(WS, "{", WS, properties, WS, "}"), func(n interface{}) interface{} {
ret := map[string]interface{}{}
for _, prop := range n.([]Node)[1].([]Node) {
vals := prop.([]Node)
for _, prop := range n.([]interface{})[1].([]interface{}) {
vals := prop.([]interface{})
if len(vals) == 3 {
ret[vals[0].(string)] = vals[2]
} else {
@ -28,20 +28,20 @@ var (
return ret
})
_null = Map(And(WS, "null"), func(n Node) Node {
_null = Map(And(WS, "null"), func(n interface{}) interface{} {
return nil
})
_true = Map(And(WS, "true"), func(n Node) Node {
_true = Map(And(WS, "true"), func(n interface{}) interface{} {
return true
})
_false = Map(And(WS, "false"), func(n Node) Node {
_false = Map(And(WS, "false"), func(n interface{}) interface{} {
return false
})
Y = Map(And(&value, WS), func(n Node) Node {
nodes := n.([]Node)
Y = Map(And(&value, WS), func(n interface{}) interface{} {
nodes := n.([]interface{})
if len(nodes) > 0 {
return nodes[0]
}

View File

@ -1,10 +1,11 @@
package json
import (
stdlibJson "encoding/json"
"testing"
parsecJson "github.com/prataprc/goparsec/json"
"github.com/stretchr/testify/require"
. "github.com/vektah/goparsify"
)
func TestUnmarshal(t *testing.T) {
@ -29,7 +30,7 @@ func TestUnmarshal(t *testing.T) {
t.Run("array", func(t *testing.T) {
result, err := Unmarshal(`[true, null, false]`)
require.NoError(t, err)
require.Equal(t, []Node{true, nil, false}, result)
require.Equal(t, []interface{}{true, nil, false}, result)
})
t.Run("object", func(t *testing.T) {
@ -41,16 +42,16 @@ func TestUnmarshal(t *testing.T) {
const benchmarkString = `{"true":true, "false":false, "null": null}`
//func BenchmarkUnmarshalParsec(b *testing.B) {
// bytes := []byte(benchmarkString)
//
// for i := 0; i < b.N; i++ {
// scanner := parsecJson.NewJSONScanner(bytes)
// _, remaining := parsecJson.Y(scanner)
//
// require.True(b, remaining.Endof())
// }
//}
// BenchmarkUnmarshalParsec runs the goparsec JSON parser over benchmarkString
// b.N times and requires that every run consumes the whole input.
func BenchmarkUnmarshalParsec(b *testing.B) {
	payload := []byte(benchmarkString)

	for n := 0; n < b.N; n++ {
		_, rest := parsecJson.Y(parsecJson.NewJSONScanner(payload))
		require.True(b, rest.Endof())
	}
}
func BenchmarkUnmarshalParsify(b *testing.B) {
for i := 0; i < b.N; i++ {
@ -59,12 +60,11 @@ func BenchmarkUnmarshalParsify(b *testing.B) {
}
}
//
//func BenchmarkUnmarshalStdlib(b *testing.B) {
// bytes := []byte(benchmarkString)
// var result interface{}
// for i := 0; i < b.N; i++ {
// err := stdlibJson.Unmarshal(bytes, &result)
// require.NoError(b, err)
// }
//}
// BenchmarkUnmarshalStdlib decodes benchmarkString with encoding/json b.N
// times, reusing one destination value, and requires every decode to succeed.
func BenchmarkUnmarshalStdlib(b *testing.B) {
	var decoded interface{}
	payload := []byte(benchmarkString)

	for n := 0; n < b.N; n++ {
		require.NoError(b, stdlibJson.Unmarshal(payload, &decoded))
	}
}

View File

@ -1,3 +1,3 @@
go build
profile.exe -cpuprofile cpu.out
go tool pprof --inuse_objects profile.exe cpu.out
go tool pprof profile.exe cpu.out

View File

@ -31,11 +31,13 @@ func main() {
}
}()
}
max := 1000000
if *memprofile != "" {
runtime.MemProfileRate = 1
max = 10000
}
for i := 0; i < 10000; i++ {
for i := 0; i < max; i++ {
_, err := json.Unmarshal(`{"true":true, "false":false, "null": null}`)
if err != nil {
panic(err)

View File

@ -1,3 +1,3 @@
go build
profile.exe -memprofile mem.out
go tool pprof --inuse_objects profile.exe mem.out
go tool pprof profile.exe mem.out

View File

@ -1,23 +0,0 @@
package parsec
import "fmt"
// Node is the result value produced by a parser. It places no constraints on
// the underlying type.
type Node interface{}

// Error is a parse failure: the input offset it was raised at plus a
// human-readable message.
type Error struct {
	pos     int
	Message string
}

// Pos returns the offset stored in the error.
func (e Error) Pos() int {
	return e.pos
}

// Error implements the error interface, rendering "offset <pos>: <message>".
func (e Error) Error() string {
	return fmt.Sprintf("offset %d: %s", e.pos, e.Message)
}

// NewError builds an Error for the given offset and message.
func NewError(pos int, message string) Error {
	return Error{pos: pos, Message: message}
}

// IsError reports whether n holds an Error value. Only the value type Error
// counts; a *Error or any other type yields false.
func IsError(n interface{}) bool {
	switch n.(type) {
	case Error:
		return true
	default:
		return false
	}
}

170
parser.go
View File

@ -7,7 +7,7 @@ import (
"unicode/utf8"
)
type Parser func(Pointer) (Node, Pointer)
type Parser func(*State) interface{}
// Parserish types are any type that can be turned into a Parser by Parsify
// These currently include *Parser and string literals.
@ -27,13 +27,13 @@ type Parserish interface{}
func Parsify(p Parserish) Parser {
switch p := p.(type) {
case func(Pointer) (Node, Pointer):
case func(*State) interface{}:
return Parser(p)
case Parser:
return p
case *Parser:
// Todo: Maybe capture this stack and on nil show it? Is there a good error library to do this?
return func(ptr Pointer) (Node, Pointer) {
return func(ptr *State) interface{} {
return (*p)(ptr)
}
case string:
@ -51,80 +51,34 @@ func ParsifyAll(parsers ...Parserish) []Parser {
return ret
}
func ParseString(parser Parserish, input string) (result Node, remaining string, err error) {
func ParseString(parser Parserish, input string) (result interface{}, remaining string, err error) {
p := Parsify(parser)
result, pointer := p(Pointer{input, 0})
ps := &State{input, 0, Error{}}
result = p(ps)
if err, isErr := result.(Error); isErr {
return nil, pointer.Get(), err
if ps.Error.Expected != "" {
return nil, ps.Get(), ps.Error
}
return result, pointer.Get(), nil
return result, ps.Get(), nil
}
func Exact(match string) Parser {
return func(p Pointer) (Node, Pointer) {
if !strings.HasPrefix(p.Get(), match) {
return NewError(p.pos, "Expected "+match), p
return func(ps *State) interface{} {
if !strings.HasPrefix(ps.Get(), match) {
ps.ErrorHere(match)
return nil
}
return match, p.Advance(len(match))
ps.Advance(len(match))
return match
}
}
func Char(match string) Parser {
return func(p Pointer) (Node, Pointer) {
r, w := utf8.DecodeRuneInString(p.Get())
if !strings.ContainsRune(match, r) {
return NewError(p.pos, "Expected one of "+string(match)), p
}
return string(r), p.Advance(w)
}
}
func CharRun(match string) Parser {
return func(p Pointer) (Node, Pointer) {
matched := 0
for p.pos+matched < len(p.input) {
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
if !strings.ContainsRune(match, r) {
break
}
matched += w
}
if matched == 0 {
return NewError(p.pos, "Expected some of "+match), p
}
return p.input[p.pos : p.pos+matched], p.Advance(matched)
}
}
func CharRunUntil(match string) Parser {
return func(p Pointer) (Node, Pointer) {
matched := 0
for p.pos+matched < len(p.input) {
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
if strings.ContainsRune(match, r) {
break
}
matched += w
}
if matched == 0 {
return NewError(p.pos, "Expected some of "+match), p
}
return p.input[p.pos : p.pos+matched], p.Advance(matched)
}
}
func Range(r string, repetition ...int) Parser {
min := int(1)
max := int(-1)
func parseRepetition(defaultMin, defaultMax int, repetition ...int) (min int, max int) {
min = defaultMin
max = defaultMax
switch len(repetition) {
case 0:
case 1:
@ -135,14 +89,18 @@ func Range(r string, repetition ...int) Parser {
default:
panic(fmt.Errorf("Dont know what %d repetion args mean", len(repetition)))
}
return min, max
}
runes := []rune(r)
if len(runes)%3 != 0 {
panic("ranges should be in the form a-z0-9")
}
// parseMatcher turns a string in the format a-f01234A-F into:
// - a set string of matches string(01234)
// - a set of ranges [][]rune{{'a', 'f'}, {'A', 'F'}}
func parseMatcher(matcher string) (matches string, ranges [][]rune) {
runes := []rune(matcher)
var ranges [][]rune
for i := 0; i < len(runes); i += 3 {
for i := 0; i < len(runes); i++ {
if i+2 < len(runes) && runes[i+1] == '-' {
start := runes[i]
end := runes[i+2]
if start <= end {
@ -150,24 +108,48 @@ func Range(r string, repetition ...int) Parser {
} else {
ranges = append(ranges, []rune{end, start})
}
} else if i+1 < len(runes) && runes[i] == '\\' {
matches += string(runes[i+1])
} else {
matches += string(runes[i])
}
return func(p Pointer) (Node, Pointer) {
}
return matches, ranges
}
// Chars returns the parser built by charsImpl with stopOn=false, so its scan
// loop stops at the first rune that is NOT matched by matcher. matcher is
// handed to parseMatcher and may mix literal characters with ranges, e.g.
// "a-f01234A-F". The optional repetition values are forwarded to
// parseRepetition with defaults min=1 and max=-1 (no upper limit); when fewer
// than min is matched the parser records an error expecting matcher.
func Chars(matcher string, repetition ...int) Parser {
return charsImpl(matcher, false, repetition...)
}
// NotChars returns the parser built by charsImpl with stopOn=true, so its scan
// loop stops at the first rune that IS matched by matcher (the inverse of
// Chars). matcher and the optional repetition values are interpreted by
// parseMatcher and parseRepetition, with defaults min=1 and max=-1 (no upper
// limit).
func NotChars(matcher string, repetition ...int) Parser {
return charsImpl(matcher, true, repetition...)
}
func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
min, max := parseRepetition(1, -1, repetition...)
matches, ranges := parseMatcher(matcher)
return func(ps *State) interface{} {
matched := 0
for p.pos+matched < len(p.input) {
for ps.Pos+matched < len(ps.Input) {
if max != -1 && matched >= max {
break
}
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
r, w := utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:])
anyMatched := false
anyMatched := strings.ContainsRune(matches, r)
if !anyMatched {
for _, rng := range ranges {
if r >= rng[0] && r <= rng[1] {
anyMatched = true
}
}
if !anyMatched {
}
if anyMatched == stopOn {
break
}
@ -175,47 +157,55 @@ func Range(r string, repetition ...int) Parser {
}
if matched < min {
return NewError(p.pos+matched, fmt.Sprintf("Expected at least %d more of %s", min-matched, r)), p
ps.ErrorHere(matcher)
return nil
}
return p.input[p.pos : p.pos+matched], p.Advance(matched)
result := ps.Input[ps.Pos : ps.Pos+matched]
ps.Advance(matched)
return result
}
}
func WS(p Pointer) (Node, Pointer) {
_, p2 := CharRun("\t\n\v\f\r \x85\xA0")(p)
return nil, p2
var ws = Chars("\t\n\v\f\r \x85\xA0", 0)
func WS(ps *State) interface{} {
ws(ps)
return nil
}
func String(quote rune) Parser {
return func(p Pointer) (Node, Pointer) {
return func(ps *State) interface{} {
var r rune
var w int
r, w = utf8.DecodeRuneInString(p.input[p.pos:])
var matched int
r, matched = utf8.DecodeRuneInString(ps.Input[ps.Pos:])
if r != quote {
return NewError(p.pos, `Expected "`), p
ps.ErrorHere("\"")
return nil
}
matched := w
result := &bytes.Buffer{}
for p.pos+matched < len(p.input) {
r, w = utf8.DecodeRuneInString(p.input[p.pos+matched:])
for ps.Pos+matched < len(ps.Input) {
r, w = utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:])
matched += w
if r == '\\' {
r, w = utf8.DecodeRuneInString(p.input[p.pos+matched:])
r, w = utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:])
result.WriteRune(r)
matched += w
continue
}
if r == quote {
return result.String(), p.Advance(matched)
ps.Advance(matched)
return result.String()
}
result.WriteRune(r)
}
return NewError(p.pos, "Unterminated string"), p
ps.ErrorHere("\"")
return nil
}
}

View File

@ -7,31 +7,29 @@ import (
)
func TestParsify(t *testing.T) {
p := Pointer{"ffooo", 0}
t.Run("strings", func(t *testing.T) {
node, _ := Parsify("ff")(p)
require.Equal(t, "ff", node)
require.Equal(t, "ff", Parsify("ff")(InputString("ffooo")))
})
t.Run("parsers", func(t *testing.T) {
node, _ := Parsify(CharRun("f"))(p)
require.Equal(t, "ff", node)
require.Equal(t, "ff", Parsify(Chars("f"))(InputString("ffooo")))
})
t.Run("parser funcs", func(t *testing.T) {
node, _ := Parsify(func(p Pointer) (Node, Pointer) {
return "hello", p
})(p)
node := Parsify(func(p *State) interface{} {
return "hello"
})(InputString("ffooo"))
require.Equal(t, "hello", node)
})
t.Run("*parsers", func(t *testing.T) {
var parser Parser
parserfied := Parsify(&parser)
parser = CharRun("f")
parser = Chars("f")
node, _ := parserfied(p)
node := parserfied(InputString("ffooo"))
require.Equal(t, "ff", node)
})
@ -43,122 +41,77 @@ func TestParsify(t *testing.T) {
func TestParsifyAll(t *testing.T) {
parsers := ParsifyAll("ff", "gg")
result, _ := parsers[0](Pointer{"ffooo", 0})
result := parsers[0](InputString("ffooo"))
require.Equal(t, "ff", result)
result, _ = parsers[1](Pointer{"ffooo", 0})
require.Equal(t, NewError(0, "Expected gg"), result)
result = parsers[1](InputString("ffooo"))
require.Equal(t, nil, result)
}
func TestExact(t *testing.T) {
p := Pointer{"fooo", 0}
t.Run("success", func(t *testing.T) {
node, p2 := Exact("fo")(p)
node, ps := runParser("foobar", Exact("fo"))
require.Equal(t, "fo", node)
require.Equal(t, p.Advance(2), p2)
require.Equal(t, "obar", ps.Get())
})
t.Run("error", func(t *testing.T) {
node, p2 := Exact("bar")(p)
require.Equal(t, NewError(0, "Expected bar"), node)
require.Equal(t, 0, p2.pos)
_, ps := runParser("foobar", Exact("bar"))
require.Equal(t, "bar", ps.Error.Expected)
require.Equal(t, 0, ps.Pos)
})
}
func TestChar(t *testing.T) {
p := Pointer{"foobar", 0}
t.Run("success", func(t *testing.T) {
node, p2 := Char("fo")(p)
require.Equal(t, "f", node)
require.Equal(t, p.Advance(1), p2)
})
t.Run("error", func(t *testing.T) {
node, p2 := Char("bar")(p)
require.Equal(t, NewError(0, "Expected one of bar"), node)
require.Equal(t, 0, p2.pos)
})
}
func TestCharRun(t *testing.T) {
p := Pointer{"foobar", 0}
t.Run("success", func(t *testing.T) {
node, p2 := CharRun("fo")(p)
require.Equal(t, "foo", node)
require.Equal(t, p.Advance(3), p2)
})
t.Run("error", func(t *testing.T) {
node, p2 := CharRun("bar")(p)
require.Equal(t, NewError(0, "Expected some of bar"), node)
require.Equal(t, 0, p2.pos)
})
}
func TestCharUntil(t *testing.T) {
p := Pointer{"foobar", 0}
t.Run("success", func(t *testing.T) {
node, p2 := CharRunUntil("z")(p)
require.Equal(t, "foobar", node)
require.Equal(t, p.Advance(6), p2)
})
t.Run("error", func(t *testing.T) {
node, p2 := CharRunUntil("f")(p)
require.Equal(t, NewError(0, "Expected some of f"), node)
require.Equal(t, 0, p2.pos)
})
}
func TestWS(t *testing.T) {
p := Pointer{" fooo", 0}
node, p2 := WS(p)
require.Equal(t, nil, node)
require.Equal(t, p.Advance(2), p2)
}
func TestRange(t *testing.T) {
func TestChars(t *testing.T) {
t.Run("full match", func(t *testing.T) {
node, p := Range("a-z")(Pointer{"foobar", 0})
node, ps := runParser("foobar", Chars("a-z"))
require.Equal(t, "foobar", node)
require.Equal(t, "", p.Get())
require.Equal(t, "", ps.Get())
require.False(t, ps.Errored())
})
t.Run("partial match", func(t *testing.T) {
node, p := Range("1-4d-a")(Pointer{"a1b2c3d4efg", 0})
node, ps := runParser("a1b2c3d4efg", Chars("1-4d-a"))
require.Equal(t, "a1b2c3d4", node)
require.Equal(t, "efg", p.Get())
require.Equal(t, "efg", ps.Get())
require.False(t, ps.Errored())
})
t.Run("limited match", func(t *testing.T) {
node, p := Range("1-4d-a", 1, 2)(Pointer{"a1b2c3d4efg", 0})
node, ps := runParser("a1b2c3d4efg", Chars("1-4d-a", 1, 2))
require.Equal(t, "a1", node)
require.Equal(t, "b2c3d4efg", p.Get())
require.Equal(t, "b2c3d4efg", ps.Get())
require.False(t, ps.Errored())
})
t.Run("no match", func(t *testing.T) {
node, p := Range("0-9")(Pointer{"ffffff", 0})
require.Equal(t, NewError(0, "Expected at least 1 more of 0-9"), node)
require.Equal(t, 0, p.pos)
_, ps := runParser("ffffff", Chars("0-9"))
require.Equal(t, "offset 0: Expected 0-9", ps.Error.Error())
require.Equal(t, 0, ps.Pos)
})
t.Run("no match with min", func(t *testing.T) {
node, p := Range("0-9", 4)(Pointer{"ffffff", 0})
require.Equal(t, NewError(0, "Expected at least 4 more of 0-9"), node)
require.Equal(t, 0, p.pos)
_, ps := runParser("ffffff", Chars("0-9", 4))
require.Equal(t, "0-9", ps.Error.Expected)
require.Equal(t, 0, ps.Pos)
})
t.Run("test exact matches", func(t *testing.T) {
node, ps := runParser("aaff", Chars("abcd"))
require.Equal(t, "aa", node)
require.Equal(t, 2, ps.Pos)
require.False(t, ps.Errored())
})
t.Run("test not matches", func(t *testing.T) {
node, ps := runParser("aaff", NotChars("ff"))
require.Equal(t, "aa", node)
require.Equal(t, 2, ps.Pos)
require.False(t, ps.Errored())
})
require.Panics(t, func() {
Range("abcd")
})
require.Panics(t, func() {
Range("a-b", 1, 2, 3)
Chars("a-b", 1, 2, 3)
})
}
@ -181,26 +134,48 @@ func TestParseString(t *testing.T) {
func TestString(t *testing.T) {
t.Run("test basic match", func(t *testing.T) {
result, p := String('"')(Pointer{`"hello"`, 0})
result, p := runParser(`"hello"`, String('"'))
require.Equal(t, `hello`, result)
require.Equal(t, "", p.Get())
})
t.Run("test non match", func(t *testing.T) {
result, p := String('"')(Pointer{`1`, 0})
require.Equal(t, NewError(0, `Expected "`), result)
_, p := runParser(`1`, String('"'))
require.Equal(t, `"`, p.Error.Expected)
require.Equal(t, `1`, p.Get())
})
t.Run("test unterminated string", func(t *testing.T) {
result, p := String('"')(Pointer{`"hello `, 0})
require.Equal(t, NewError(0, `Unterminated string`), result)
_, p := runParser(`"hello `, String('"'))
require.Equal(t, `"`, p.Error.Expected)
require.Equal(t, `"hello `, p.Get())
})
t.Run("test escaping", func(t *testing.T) {
result, p := String('"')(Pointer{`"hello \"world\""`, 0})
result, p := runParser(`"hello \"world\""`, String('"'))
require.Equal(t, `hello "world"`, result)
require.Equal(t, ``, p.Get())
})
}
// TestWS checks that WS returns a nil node, stops in front of "asdf" and never
// leaves the state errored, whether or not whitespace precedes the text.
func TestWS(t *testing.T) {
	scenarios := []struct {
		name  string
		input string
	}{
		{name: "consumes all whitespace", input: " asdf"},
		{name: "never errors", input: "asdf"},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			node, state := runParser(sc.input, WS)
			require.Equal(t, nil, node)
			require.Equal(t, "asdf", state.Get())
			require.False(t, state.Errored())
		})
	}
}
// runParser is a test helper: it wraps input in a fresh State, applies parser
// to it once and returns both the parser's result and the State so callers can
// inspect the final position and error.
func runParser(input string, parser Parser) (interface{}, *State) {
	state := InputString(input)
	return parser(state), state
}

View File

@ -1,17 +0,0 @@
package parsec
// Pointer is an immutable cursor: the full input plus a byte offset into it.
type Pointer struct {
	input string
	pos   int
}

// Advance returns a copy of p moved forward by i; p itself is not modified.
func (p Pointer) Advance(i int) Pointer {
	// p is already a copy because of the value receiver.
	p.pos += i
	return p
}

// Get returns the input from the current offset onwards, or "" when the
// offset lies beyond the end of the input.
func (p Pointer) Get() string {
	if p.pos <= len(p.input) {
		return p.input[p.pos:]
	}
	return ""
}

View File

@ -1,25 +0,0 @@
package parsec
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestPointer covers Pointer.Advance (returns a moved copy, leaves the
// receiver untouched) and Pointer.Get (remaining input, "" at or past the end).
func TestPointer(t *testing.T) {
	start := Pointer{"fooo", 0}

	t.Run("Advances", func(t *testing.T) {
		advanced := start.Advance(2)
		require.Equal(t, Pointer{"fooo", 2}, advanced)
		require.Equal(t, Pointer{"fooo", 0}, start)
		require.Equal(t, Pointer{"fooo", 3}, advanced.Advance(1))
	})

	t.Run("Get", func(t *testing.T) {
		require.Equal(t, "fooo", start.Get())

		for _, tc := range []struct {
			advance int
			want    string
		}{
			{1, "ooo"},
			{4, ""},
			{10, ""},
		} {
			require.Equal(t, tc.want, start.Advance(tc.advance).Get())
		}
	})
}

45
state.go Normal file
View File

@ -0,0 +1,45 @@
package parsec
import "fmt"
// Error records a parse failure: the offset it was raised at and a
// description of what was expected there.
type Error struct {
	pos      int
	Expected string
}

// Pos returns the offset stored in the error.
func (e Error) Pos() int {
	return e.pos
}

// Error implements the error interface, rendering
// "offset <pos>: Expected <expected>".
func (e Error) Error() string {
	return fmt.Sprintf("offset %d: Expected %s", e.pos, e.Expected)
}

// State is the mutable parse cursor: the input, the current byte offset into
// it, and the most recently recorded error.
type State struct {
	Input string
	Pos   int
	Error Error
}

// Advance moves the current position forward by i.
func (ps *State) Advance(i int) {
	ps.Pos = ps.Pos + i
}

// Get returns the input from the current position onwards, or "" when the
// position lies beyond the end of the input.
func (ps *State) Get() string {
	if ps.Pos <= len(ps.Input) {
		return ps.Input[ps.Pos:]
	}
	return ""
}

// ErrorHere records an error at the current position, replacing any error
// that was stored before.
func (ps *State) ErrorHere(expected string) {
	ps.Error = Error{pos: ps.Pos, Expected: expected}
}

// ClearError marks the state as not errored. Only Expected is reset; the
// stored offset is left as it was.
func (ps *State) ClearError() {
	ps.Error.Expected = ""
}

// Errored reports whether an error is currently recorded, which is the case
// exactly when Error.Expected is non-empty.
func (ps *State) Errored() bool {
	return len(ps.Error.Expected) > 0
}

// InputString returns a fresh State positioned at the start of input with no
// error recorded.
func InputString(input string) *State {
	ps := new(State)
	ps.Input = input
	return ps
}

45
state_test.go Normal file
View File

@ -0,0 +1,45 @@
package parsec
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestState_Advance checks that a fresh State starts at position 0 and that
// successive Advance calls accumulate.
func TestState_Advance(t *testing.T) {
	state := InputString("fooo")
	require.Equal(t, 0, state.Pos)

	for _, step := range []struct{ by, want int }{
		{by: 2, want: 2},
		{by: 1, want: 3},
	} {
		state.Advance(step.by)
		require.Equal(t, step.want, state.Pos)
	}
}
// TestState_Get checks that Get returns the remaining input and degrades to
// "" once the position has moved beyond the end of the input.
func TestState_Get(t *testing.T) {
	state := InputString("fooo")
	require.Equal(t, "fooo", state.Get())

	for _, step := range []struct {
		by   int
		want string
	}{
		{by: 1, want: "ooo"},
		{by: 4, want: ""},
		{by: 10, want: ""},
	} {
		state.Advance(step.by)
		require.Equal(t, step.want, state.Get())
	}
}
// TestState_Errors walks the error lifecycle: record an error at the start,
// clear it, advance, then record a second error at the new position.
func TestState_Errors(t *testing.T) {
	state := InputString("fooo")

	// requireErrored asserts the currently recorded error's text and offset.
	requireErrored := func(wantMsg string, wantPos int) {
		require.Equal(t, wantMsg, state.Error.Error())
		require.Equal(t, wantPos, state.Error.Pos())
		require.True(t, state.Errored())
	}

	state.ErrorHere("hello")
	requireErrored("offset 0: Expected hello", 0)

	state.ClearError()
	require.False(t, state.Errored())

	state.Advance(2)
	state.ErrorHere("hello2")
	requireErrored("offset 2: Expected hello2", 2)
}