diff --git a/calc/calc.go b/calc/calc.go new file mode 100644 index 0000000..d24dcea --- /dev/null +++ b/calc/calc.go @@ -0,0 +1,80 @@ +package calc + +import ( + "errors" + "fmt" + + . "github.com/vektah/goparsify" +) + +var ( + value Parser + + sumOp = Chars("+-", 1, 1) + prodOp = Chars("/*", 1, 1) + + groupExpr = Map(And("(", sum, ")"), func(n Node) Node { + return Node{Result: n.Child[1].Result} + }) + + number = Map(NumberLit(), func(n Node) Node { + switch i := n.Result.(type) { + case int64: + return Node{Result: float64(i)} + case float64: + return Node{Result: i} + default: + panic(fmt.Errorf("unknown value %#v", i)) + } + }) + + sum = Map(And(prod, Kleene(And(sumOp, prod))), func(n Node) Node { + i := n.Child[0].Result.(float64) + + for _, op := range n.Child[1].Child { + switch op.Child[0].Token { + case "+": + i += op.Child[1].Result.(float64) + case "-": + i -= op.Child[1].Result.(float64) + } + } + + return Node{Result: i} + }) + + prod = Map(And(&value, Kleene(And(prodOp, &value))), func(n Node) Node { + i := n.Child[0].Result.(float64) + + for _, op := range n.Child[1].Child { + switch op.Child[0].Token { + case "/": + i /= op.Child[1].Result.(float64) + case "*": + i *= op.Child[1].Result.(float64) + } + } + + return Node{Result: i} + }) + + Y = Maybe(sum) +) + +func init() { + value = Any(number, groupExpr) +} + +func Calc(input string) (float64, error) { + result, remaining, err := ParseString(Y, input) + + if err != nil { + return 0, err + } + + if remaining != "" { + return result.(float64), errors.New("left unparsed: " + remaining) + } + + return result.(float64), nil +} diff --git a/calc/calc_test.go b/calc/calc_test.go new file mode 100644 index 0000000..13b2f5b --- /dev/null +++ b/calc/calc_test.go @@ -0,0 +1,54 @@ +package calc + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNumbers(t *testing.T) { + result, err := Calc(`1`) + require.NoError(t, err) + require.EqualValues(t, 1, result) +} + +func TestAddition(t *testing.T) { + result, err := Calc(`1+1`) + require.NoError(t, err) + require.EqualValues(t, 2, result) +} + +func TestSubtraction(t *testing.T) { + result, err := Calc(`1-1`) + require.NoError(t, err) + require.EqualValues(t, 0, result) +} + +func TestDivision(t *testing.T) { + result, err := Calc(`1/2`) + require.NoError(t, err) + require.EqualValues(t, .5, result) +} + +func TestMultiplication(t *testing.T) { + result, err := Calc(`1*2`) + require.NoError(t, err) + require.EqualValues(t, 2, result) +} + +func TestOrderOfOperations(t *testing.T) { + result, err := Calc(`1+10*2`) + require.NoError(t, err) + require.EqualValues(t, 21, result) +} +func TestParenthesis(t *testing.T) { + result, err := Calc(`(1+10)*2`) + require.NoError(t, err) + require.EqualValues(t, 22, result) +} + +func TestRecursive(t *testing.T) { + result, err := Calc(`(1+(2*(3-(4/(5)))))`) + require.NoError(t, err) + require.EqualValues(t, 5.4, result) +} diff --git a/combinator.go b/combinator.go index e83a0c1..76cd93a 100644 --- a/combinator.go +++ b/combinator.go @@ -16,10 +16,10 @@ func And(parsers ...Parserish) Parser { parserfied := ParsifyAll(parsers...) return NewParser("And()", func(ps *State) Node { - result := Node{Children: make([]Node, len(parserfied))} + result := Node{Child: make([]Node, len(parserfied))} startpos := ps.Pos for i, parser := range parserfied { - result.Children[i] = parser(ps) + result.Child[i] = parser(ps) if ps.Errored() { ps.Pos = startpos return result @@ -90,14 +90,14 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser { for { node := opParser(ps) if ps.Errored() { - if len(result.Children) < min { + if len(result.Child) < min { ps.Pos = startpos return result } ps.ClearError() return result } - result.Children = append(result.Children, node) + result.Child = append(result.Child, node) if sepParser != nil { sepParser(ps) @@ -153,9 +153,9 @@ func flatten(n Node) string { return n.Token } - if len(n.Children) > 0 { + if len(n.Child) > 0 { sbuf := &bytes.Buffer{} - for _, node := range n.Children { + for _, node := range n.Child { sbuf.WriteString(flatten(node)) } return sbuf.String() diff --git a/combinator_test.go b/combinator_test.go index 4ac96b1..401c5d6 100644 --- a/combinator_test.go +++ b/combinator_test.go @@ -140,7 +140,7 @@ type htmlTag struct { func TestMap(t *testing.T) { parser := Map(And("<", Chars("a-zA-Z0-9"), ">"), func(n Node) Node { - return Node{Result: htmlTag{n.Children[1].Token}} + return Node{Result: htmlTag{n.Child[1].Token}} }) t.Run("sucess", func(t *testing.T) { @@ -182,7 +182,7 @@ func assertSequence(t *testing.T, node Node, expected ...string) { require.NotNil(t, node) actual := []string{} - for _, child := range node.Children { + for _, child := range node.Child { actual = append(actual, child.Token) } diff --git a/html/html.go b/html/html.go index 748065f..27bf7d1 100644 --- a/html/html.go +++ b/html/html.go @@ -25,7 +25,7 @@ var ( element = Any(text, &tag) elements = Map(Kleene(element), func(n Node) Node { ret := []interface{}{} - for _, child := range n.Children { + for _, child := range n.Child { ret = append(ret, child.Result) } return Node{Result: ret} @@ -35,8 +35,8 @@ var ( attrs = Map(Kleene(attr), func(node Node) Node { attr := map[string]string{} - for _, attrNode := range node.Children { - attr[attrNode.Children[0].Token] = attrNode.Children[2].Result.(string) + for _, attrNode := range node.Child { + attr[attrNode.Child[0].Token] = attrNode.Child[2].Result.(string) } return Node{Result: attr} @@ -48,11 +48,11 @@ var ( func init() { tag = Map(And(tstart, elements, tend), func(node Node) Node { - openTag := node.Children[0] + openTag := node.Child[0] return Node{Result: Tag{ - Name: openTag.Children[1].Token, - Attributes: openTag.Children[2].Result.(map[string]string), - Body: node.Children[1].Result.([]interface{}), + Name: openTag.Child[1].Token, + Attributes: openTag.Child[2].Result.(map[string]string), + Body: node.Child[1].Result.([]interface{}), }} }) diff --git a/json/json.go b/json/json.go index 81b9523..724e020 100644 --- a/json/json.go +++ b/json/json.go @@ -14,7 +14,7 @@ var ( _array = Map(And("[", Kleene(&_value, ","), "]"), func(n Node) Node { ret := []interface{}{} - for _, child := range n.Children[1].Children { + for _, child := range n.Child[1].Child { ret = append(ret, child.Result) } return Node{Result: ret} @@ -23,8 +23,8 @@ var ( _object = Map(And("{", _properties, "}"), func(n Node) Node { ret := map[string]interface{}{} - for _, prop := range n.Children[1].Children { - ret[prop.Children[0].Result.(string)] = prop.Children[2].Result + for _, prop := range n.Child[1].Child { + ret[prop.Child[0].Result.(string)] = prop.Child[2].Result } return Node{Result: ret} diff --git a/json/profile/json.go b/json/profile/json.go index a61e966..639a19e 100644 --- a/json/profile/json.go +++ b/json/profile/json.go @@ -7,6 +7,7 @@ import ( "runtime" "runtime/pprof" + "github.com/vektah/goparsify" "github.com/vektah/goparsify/json" ) @@ -31,7 +32,7 @@ func main() { } }() } - max := 100000 + max := 1000 if *memprofile != "" { runtime.MemProfileRate = 1 max = 1000 @@ -52,6 +53,7 @@ func main() { panic(err) } } + goparsify.DumpDebugStats() } // This string was taken from http://json.org/example.html diff --git a/licence.md b/licence.md new file mode 100644 index 0000000..35caca4 --- /dev/null +++ b/licence.md @@ -0,0 +1,19 @@ +Copyright (c) 2017 Adam Scarr + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/parser.go b/parser.go index d7daaf6..35c772b 100644 --- a/parser.go +++ b/parser.go @@ -7,9 +7,9 @@ import ( ) type Node struct { - Token string - Children []Node - Result interface{} + Token string + Child []Node + Result interface{} } type Parser func(*State) Node diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..30b2da5 --- /dev/null +++ b/readme.md @@ -0,0 +1,151 @@ +goparsify +========= + +A parser-combinator library for building easy to test, read and maintain parsers using functional composition. + +### todo + - godoc: I've been slack and the interfaces have been changing rapidly. + - fatal errors: Some way for a parser to say "Ive found a good match, the input is broken, stop here with an error" + - better errors: currently only the longest error is returned, but it would be nice to show all expected tokens that could follow. + + +### benchmarks +I dont have many benchmarks set up yet, but the json parser is very promising. Nearly keeping up with the stdlib for raw speed: +``` +$ go test -bench=. -benchtime=2s -benchmem ./json +BenchmarkUnmarshalParsec-8 50000 71447 ns/op 50464 B/op 1318 allocs/op +BenchmarkUnmarshalParsify-8 50000 56414 ns/op 43887 B/op 334 allocs/op +BenchmarkUnmarshalStdlib-8 50000 50187 ns/op 13949 B/op 262 allocs/op +PASS +ok github.com/vektah/goparsify/json 10.840s +``` + +### debugging mode +If you build the parser with -tags debug it will instrument each parser and a call to DumpDebugStats() will show stats: +``` + Any() 415.7136ms 87000 calls json.go:35 + Map() 309.6569ms 12000 calls json.go:31 + And() 298.6519ms 12000 calls json.go:23 + Kleene() 290.6462ms 12000 calls json.go:13 + And() 272.6392ms 81000 calls json.go:13 + And() 78.0404ms 13000 calls json.go:15 + Map() 78.0404ms 13000 calls json.go:21 + Kleene() 77.0401ms 1000 calls json.go:15 + string literal 7.5053ms 81000 calls json.go:13 + string literal 4.5031ms 84000 calls json.go:11 + , 4.0008ms 81000 calls json.go:13 + false 2.0018ms 85000 calls json.go:10 + null 2.0005ms 87000 calls json.go:8 + true 1.501ms 87000 calls json.go:9 + : 500.8µs 81000 calls json.go:13 + [ 0s 13000 calls json.go:15 + } 0s 12000 calls json.go:23 + { 0s 12000 calls json.go:23 + number literal 0s 31000 calls json.go:12 + ] 0s 1000 calls json.go:15 + Nil 0s 0 calls profile/json.go:148 + , 0s 5000 calls json.go:15 +``` +All times are cumulative, it would be nice to break this down into a parse tree with relative times. This is a nice addition to pprof as it will break down the parsers based on where they are used instead of grouping them all by type. + +This is **free** when the debug tag isnt used. + +### example calculator +Lets say we wanted to build a calculator that could take an expression and calculate the result. + +Lets start with test: +```go +func TestNumbers(t *testing.T) { + result, err := Calc(`1`) + require.NoError(t, err) + require.EqualValues(t, 1, result) +} +``` + +Then define a parser for numbers +```go +var number = Map(NumberLit(), func(n Node) Node { + switch i := n.Result.(type) { + case int64: + return Node{Result: float64(i)} + case float64: + return Node{Result: i} + default: + panic(fmt.Errorf("unknown value %#v", i)) + } +}) + +func Calc(input string) (float64, error) { + result, remaining, err := ParseString(number, input) + + if err != nil { + return 0, err + } + + if remaining != "" { + return result.(float64), errors.New("left unparsed: " + remaining) + } + + return result.(float64), nil +} +``` + +This parser will return numbers either as float64 or int depending on the literal, for this calculator we only want floats so we Map the results and type cast. + +Run the tests and make sure everything is ok. + +Time to add addition + +```go +func TestAddition(t *testing.T) { + result, err := Calc(`1+1`) + require.NoError(t, err) + require.EqualValues(t, 2, result) +} + + +var sumOp = Chars("+-", 1, 1) + +sum = Map(And(number, Kleene(And(sumOp, number))), func(n Node) Node { + i := n.Child[0].Result.(float64) + + for _, op := range n.Child[1].Child { + switch op.Child[0].Token { + case "+": + i += op.Child[1].Result.(float64) + case "-": + i -= op.Child[1].Result.(float64) + } + } + + return Node{Result: i} +}) + +// and update Calc to point to the new root parser -> `result, remaining, err := ParseString(sum, input)` +``` + +This parser will match number ([+-] number)+, then map its to be the sum. See how the Child map directly to the positions in the parsers? n is the result of the and, n.Child[0] is its first argument, n.Child[1] is the result of the Kleene parser, n.Child[1].Child[0] is the result of the first And and so fourth. Given how closely tied the parser and the Map are it is good to keep the two together. + +You can continue like this and add multiplication and parenthesis fairly easily. Eventually if you keep adding parsers you will end up with a loop, and go will give you a handy error message like: +``` +typechecking loop involving value = goparsify.Any(number, groupExpr) +``` + +we need to break the loop using a pointer, then set its value in init +``` +var ( + value Parser + prod = And(&value, Kleene(And(prodOp, &value))) +) + +func init() { + value = Any(number, groupExpr) +} +``` + +Take a look at [calc](calc/calc.go) for a full example. + + +### prior art + +Inspired by https://github.com/prataprc/goparsec but the interfaces have been cleaned up a lot.