From 47badae641b9cd8862f327864d2143a57b8e30af Mon Sep 17 00:00:00 2001 From: Adam Scarr Date: Wed, 9 Aug 2017 21:18:14 +1000 Subject: [PATCH] Add godoc --- .circleci/config.yml | 4 +- calc/calc.go | 28 +++++------ combinator.go | 60 ++++++++++++++--------- combinator_test.go | 20 ++++---- debugoff.go | 8 ++-- debugon.go | 12 +++-- html/html.go | 20 ++++---- html/html_test.go | 2 +- json/json.go | 21 ++------- json/profile/json.go | 8 ++-- literals.go | 40 ++++++++-------- literals_test.go | 18 +++---- parser.go | 87 +++++++++++++++++++++++----------- parser_test.go | 55 ++++++++++++---------- state.go | 110 +++++++++++++++++++++++++------------------ state_test.go | 12 ++--- 16 files changed, 284 insertions(+), 221 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7b7c3d5..bad883d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,6 +6,8 @@ jobs: working_directory: /go/src/github.com/vektah/goparsify steps: - checkout - - run: go get -u github.com/golang/dep/cmd/dep + - run: go get -u github.com/golang/dep/cmd/dep github.com/alecthomas/gometalinter && gometalinter --install - run: dep ensure --vendor-only + - run: go vet ./... - run: go test -v ./... + - run: gometalinter . --disable gocyclo diff --git a/calc/calc.go b/calc/calc.go index 4f2490d..574ea84 100644 --- a/calc/calc.go +++ b/calc/calc.go @@ -1,7 +1,6 @@ package calc import ( - "errors" "fmt" . 
"github.com/vektah/goparsify" @@ -13,22 +12,22 @@ var ( sumOp = Chars("+-", 1, 1) prodOp = Chars("/*", 1, 1) - groupExpr = Map(Seq("(", sum, ")"), func(n Node) Node { - return Node{Result: n.Child[1].Result} + groupExpr = Map(Seq("(", sum, ")"), func(n Result) Result { + return Result{Result: n.Child[1].Result} }) - number = Map(NumberLit(), func(n Node) Node { + number = Map(NumberLit(), func(n Result) Result { switch i := n.Result.(type) { case int64: - return Node{Result: float64(i)} + return Result{Result: float64(i)} case float64: - return Node{Result: i} + return Result{Result: i} default: panic(fmt.Errorf("unknown value %#v", i)) } }) - sum = Map(Seq(prod, Some(Seq(sumOp, prod))), func(n Node) Node { + sum = Map(Seq(prod, Some(Seq(sumOp, prod))), func(n Result) Result { i := n.Child[0].Result.(float64) for _, op := range n.Child[1].Child { @@ -40,10 +39,10 @@ var ( } } - return Node{Result: i} + return Result{Result: i} }) - prod = Map(Seq(&value, Some(Seq(prodOp, &value))), func(n Node) Node { + prod = Map(Seq(&value, Some(Seq(prodOp, &value))), func(n Result) Result { i := n.Child[0].Result.(float64) for _, op := range n.Child[1].Child { @@ -55,10 +54,10 @@ var ( } } - return Node{Result: i} + return Result{Result: i} }) - Y = Maybe(sum) + y = Maybe(sum) ) func init() { @@ -66,15 +65,10 @@ func init() { } func Calc(input string) (float64, error) { - result, remaining, err := ParseString(Y, input) - + result, err := Run(y, input) if err != nil { return 0, err } - if remaining != "" { - return result.(float64), errors.New("left unparsed: " + remaining) - } - return result.(float64), nil } diff --git a/combinator.go b/combinator.go index 186d8f7..9cb661f 100644 --- a/combinator.go +++ b/combinator.go @@ -4,11 +4,12 @@ import ( "bytes" ) +// Seq matches all of the given parsers in order and returns their nodes as .Child[n] func Seq(parsers ...Parserish) Parser { parserfied := ParsifyAll(parsers...) 
- return NewParser("Seq()", func(ps *State) Node { - result := Node{Child: make([]Node, len(parserfied))} + return NewParser("Seq()", func(ps *State) Result { + result := Result{Child: make([]Result, len(parserfied))} startpos := ps.Pos for i, parser := range parserfied { result.Child[i] = parser(ps) @@ -21,9 +22,10 @@ func Seq(parsers ...Parserish) Parser { }) } +// NoAutoWS disables automatically ignoring whitespace between tokens for all parsers underneath func NoAutoWS(parser Parserish) Parser { parserfied := Parsify(parser) - return func(ps *State) Node { + return func(ps *State) Result { ps.NoAutoWS = true ret := parserfied(ps) @@ -33,10 +35,11 @@ func NoAutoWS(parser Parserish) Parser { } } +// Any matches the first successful parser and returns its node func Any(parsers ...Parserish) Parser { parserfied := ParsifyAll(parsers...) - return NewParser("Any()", func(ps *State) Node { + return NewParser("Any()", func(ps *State) Result { longestError := Error{} startpos := ps.Pos for _, parser := range parserfied { @@ -45,7 +48,7 @@ func Any(parsers ...Parserish) Parser { if ps.Error.pos > longestError.pos { longestError = ps.Error } - ps.ClearError() + ps.Recover() continue } return node @@ -53,16 +56,22 @@ func Any(parsers ...Parserish) Parser { ps.Error = longestError ps.Pos = startpos - return Node{} + return Result{} }) } -func Some(opScan Parserish, sepScan ...Parserish) Parser { - return NewParser("Some()", manyImpl(0, opScan, sepScan...)) +// Some matches zero or more parsers and returns the value as .Child[n] +// an optional separator can be provided and that value will be consumed +// but not returned. Only one separator can be provided. 
+func Some(parser Parserish, separator ...Parserish) Parser { + return NewParser("Some()", manyImpl(0, parser, separator...)) } -func Many(opScan Parserish, sepScan ...Parserish) Parser { - return NewParser("Many()", manyImpl(1, opScan, sepScan...)) +// Many matches one or more parsers and returns the value as .Child[n] +// an optional separator can be provided and that value will be consumed +// but not returned. Only one separator can be provided. +func Many(parser Parserish, separator ...Parserish) Parser { + return NewParser("Many()", manyImpl(1, parser, separator...)) } func manyImpl(min int, op Parserish, sep ...Parserish) Parser { @@ -72,8 +81,8 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser { sepParser = Parsify(sep[0]) } - return func(ps *State) Node { - var result Node + return func(ps *State) Result { + var result Result startpos := ps.Pos for { node := opParser(ps) @@ -82,7 +91,7 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser { ps.Pos = startpos return result } - ps.ClearError() + ps.Recover() return result } result.Child = append(result.Child, node) @@ -90,7 +99,7 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser { if sepParser != nil { sepParser(ps) if ps.Errored() { - ps.ClearError() + ps.Recover() return result } } @@ -98,23 +107,27 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser { } } +// Maybe will match 0 or 1 of the parser func Maybe(parser Parserish) Parser { parserfied := Parsify(parser) - return NewParser("Maybe()", func(ps *State) Node { + return NewParser("Maybe()", func(ps *State) Result { node := parserfied(ps) if ps.Errored() { - ps.ClearError() + ps.Recover() } return node }) } +// Bind will set the node .Result when the given parser matches +// This is useful for giving a value to keywords and constant literals +// like true and false. See the json parser for an example. 
func Bind(parser Parserish, val interface{}) Parser { p := Parsify(parser) - return func(ps *State) Node { + return func(ps *State) Result { node := p(ps) if ps.Errored() { return node @@ -124,10 +137,12 @@ func Bind(parser Parserish, val interface{}) Parser { } } -func Map(parser Parserish, f func(n Node) Node) Parser { +// Map applies the callback if the parser matches. This is used to set the Result +// based on the matched result. +func Map(parser Parserish, f func(n Result) Result) Parser { p := Parsify(parser) - return NewParser("Map()", func(ps *State) Node { + return NewParser("Map()", func(ps *State) Result { node := p(ps) if ps.Errored() { return node @@ -136,7 +151,7 @@ func Map(parser Parserish, f func(n Node) Node) Parser { }) } -func flatten(n Node) string { +func flatten(n Result) string { if n.Token != "" { return n.Token } @@ -152,8 +167,9 @@ func flatten(n Node) string { return "" } +// Merge all child Tokens together recursively func Merge(parser Parserish) Parser { - return NewParser("Merge()", Map(parser, func(n Node) Node { - return Node{Token: flatten(n)} + return NewParser("Merge()", Map(parser, func(n Result) Result { + return Result{Token: flatten(n)} })) } diff --git a/combinator_test.go b/combinator_test.go index c7ae794..e5f3b01 100644 --- a/combinator_test.go +++ b/combinator_test.go @@ -17,7 +17,7 @@ func TestSeq(t *testing.T) { t.Run("returns errors", func(t *testing.T) { _, p2 := runParser("hello there", parser) - require.Equal(t, "world", p2.Error.Expected) + require.Equal(t, "world", p2.Error.expected) require.Equal(t, 6, p2.Error.pos) require.Equal(t, 0, p2.Pos) }) @@ -32,7 +32,7 @@ func TestMaybe(t *testing.T) { t.Run("returns no errors", func(t *testing.T) { node, p3 := runParser("hello world", Maybe("world")) - require.Equal(t, Node{}, node) + require.Equal(t, Result{}, node) require.False(t, p3.Errored()) require.Equal(t, 0, p3.Pos) }) @@ -51,14 +51,14 @@ func TestAny(t *testing.T) { Seq("hello", "world", "."), Seq("hello", 
"brother"), )) - require.Equal(t, "offset 11: Expected .", p2.Error.Error()) + require.Equal(t, "offset 11: expected .", p2.Error.Error()) require.Equal(t, 11, p2.Error.Pos()) require.Equal(t, 0, p2.Pos) }) t.Run("Accepts nil matches", func(t *testing.T) { node, p2 := runParser("hello world!", Any(Exact("ffffff"))) - require.Equal(t, Node{}, node) + require.Equal(t, Result{}, node) require.Equal(t, 0, p2.Pos) }) } @@ -113,7 +113,7 @@ func TestMany(t *testing.T) { t.Run("Returns error if nothing matches", func(t *testing.T) { _, p2 := runParser("a,b,c,d,e,", Many(Chars("def"), Exact(","))) - require.Equal(t, "offset 0: Expected def", p2.Error.Error()) + require.Equal(t, "offset 0: expected def", p2.Error.Error()) require.Equal(t, "a,b,c,d,e,", p2.Get()) }) } @@ -123,8 +123,8 @@ type htmlTag struct { } func TestMap(t *testing.T) { - parser := Map(Seq("<", Chars("a-zA-Z0-9"), ">"), func(n Node) Node { - return Node{Result: htmlTag{n.Child[1].Token}} + parser := Map(Seq("<", Chars("a-zA-Z0-9"), ">"), func(n Result) Result { + return Result{Result: htmlTag{n.Child[1].Token}} }) t.Run("sucess", func(t *testing.T) { @@ -134,7 +134,7 @@ func TestMap(t *testing.T) { t.Run("error", func(t *testing.T) { _, ps := runParser("", ps.Error.Error()) + require.Equal(t, "offset 5: expected >", ps.Error.Error()) require.Equal(t, 0, ps.Pos) }) } @@ -151,12 +151,12 @@ func TestMerge(t *testing.T) { t.Run("error", func(t *testing.T) { _, ps := runParser("((())", parser) - require.Equal(t, "offset 5: Expected )", ps.Error.Error()) + require.Equal(t, "offset 5: expected )", ps.Error.Error()) require.Equal(t, 0, ps.Pos) }) } -func assertSequence(t *testing.T, node Node, expected ...string) { +func assertSequence(t *testing.T, node Result, expected ...string) { require.NotNil(t, node) actual := []string{} diff --git a/debugoff.go b/debugoff.go index 8eb4790..ba0ef33 100644 --- a/debugoff.go +++ b/debugoff.go @@ -2,10 +2,12 @@ package goparsify +// NewParser should be called around the 
creation of every Parser. +// It does nothing normally and should incur no runtime overhead, but when building with -tags debug +// it will instrument every parser to collect valuable timing information displayable with DumpDebugStats. func NewParser(description string, p Parser) Parser { return p } -func DumpDebugStats() { - -} +// DumpDebugStats will print out the current timings for each parser if built with -tags debug +func DumpDebugStats() {} diff --git a/debugon.go b/debugon.go index f80b6ea..cf94f75 100644 --- a/debugon.go +++ b/debugon.go @@ -10,9 +10,9 @@ import ( "time" ) -var parsers []*DebugParser +var parsers []*debugParser -type DebugParser struct { +type debugParser struct { Description string Caller string Next Parser @@ -20,7 +20,7 @@ type DebugParser struct { Calls int } -func (dp *DebugParser) Parse(ps *State) Node { +func (dp *debugParser) Parse(ps *State) Result { start := time.Now() ret := dp.Next(ps) @@ -42,6 +42,9 @@ func getPackageName(f *runtime.Func) string { } } +// NewParser should be called around the creation of every Parser. +// It does nothing normally and should incur no runtime overhead, but when building with -tags debug +// it will instrument every parser to collect valuable timing information displayable with DumpDebugStats. func NewParser(description string, p Parser) Parser { fpcs := make([]uintptr, 1) caller := "" @@ -61,7 +64,7 @@ func NewParser(description string, p Parser) Parser { } } - dp := &DebugParser{ + dp := &debugParser{ Description: description, Next: p, Caller: caller, @@ -71,6 +74,7 @@ func NewParser(description string, p Parser) Parser { return dp.Parse } +// DumpDebugStats will print out the current timings for each parser if built with -tags debug func DumpDebugStats() { sort.Slice(parsers, func(i, j int) bool { return parsers[i].Time >= parsers[j].Time diff --git a/html/html.go b/html/html.go index 6d1a2d3..01ae9c4 100644 --- a/html/html.go +++ b/html/html.go @@ -4,8 +4,8 @@ import ( . 
"github.com/vektah/goparsify" ) -func Parse(input string) (result interface{}, remaining string, err error) { - return ParseString(tag, input) +func Parse(input string) (result interface{}, err error) { + return Run(tag, input) } type Tag struct { @@ -18,28 +18,28 @@ var ( tag Parser identifier = NoAutoWS(Merge(Seq(WS(), Chars("a-zA-Z", 1), Chars("a-zA-Z0-9", 0)))) - text = Map(NotChars("<>"), func(n Node) Node { - return Node{Result: n.Token} + text = Map(NotChars("<>"), func(n Result) Result { + return Result{Result: n.Token} }) element = Any(text, &tag) - elements = Map(Some(element), func(n Node) Node { + elements = Map(Some(element), func(n Result) Result { ret := []interface{}{} for _, child := range n.Child { ret = append(ret, child.Result) } - return Node{Result: ret} + return Result{Result: ret} }) attr = Seq(identifier, "=", StringLit(`"'`)) - attrs = Map(Some(attr), func(node Node) Node { + attrs = Map(Some(attr), func(node Result) Result { attr := map[string]string{} for _, attrNode := range node.Child { attr[attrNode.Child[0].Token] = attrNode.Child[2].Result.(string) } - return Node{Result: attr} + return Result{Result: attr} }) tstart = Seq("<", identifier, attrs, ">") @@ -47,9 +47,9 @@ var ( ) func init() { - tag = Map(Seq(tstart, elements, tend), func(node Node) Node { + tag = Map(Seq(tstart, elements, tend), func(node Result) Result { openTag := node.Child[0] - return Node{Result: Tag{ + return Result{Result: Tag{ Name: openTag.Child[1].Token, Attributes: openTag.Child[2].Result.(map[string]string), Body: node.Child[1].Result.([]interface{}), diff --git a/html/html_test.go b/html/html_test.go index f2f8e6f..f614b4d 100644 --- a/html/html_test.go +++ b/html/html_test.go @@ -7,7 +7,7 @@ import ( ) func TestParse(t *testing.T) { - result, _, err := Parse(`hello

world

`) + result, err := Parse(`hello

world

`) require.NoError(t, err) require.Equal(t, Tag{Name: "body", Attributes: map[string]string{}, Body: []interface{}{ "hello ", diff --git a/json/json.go b/json/json.go index e2ab252..2748108 100644 --- a/json/json.go +++ b/json/json.go @@ -1,6 +1,5 @@ package json -import "errors" import . "github.com/vektah/goparsify" var ( @@ -12,22 +11,22 @@ var ( _number = NumberLit() _properties = Some(Seq(StringLit(`"`), ":", &_value), ",") - _array = Map(Seq("[", Some(&_value, ","), "]"), func(n Node) Node { + _array = Map(Seq("[", Some(&_value, ","), "]"), func(n Result) Result { ret := []interface{}{} for _, child := range n.Child[1].Child { ret = append(ret, child.Result) } - return Node{Result: ret} + return Result{Result: ret} }) - _object = Map(Seq("{", _properties, "}"), func(n Node) Node { + _object = Map(Seq("{", _properties, "}"), func(n Result) Result { ret := map[string]interface{}{} for _, prop := range n.Child[1].Child { ret[prop.Child[0].Result.(string)] = prop.Child[2].Result } - return Node{Result: ret} + return Result{Result: ret} }) ) @@ -36,15 +35,5 @@ func init() { } func Unmarshal(input string) (interface{}, error) { - result, remaining, err := ParseString(_value, input) - - if err != nil { - return result, err - } - - if remaining != "" { - return result, errors.New("left unparsed: " + remaining) - } - - return result, err + return Run(_value, input) } diff --git a/json/profile/json.go b/json/profile/json.go index 639a19e..b7d9733 100644 --- a/json/profile/json.go +++ b/json/profile/json.go @@ -22,7 +22,7 @@ func main() { log.Fatal(err) } - pprof.StartCPUProfile(f) + _ = pprof.StartCPUProfile(f) defer func() { pprof.StopCPUProfile() @@ -32,7 +32,7 @@ func main() { } }() } - max := 1000 + max := 100000 if *memprofile != "" { runtime.MemProfileRate = 1 max = 1000 @@ -42,8 +42,8 @@ func main() { log.Fatal(err) } - pprof.WriteHeapProfile(f) - f.Close() + _ = pprof.WriteHeapProfile(f) + _ = f.Close() }() } diff --git a/literals.go b/literals.go index 
70832f9..ec508e3 100644 --- a/literals.go +++ b/literals.go @@ -6,22 +6,21 @@ import ( "unicode/utf8" ) +// StringLit matches a quoted string and returns it in .Result. It may contain: +// - unicode +// - escaped characters, eg \" or \n +// - unicode sequences, eg \uBEEF func StringLit(allowedQuotes string) Parser { - return NewParser("string literal", func(ps *State) Node { + return NewParser("string literal", func(ps *State) Result { ps.AutoWS() - for i := 0; i < len(allowedQuotes); i++ { - if ps.Input[ps.Pos] == allowedQuotes[i] { - - } - } if !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) { ps.ErrorHere(allowedQuotes) - return Node{} + return Result{} } quote := ps.Input[ps.Pos] - var end int = ps.Pos + 1 + var end = ps.Pos + 1 inputLen := len(ps.Input) var buf *bytes.Buffer @@ -31,7 +30,7 @@ func StringLit(allowedQuotes string) Parser { case '\\': if end+1 >= inputLen { ps.ErrorHere(string(quote)) - return Node{} + return Result{} } if buf == nil { @@ -41,16 +40,16 @@ func StringLit(allowedQuotes string) Parser { c := ps.Input[end+1] if c == 'u' { if end+6 >= inputLen { - ps.Error.Expected = "[a-f0-9]{4}" + ps.Error.expected = "[a-f0-9]{4}" ps.Error.pos = end + 2 - return Node{} + return Result{} } r, ok := unhex(ps.Input[end+2 : end+6]) if !ok { - ps.Error.Expected = "[a-f0-9]" + ps.Error.expected = "[a-f0-9]" ps.Error.pos = end + 2 - return Node{} + return Result{} } buf.WriteRune(r) end += 6 @@ -62,10 +61,10 @@ func StringLit(allowedQuotes string) Parser { if buf == nil { result := ps.Input[ps.Pos+1 : end] ps.Pos = end + 1 - return Node{Result: result} + return Result{Result: result} } ps.Pos = end + 1 - return Node{Result: buf.String()} + return Result{Result: buf.String()} default: if buf == nil { if ps.Input[end] < 127 { @@ -83,12 +82,13 @@ func StringLit(allowedQuotes string) Parser { } ps.ErrorHere(string(quote)) - return Node{} + return Result{} }) } +// NumberLit matches a floating point or integer number and returns it as a int64 or float64 in 
.Result func NumberLit() Parser { - return NewParser("number literal", func(ps *State) Node { + return NewParser("number literal", func(ps *State) Result { ps.AutoWS() end := ps.Pos float := false @@ -126,7 +126,7 @@ func NumberLit() Parser { if end == ps.Pos { ps.ErrorHere("number") - return Node{} + return Result{} } var result interface{} @@ -138,10 +138,10 @@ func NumberLit() Parser { } if err != nil { ps.ErrorHere("number") - return Node{} + return Result{} } ps.Pos = end - return Node{Result: result} + return Result{Result: result} }) } diff --git a/literals_test.go b/literals_test.go index e96f303..7cdb5e1 100644 --- a/literals_test.go +++ b/literals_test.go @@ -28,25 +28,25 @@ func TestStringLit(t *testing.T) { t.Run("test non match", func(t *testing.T) { _, p := runParser(`1`, parser) - require.Equal(t, `"'`, p.Error.Expected) + require.Equal(t, `"'`, p.Error.expected) require.Equal(t, `1`, p.Get()) }) t.Run("test unterminated string", func(t *testing.T) { _, p := runParser(`"hello `, parser) - require.Equal(t, `"`, p.Error.Expected) + require.Equal(t, `"`, p.Error.expected) require.Equal(t, `"hello `, p.Get()) }) t.Run("test unmatched quotes", func(t *testing.T) { _, p := runParser(`"hello '`, parser) - require.Equal(t, `"`, p.Error.Expected) + require.Equal(t, `"`, p.Error.expected) require.Equal(t, 0, p.Pos) }) t.Run("test unterminated escape", func(t *testing.T) { _, p := runParser(`"hello \`, parser) - require.Equal(t, `"`, p.Error.Expected) + require.Equal(t, `"`, p.Error.expected) require.Equal(t, 0, p.Pos) }) @@ -64,20 +64,20 @@ func TestStringLit(t *testing.T) { t.Run("test escaped unicode", func(t *testing.T) { result, p := runParser(`"hello \ubeef cake"`, parser) - require.Equal(t, "", p.Error.Expected) + require.Equal(t, "", p.Error.expected) require.Equal(t, "hello \uBEEF cake", result.Result) require.Equal(t, ``, p.Get()) }) t.Run("test invalid escaped unicode", func(t *testing.T) { _, p := runParser(`"hello \ucake"`, parser) - 
require.Equal(t, "offset 9: Expected [a-f0-9]", p.Error.Error()) + require.Equal(t, "offset 9: expected [a-f0-9]", p.Error.Error()) require.Equal(t, 0, p.Pos) }) t.Run("test incomplete escaped unicode", func(t *testing.T) { _, p := runParser(`"hello \uca"`, parser) - require.Equal(t, "offset 9: Expected [a-f0-9]{4}", p.Error.Error()) + require.Equal(t, "offset 9: expected [a-f0-9]{4}", p.Error.Error()) require.Equal(t, 0, p.Pos) }) } @@ -164,13 +164,13 @@ func TestNumberLit(t *testing.T) { t.Run("non matching string", func(t *testing.T) { _, p := runParser("foo", parser) - require.Equal(t, "offset 0: Expected number", p.Error.Error()) + require.Equal(t, "offset 0: expected number", p.Error.Error()) require.Equal(t, 0, p.Pos) }) t.Run("invalid number", func(t *testing.T) { _, p := runParser("-.", parser) - require.Equal(t, "offset 0: Expected number", p.Error.Error()) + require.Equal(t, "offset 0: expected number", p.Error.Error()) require.Equal(t, 0, p.Pos) }) } diff --git a/parser.go b/parser.go index 5802b9d..7b45004 100644 --- a/parser.go +++ b/parser.go @@ -1,18 +1,28 @@ package goparsify import ( + "errors" "fmt" "strings" "unicode/utf8" ) -type Node struct { +// Result is the output of a parser. Usually only one of its fields will be set and should be thought of +// more as a union type. Having it avoids interface{} littered all through the parsing code and makes +// it easy to do the two most common operations, getting a token and finding a child. +type Result struct { Token string - Child []Node + Child []Result Result interface{} } -type Parser func(*State) Node +// Parser is the workhorse of parsify. A parser takes a State and returns a result, consuming some +// of the State in the process. 
+// Given state is shared there are a few rules that should be followed: +// - A parser that errors must set state.Error +// - A parser that errors must not change state.Pos +// - A parser that consumed some input should advance state.Pos +type Parser func(*State) Result // Parserish types are any type that can be turned into a Parser by Parsify // These currently include *Parser and string literals. @@ -30,17 +40,22 @@ type Parser func(*State) Node // ``` type Parserish interface{} +// Parsify takes a Parserish and makes a Parser out of it. It should be called by +// any Parser that accepts a Parser as an argument. It should never be called during +// parsing; instead call it during parser creation so there is no runtime cost. +// +// See Parserish for details. func Parsify(p Parserish) Parser { switch p := p.(type) { case nil: return nil - case func(*State) Node: + case func(*State) Result: return NewParser("anonymous func", p) case Parser: return p case *Parser: // Todo: Maybe capture this stack and on nil show it? Is there a good error library to do this? - return func(ptr *State) Node { + return func(ptr *State) Result { return (*p)(ptr) } case string: @@ -50,6 +65,7 @@ func Parsify(p Parserish) Parser { } } +// ParsifyAll calls Parsify on all parsers func ParsifyAll(parsers ...Parserish) []Parser { ret := make([]Parser, len(parsers)) for i, parser := range parsers { @@ -58,53 +74,61 @@ func ParsifyAll(parsers ...Parserish) []Parser { return ret } +// WS will consume whitespace, it should only be needed when AutoWS is turned off func WS() Parser { - return NewParser("AutoWS", func(ps *State) Node { + return NewParser("AutoWS", func(ps *State) Result { ps.WS() - return Node{} + return Result{} }) } -func ParseString(parser Parserish, input string) (result interface{}, remaining string, err error) { +// Run applies some input to a parser and returns the result, failing if the input isn't fully consumed. 
+// It is a convenience method for the most common way to invoke a parser. +func Run(parser Parserish, input string) (result interface{}, err error) { p := Parsify(parser) - ps := InputString(input) + ps := NewState(input) ret := p(ps) ps.AutoWS() - if ps.Error.Expected != "" { - return nil, ps.Get(), ps.Error + if ps.Error.expected != "" { + return ret.Result, ps.Error } - return ret.Result, ps.Get(), nil + if ps.Get() != "" { + return ret.Result, errors.New("left unparsed: " + ps.Get()) + } + + return ret.Result, nil } +// Exact will fully match the exact string supplied, or error. The match will be stored in .Token func Exact(match string) Parser { if len(match) == 1 { matchByte := match[0] - return NewParser(match, func(ps *State) Node { + return NewParser(match, func(ps *State) Result { ps.AutoWS() if ps.Pos >= len(ps.Input) || ps.Input[ps.Pos] != matchByte { ps.ErrorHere(match) - return Node{} + return Result{} } ps.Advance(1) - return Node{Token: match} + return Result{Token: match} }) } - return NewParser(match, func(ps *State) Node { + return NewParser(match, func(ps *State) Result { ps.AutoWS() if !strings.HasPrefix(ps.Get(), match) { ps.ErrorHere(match) - return Node{} + return Result{} } ps.Advance(len(match)) - return Node{Token: match} + return Result{Token: match} }) } @@ -125,9 +149,9 @@ func parseRepetition(defaultMin, defaultMax int, repetition ...int) (min int, ma } // parseMatcher turns a string in the format a-f01234A-F into: -// - a set string of matches string(01234) +// - an alphabet of matches string(01234) // - a set of ranges [][]rune{{'a', 'f'}, {'A', 'F'}} -func parseMatcher(matcher string) (matches string, ranges [][]rune) { +func parseMatcher(matcher string) (alphabet string, ranges [][]rune) { runes := []rune(matcher) for i := 0; i < len(runes); i++ { @@ -141,29 +165,36 @@ func parseMatcher(matcher string) (matches string, ranges [][]rune) { ranges = append(ranges, []rune{end, start}) } } else if i+1 < len(runes) && runes[i] == '\\' 
{ - matches += string(runes[i+1]) + alphabet += string(runes[i+1]) } else { - matches += string(runes[i]) + alphabet += string(runes[i]) } } - return matches, ranges + return alphabet, ranges } +// Chars is the swiss army knife of character matches. It can match: +// - ranges: Chars("a-z") will match one or more lowercase letter +// - alphabets: Chars("abcd") will match one or more of the letters abcd in any order +// - min and max: Chars("a-z0-9", 4, 6) will match 4-6 lowercase alphanumeric characters +// the above can be combined in any order func Chars(matcher string, repetition ...int) Parser { return NewParser("["+matcher+"]", charsImpl(matcher, false, repetition...)) } +// NotChars accepts the full range of input from Chars, but it will stop when any +// character matches. func NotChars(matcher string, repetition ...int) Parser { return NewParser("!["+matcher+"]", charsImpl(matcher, true, repetition...)) } func charsImpl(matcher string, stopOn bool, repetition ...int) Parser { min, max := parseRepetition(1, -1, repetition...) 
- matches, ranges := parseMatcher(matcher) + alphabet, ranges := parseMatcher(matcher) - return func(ps *State) Node { + return func(ps *State) Result { ps.AutoWS() matched := 0 for ps.Pos+matched < len(ps.Input) { @@ -173,7 +204,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser { r, w := utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:]) - anyMatched := strings.ContainsRune(matches, r) + anyMatched := strings.ContainsRune(alphabet, r) if !anyMatched { for _, rng := range ranges { if r >= rng[0] && r <= rng[1] { @@ -191,11 +222,11 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser { if matched < min { ps.ErrorHere(matcher) - return Node{} + return Result{} } result := ps.Input[ps.Pos : ps.Pos+matched] ps.Advance(matched) - return Node{Token: result} + return Result{Token: result} } } diff --git a/parser_test.go b/parser_test.go index 08a8f7b..8868593 100644 --- a/parser_test.go +++ b/parser_test.go @@ -9,17 +9,17 @@ import ( func TestParsify(t *testing.T) { t.Run("strings", func(t *testing.T) { - require.Equal(t, "ff", Parsify("ff")(InputString("ffooo")).Token) + require.Equal(t, "ff", Parsify("ff")(NewState("ffooo")).Token) }) t.Run("parsers", func(t *testing.T) { - require.Equal(t, "ff", Parsify(Chars("f"))(InputString("ffooo")).Token) + require.Equal(t, "ff", Parsify(Chars("f"))(NewState("ffooo")).Token) }) t.Run("parser funcs", func(t *testing.T) { - node := Parsify(func(p *State) Node { - return Node{Token: "hello"} - })(InputString("ffooo")) + node := Parsify(func(p *State) Result { + return Result{Token: "hello"} + })(NewState("ffooo")) require.Equal(t, "hello", node.Token) }) @@ -29,7 +29,7 @@ func TestParsify(t *testing.T) { parserfied := Parsify(&parser) parser = Chars("f") - node := parserfied(InputString("ffooo")) + node := parserfied(NewState("ffooo")) require.Equal(t, "ff", node.Token) }) @@ -41,10 +41,10 @@ func TestParsify(t *testing.T) { func TestParsifyAll(t *testing.T) { parsers := ParsifyAll("ff", 
"gg") - result := parsers[0](InputString("ffooo")) + result := parsers[0](NewState("ffooo")) require.Equal(t, "ff", result.Token) - result = parsers[1](InputString("ffooo")) + result = parsers[1](NewState("ffooo")) require.Equal(t, "", result.Token) } @@ -63,19 +63,19 @@ func TestExact(t *testing.T) { t.Run("error", func(t *testing.T) { _, ps := runParser("foobar", Exact("bar")) - require.Equal(t, "bar", ps.Error.Expected) + require.Equal(t, "bar", ps.Error.expected) require.Equal(t, 0, ps.Pos) }) t.Run("error char", func(t *testing.T) { _, ps := runParser("foobar", Exact("o")) - require.Equal(t, "o", ps.Error.Expected) + require.Equal(t, "o", ps.Error.expected) require.Equal(t, 0, ps.Pos) }) t.Run("eof char", func(t *testing.T) { _, ps := runParser("", Exact("o")) - require.Equal(t, "o", ps.Error.Expected) + require.Equal(t, "o", ps.Error.expected) require.Equal(t, 0, ps.Pos) }) } @@ -104,13 +104,13 @@ func TestChars(t *testing.T) { t.Run("no match", func(t *testing.T) { _, ps := runParser("ffffff", Chars("0-9")) - require.Equal(t, "offset 0: Expected 0-9", ps.Error.Error()) + require.Equal(t, "offset 0: expected 0-9", ps.Error.Error()) require.Equal(t, 0, ps.Pos) }) t.Run("no match with min", func(t *testing.T) { _, ps := runParser("ffffff", Chars("0-9", 4)) - require.Equal(t, "0-9", ps.Error.Expected) + require.Equal(t, "0-9", ps.Error.expected) require.Equal(t, 0, ps.Pos) }) @@ -134,26 +134,31 @@ func TestChars(t *testing.T) { } func TestParseString(t *testing.T) { - Y := Map("hello", func(n Node) Node { return Node{Result: n.Token} }) - t.Run("partial match", func(t *testing.T) { - result, remaining, err := ParseString(Y, "hello world") + Y := Map("hello", func(n Result) Result { return Result{Result: n.Token} }) + + t.Run("full match", func(t *testing.T) { + result, err := Run(Y, "hello") require.Equal(t, "hello", result) - require.Equal(t, "world", remaining) require.NoError(t, err) }) - t.Run("error", func(t *testing.T) { - result, remaining, err := 
ParseString(Y, "world") - require.Nil(t, result) - require.Equal(t, "world", remaining) + t.Run("partial match", func(t *testing.T) { + result, err := Run(Y, "hello world") + require.Equal(t, "hello", result) require.Error(t, err) - require.Equal(t, "offset 0: Expected hello", err.Error()) + require.Equal(t, "left unparsed: world", err.Error()) + }) + + t.Run("error", func(t *testing.T) { + result, err := Run(Y, "world") + require.Nil(t, result) + require.Error(t, err) + require.Equal(t, "offset 0: expected hello", err.Error()) }) } - -func runParser(input string, parser Parser) (Node, *State) { - ps := InputString(input) +func runParser(input string, parser Parser) (Result, *State) { + ps := NewState(input) result := parser(ps) return result, ps } diff --git a/state.go b/state.go index d153df8..1860bc4 100644 --- a/state.go +++ b/state.go @@ -4,63 +4,38 @@ import ( "fmt" ) +// Error represents a parse error. These will often be set, the parser will back up a little and +// find another viable path. In general when combining errors the longest error should be returned. type Error struct { pos int - Expected string + expected string } -func (e Error) Pos() int { return e.pos } -func (e Error) Error() string { return fmt.Sprintf("offset %d: Expected %s", e.pos, e.Expected) } +// Pos is the offset into the document the error was found +func (e Error) Pos() int { return e.pos } +// Error satisfies the golang error interface +func (e Error) Error() string { return fmt.Sprintf("offset %d: expected %s", e.pos, e.expected) } + +// WSFunc matches a byte and returns true if it is whitespace type WSFunc func(c byte) bool +// State is the current parse state. It is entirely public because parsers are expected to mutate it during the parse. 
type State struct { - Input string - Pos int - Error Error + // The full input string + Input string + // An offset into the string, pointing to the current tip + Pos int + // Error is a secondary return channel from parsers, but used so heavily + // in backtracking that it has been inlined to avoid allocations. + Error Error + // Called to determine what to ignore when WS is called, or when AutoWS fires WSFunc WSFunc NoAutoWS bool } -func (s *State) Advance(i int) { - s.Pos += i -} - -// AutoWS consumes all whitespace -func (s *State) AutoWS() { - if s.NoAutoWS { - return - } - s.WS() -} - -func (s *State) WS() { - for s.Pos < len(s.Input) && s.WSFunc(s.Input[s.Pos]) { - s.Pos++ - } -} - -func (s *State) Get() string { - if s.Pos > len(s.Input) { - return "" - } - return s.Input[s.Pos:] -} - -func (s *State) ErrorHere(expected string) { - s.Error.pos = s.Pos - s.Error.Expected = expected -} - -func (s *State) ClearError() { - s.Error.Expected = "" -} - -func (s *State) Errored() bool { - return s.Error.Expected != "" -} - -func InputString(input string) *State { +// NewState creates a new State from a string +func NewState(input string) *State { return &State{ Input: input, WSFunc: func(b byte) bool { @@ -72,3 +47,48 @@ func InputString(input string) *State { }, } } + +// Advance the Pos along by i bytes +func (s *State) Advance(i int) { + s.Pos += i +} + +// AutoWS consumes all whitespace and advances Pos but can be disabled by the NoAutoWS() parser. +func (s *State) AutoWS() { + if s.NoAutoWS { + return + } + s.WS() +} + +// WS consumes all whitespace and advances Pos. +func (s *State) WS() { + for s.Pos < len(s.Input) && s.WSFunc(s.Input[s.Pos]) { + s.Pos++ + } +} + +// Get the remaining input. +func (s *State) Get() string { + if s.Pos > len(s.Input) { + return "" + } + return s.Input[s.Pos:] +} + +// ErrorHere raises an error at the current position. 
+func (s *State) ErrorHere(expected string) { + s.Error.pos = s.Pos + s.Error.expected = expected +} + +// Recover from the current error. Often called by combinators that can match +// when one of their children succeeds, but others have failed. +func (s *State) Recover() { + s.Error.expected = "" +} + +// Errored returns true if the current parser has failed. +func (s *State) Errored() bool { + return s.Error.expected != "" +} diff --git a/state_test.go b/state_test.go index c735353..5ecf367 100644 --- a/state_test.go +++ b/state_test.go @@ -7,7 +7,7 @@ import ( ) func TestState_Advance(t *testing.T) { - ps := InputString("fooo") + ps := NewState("fooo") require.Equal(t, 0, ps.Pos) ps.Advance(2) require.Equal(t, 2, ps.Pos) @@ -16,7 +16,7 @@ func TestState_Advance(t *testing.T) { } func TestState_Get(t *testing.T) { - ps := InputString("fooo") + ps := NewState("fooo") require.Equal(t, "fooo", ps.Get()) ps.Advance(1) require.Equal(t, "ooo", ps.Get()) @@ -27,19 +27,19 @@ func TestState_Get(t *testing.T) { } func TestState_Errors(t *testing.T) { - ps := InputString("fooo") + ps := NewState("fooo") ps.ErrorHere("hello") - require.Equal(t, "offset 0: Expected hello", ps.Error.Error()) + require.Equal(t, "offset 0: expected hello", ps.Error.Error()) require.Equal(t, 0, ps.Error.Pos()) require.True(t, ps.Errored()) - ps.ClearError() + ps.Recover() require.False(t, ps.Errored()) ps.Advance(2) ps.ErrorHere("hello2") - require.Equal(t, "offset 2: Expected hello2", ps.Error.Error()) + require.Equal(t, "offset 2: expected hello2", ps.Error.Error()) require.Equal(t, 2, ps.Error.Pos()) require.True(t, ps.Errored()) }