summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--combinator.go12
-rw-r--r--combinator_test.go9
-rw-r--r--debug/frames.go66
-rw-r--r--debug/frames_test.go24
-rw-r--r--debugoff.go8
-rw-r--r--debugon.go129
-rw-r--r--html/html.go8
-rw-r--r--json/json.go12
-rw-r--r--parser.go9
-rw-r--r--readme.md72
-rw-r--r--state.go14
11 files changed, 306 insertions, 57 deletions
diff --git a/combinator.go b/combinator.go
index f55977c..7a811bc 100644
--- a/combinator.go
+++ b/combinator.go
@@ -45,6 +45,10 @@ func Any(parsers ...Parserish) Parser {
for _, parser := range parserfied {
node := parser(ps)
if ps.Errored() {
+ if ps.Cut > startpos {
+ longestError = ps.Error
+ break
+ }
if ps.Error.pos > longestError.pos {
longestError = ps.Error
}
@@ -142,13 +146,13 @@ func Bind(parser Parserish, val interface{}) Parser {
func Map(parser Parserish, f func(n Result) Result) Parser {
p := Parsify(parser)
- return NewParser("Map()", func(ps *State) Result {
+ return func(ps *State) Result {
node := p(ps)
if ps.Errored() {
return node
}
return f(node)
- })
+ }
}
func flatten(n Result) string {
@@ -169,7 +173,7 @@ func flatten(n Result) string {
// Merge all child Tokens together recursively
func Merge(parser Parserish) Parser {
- return NewParser("Merge()", Map(parser, func(n Result) Result {
+ return Map(parser, func(n Result) Result {
return Result{Token: flatten(n)}
- }))
+ })
}
diff --git a/combinator_test.go b/combinator_test.go
index 283bb2a..5ac227f 100644
--- a/combinator_test.go
+++ b/combinator_test.go
@@ -162,6 +162,15 @@ func TestBind(t *testing.T) {
})
}
+func TestCut(t *testing.T) {
+ // does backtracking happen anywhere else?
+ t.Run("test any", func(t *testing.T) {
+ _, ps := runParser("var world", Any(Seq("var", Cut, "hello"), "var world"))
+ require.Equal(t, "offset 4: expected hello", ps.Error.Error())
+ require.Equal(t, 0, ps.Pos)
+ })
+}
+
func TestMerge(t *testing.T) {
var bracer Parser
bracer = Seq("(", Maybe(&bracer), ")")
diff --git a/debug/frames.go b/debug/frames.go
new file mode 100644
index 0000000..88a48f0
--- /dev/null
+++ b/debug/frames.go
@@ -0,0 +1,66 @@
+package debug
+
+import (
+ "bufio"
+ "fmt"
+ "os"
+ "path/filepath"
+ "regexp"
+ "runtime"
+ "strings"
+)
+
+var varRegex = regexp.MustCompile(`(?:var)?\s*(\w*)\s*:?=`)
+
+func getPackageName(f runtime.Frame) string {
+ parts := strings.Split(f.Func.Name(), ".")
+ pl := len(parts)
+
+ if parts[pl-2][0] == '(' {
+ return strings.Join(parts[0:pl-2], ".")
+ } else {
+ return strings.Join(parts[0:pl-1], ".")
+ }
+}
+
+func getVarName(filename string, lineNo int) string {
+ f, err := os.Open(filename)
+ if err != nil {
+ return ""
+ }
+
+ scanner := bufio.NewScanner(f)
+ for i := 0; i < lineNo; i++ {
+ scanner.Scan()
+ }
+
+ line := scanner.Text()
+ if matches := varRegex.FindStringSubmatch(line); matches != nil {
+ return matches[1]
+ }
+ return ""
+}
+
+// GetDefinition returns the name of the variable and location this parser was defined by walking up the stack
+func GetDefinition() (varName string, location string) {
+ pc := make([]uintptr, 64)
+ n := runtime.Callers(3, pc)
+ frames := runtime.CallersFrames(pc[:n])
+
+ var frame runtime.Frame
+ more := true
+ for more {
+ frame, more = frames.Next()
+ pkg := getPackageName(frame)
+ if pkg == "github.com/vektah/goparsify" || pkg == "github.com/vektah/goparsify/debug" {
+ continue
+ }
+
+ varName := getVarName(frame.File, frame.Line)
+ if varName != "" {
+ return varName, fmt.Sprintf("%s:%d", filepath.Base(frame.File), frame.Line)
+ }
+ }
+
+ return "", ""
+}
diff --git a/debug/frames_test.go b/debug/frames_test.go
new file mode 100644
index 0000000..40c1086
--- /dev/null
+++ b/debug/frames_test.go
@@ -0,0 +1,24 @@
+package debug
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestRegex(t *testing.T) {
+ tests := map[string]string{
+ "attrs": ` attrs = Map(Some(attr), func(node Result) Result {`,
+ "_value": ` _value = Any(_null, _true, _false, _string, _number, _array, _object)`,
+ "_object": `_object = Map(Seq("{", Cut, _properties, "}"), func(n Result) Result {`,
+ "expr": `var expr = Exact("foo")`,
+ "number": `number := NumberLit()`,
+ }
+ for expected, input := range tests {
+ t.Run(input, func(t *testing.T) {
+ matches := varRegex.FindStringSubmatch(input)
+ require.NotNil(t, matches)
+ require.Equal(t, expected, matches[1])
+ })
+ }
+}
diff --git a/debugoff.go b/debugoff.go
index ba0ef33..7afcce6 100644
--- a/debugoff.go
+++ b/debugoff.go
@@ -2,6 +2,8 @@
package goparsify
+import "io"
+
// NewParser should be called around the creation of every Parser.
// It does nothing normally and should incur no runtime overhead, but when building with -tags debug
// it will instrument every parser to collect valuable timing information displayable with DumpDebugStats.
@@ -11,3 +13,9 @@ func NewParser(description string, p Parser) Parser {
// DumpDebugStats will print out the curring timings for each parser if built with -tags debug
func DumpDebugStats() {}
+
+// EnableLogging will write logs to the given writer as the next parse happens
+func EnableLogging(w io.Writer) {}
+
+// DisableLogging will stop writing logs
+func DisableLogging() {}
diff --git a/debugon.go b/debugon.go
index cf94f75..4e68db3 100644
--- a/debugon.go
+++ b/debugon.go
@@ -3,77 +3,134 @@
package goparsify
import (
+ "bytes"
+ "flag"
"fmt"
- "runtime"
+ "io"
+ "os"
"sort"
+ "strconv"
"strings"
"time"
+
+ "github.com/vektah/goparsify/debug"
)
+var log io.Writer = nil
var parsers []*debugParser
+var pendingOpenLog = ""
+var activeParsers []*debugParser
+var longestLocation = 0
type debugParser struct {
- Description string
- Caller string
- Next Parser
- Time time.Duration
- Calls int
+ Match string
+ Var string
+ Location string
+ Next Parser
+ Time time.Duration
+ Calls int
+}
+
+func (dp *debugParser) Name() string {
+ if len(activeParsers) > 2 && activeParsers[len(activeParsers)-2].Var == dp.Var {
+ return dp.Match
+ }
+ return dp.Var
+}
+
+func (dp *debugParser) logf(ps *State, result *Result, format string, args ...interface{}) string {
+ buf := &bytes.Buffer{}
+ buf.WriteString(fmt.Sprintf("%"+strconv.Itoa(longestLocation)+"s | ", dp.Location))
+ buf.WriteString(fmt.Sprintf("%-15s", ps.Preview(15)))
+ buf.WriteString(" | ")
+ output := ""
+ if ps.Errored() {
+ output = "fail"
+ } else if result != nil {
+ output = result.Token
+ }
+ buf.WriteString(fmt.Sprintf("%-10s | ", output))
+ buf.WriteString(strings.Repeat(" ", len(activeParsers)-1))
+ buf.WriteString(fmt.Sprintf(format, args...))
+
+ buf.WriteRune('\n')
+ return buf.String()
+}
+
+func (dp *debugParser) logStart(ps *State) {
+ if log != nil {
+ if pendingOpenLog != "" {
+ fmt.Fprint(log, pendingOpenLog)
+ pendingOpenLog = ""
+ }
+ pendingOpenLog = dp.logf(ps, nil, dp.Name())
+ }
+}
+
+func (dp *debugParser) logEnd(ps *State, result *Result) {
+ if log != nil {
+ if pendingOpenLog != "" {
+ fmt.Fprintf(log, dp.logf(ps, result, dp.Name()))
+ pendingOpenLog = ""
+ }
+ }
}
func (dp *debugParser) Parse(ps *State) Result {
+ activeParsers = append(activeParsers, dp)
start := time.Now()
+ dp.logStart(ps)
ret := dp.Next(ps)
+ dp.logEnd(ps, &ret)
dp.Time = dp.Time + time.Since(start)
dp.Calls++
+ activeParsers = activeParsers[0 : len(activeParsers)-1]
return ret
}
-func getPackageName(f *runtime.Func) string {
- parts := strings.Split(f.Name(), ".")
- pl := len(parts)
+func init() {
+ logs := flag.Bool("parselogs", false, "print detailed parsing logs")
+ flag.Parse()
- if parts[pl-2][0] == '(' {
- return strings.Join(parts[0:pl-2], ".")
- } else {
- return strings.Join(parts[0:pl-1], ".")
+ if *logs {
+ EnableLogging(os.Stdout)
}
}
// NewParser should be called around the creation of every Parser.
// It does nothing normally and should incur no runtime overhead, but when building with -tags debug
-// it will instrument every parser to collect valuable timing information displayable with DumpDebugStats.
-func NewParser(description string, p Parser) Parser {
- fpcs := make([]uintptr, 1)
- caller := ""
-
- for i := 1; i < 10; i++ {
- n := runtime.Callers(i, fpcs)
-
- if n != 0 {
- frame := runtime.FuncForPC(fpcs[0] - 1)
- pkg := getPackageName(frame)
-
- if pkg != "github.com/vektah/goparsify" {
- file, line := frame.FileLine(fpcs[0] - 1)
- caller = fmt.Sprintf("%s:%d", file, line)
- break
- }
- }
- }
+// it will instrument every parser to collect valuable timing and debug information.
+func NewParser(name string, p Parser) Parser {
+ description, location := debug.GetDefinition()
dp := &debugParser{
- Description: description,
- Next: p,
- Caller: caller,
+ Match: name,
+ Var: description,
+ Location: location,
+ Next: p,
+ }
+
+ if len(dp.Location) > longestLocation {
+ longestLocation = len(dp.Location)
}
parsers = append(parsers, dp)
return dp.Parse
}
+// EnableLogging will write logs to the given writer as the next parse happens
+func EnableLogging(w io.Writer) {
+ log = w
+}
+
+// DisableLogging will stop writing logs
+func DisableLogging() {
+ log = nil
+}
+
// DumpDebugStats will print out the curring timings for each parser if built with -tags debug
func DumpDebugStats() {
sort.Slice(parsers, func(i, j int) bool {
@@ -82,6 +139,6 @@ func DumpDebugStats() {
fmt.Println("Parser stats:")
for _, parser := range parsers {
- fmt.Printf("%20s\t%10s\t%10d\tcalls\t%s\n", parser.Description, parser.Time.String(), parser.Calls, parser.Caller)
+ fmt.Printf("%20s\t%10s\t%10d\tcalls\t%s\n", parser.Name(), parser.Time.String(), parser.Calls, parser.Location)
}
}
diff --git a/html/html.go b/html/html.go
index 2bbe921..0def587 100644
--- a/html/html.go
+++ b/html/html.go
@@ -42,16 +42,16 @@ var (
return Result{Result: attr}
})
- tstart = Seq("<", identifier, attrs, ">")
- tend = Seq("</", identifier, ">")
+ tstart = Seq("<", Cut, identifier, attrs, ">")
+ tend = Seq("</", Cut, identifier, ">")
)
func init() {
tag = Map(Seq(tstart, elements, tend), func(node Result) Result {
openTag := node.Child[0]
return Result{Result: Tag{
- Name: openTag.Child[1].Token,
- Attributes: openTag.Child[2].Result.(map[string]string),
+ Name: openTag.Child[2].Token,
+ Attributes: openTag.Child[3].Result.(map[string]string),
Body: node.Child[1].Result.([]interface{}),
}}
diff --git a/json/json.go b/json/json.go
index 2748108..0d78cf5 100644
--- a/json/json.go
+++ b/json/json.go
@@ -1,6 +1,8 @@
package json
-import . "github.com/vektah/goparsify"
+import (
+ . "github.com/vektah/goparsify"
+)
var (
_value Parser
@@ -11,18 +13,18 @@ var (
_number = NumberLit()
_properties = Some(Seq(StringLit(`"`), ":", &_value), ",")
- _array = Map(Seq("[", Some(&_value, ","), "]"), func(n Result) Result {
+ _array = Map(Seq("[", Cut, Some(&_value, ","), "]"), func(n Result) Result {
ret := []interface{}{}
- for _, child := range n.Child[1].Child {
+ for _, child := range n.Child[2].Child {
ret = append(ret, child.Result)
}
return Result{Result: ret}
})
- _object = Map(Seq("{", _properties, "}"), func(n Result) Result {
+ _object = Map(Seq("{", Cut, _properties, "}"), func(n Result) Result {
ret := map[string]interface{}{}
- for _, prop := range n.Child[1].Child {
+ for _, prop := range n.Child[2].Child {
ret[prop.Child[0].Result.(string)] = prop.Child[2].Result
}
diff --git a/parser.go b/parser.go
index 7590ffa..7c3f866 100644
--- a/parser.go
+++ b/parser.go
@@ -52,7 +52,7 @@ type Parserish interface{}
func Parsify(p Parserish) Parser {
switch p := p.(type) {
case func(*State) Result:
- return NewParser("anonymous func", p)
+ return p
case Parser:
return p
case *Parser:
@@ -84,6 +84,13 @@ func WS() Parser {
})
}
+// Cut prevents backtracking beyond this point. Usually used after keywords when you
+// are sure this is the correct path. Improves performance and error reporting.
+func Cut(ps *State) Result {
+ ps.Cut = ps.Pos
+ return Result{}
+}
+
// Run applies some input to a parser and returns the result, failing if the input isnt fully consumed.
// It is a convenience method for the most common way to invoke a parser.
func Run(parser Parserish, input string) (result interface{}, err error) {
diff --git a/readme.md b/readme.md
index 967e5c5..9e712b0 100644
--- a/readme.md
+++ b/readme.md
@@ -1,12 +1,7 @@
goparsify [![CircleCI](https://circleci.com/gh/Vektah/goparsify/tree/master.svg?style=shield)](https://circleci.com/gh/Vektah/goparsify/tree/master) [![godoc](http://b.repl.ca/v1/godoc-reference-blue.png)](https://godoc.org/github.com/Vektah/goparsify)
=========
-A parser-combinator library for building easy to test, read and maintain parsers using functional composition.
-
-### todo
- - fatal errors: Some way for a parser to say "Ive found a good match, the input is broken, stop here with an error"
- - better errors: currently only the longest error is returned, but it would be nice to show all expected tokens that could follow.
-
+A parser-combinator library for building easy to test, read and maintain parsers using functional composition.
### benchmarks
I dont have many benchmarks set up yet, but the json parser is very promising. Nearly keeping up with the stdlib for raw speed:
@@ -19,7 +14,70 @@ PASS
ok github.com/vektah/goparsify/json 10.840s
```
-### debugging mode
+### debugging parsers
+
+When a parser isnt working as you intended you can build with debugging and enable logging to get a detailed log of exactly what the parser is doing.
+
+1. First build with debug using `-tags debug`
+2. enable logging by passing a runtime flag -parselogs or calling `EnableLogging(os.Stdout)` in your code.
+
+This works great with tests, eg in the goparsify source tree
+```
+$ cd html
+$ go test -tags debug -parselogs
+html.go:50 | <body>hello <p | | tag
+html.go:45 | <body>hello <p | | tstart
+html.go:45 | body>hello <p c | < | <
+html.go:20 | >hello <p color | body | identifier
+html.go:35 | >hello <p color | | attrs
+html.go:34 | >hello <p color | | attr
+html.go:20 | >hello <p color | fail | identifier
+html.go:45 | hello <p color= | > | >
+html.go:26 | hello <p color= | | elements
+html.go:25 | hello <p color= | | element
+html.go:21 | <p color="blue" | hello | text
+html.go:25 | <p color="blue" | | element
+html.go:21 | <p color="blue" | fail | text
+html.go:50 | <p color="blue" | | tag
+html.go:45 | <p color="blue" | | tstart
+html.go:45 | p color="blue"> | < | <
+html.go:20 | color="blue">w | p | identifier
+html.go:35 | color="blue">w | | attrs
+html.go:34 | color="blue">w | | attr
+html.go:20 | ="blue">world</ | color | identifier
+html.go:34 | "blue">world</p | = | =
+html.go:34 | >world</p></bod | | string literal
+html.go:34 | >world</p></bod | | attr
+html.go:20 | >world</p></bod | fail | identifier
+html.go:45 | world</p></body | > | >
+html.go:26 | world</p></body | | elements
+html.go:25 | world</p></body | | element
+html.go:21 | </p></body> | world | text
+html.go:25 | </p></body> | | element
+html.go:21 | </p></body> | fail | text
+html.go:50 | </p></body> | | tag
+html.go:45 | </p></body> | | tstart
+html.go:45 | /p></body> | < | <
+html.go:20 | /p></body> | fail | identifier
+html.go:46 | </p></body> | | tend
+html.go:46 | p></body> | </ | </
+html.go:20 | ></body> | p | identifier
+html.go:46 | </body> | > | >
+html.go:25 | </body> | | element
+html.go:21 | </body> | fail | text
+html.go:50 | </body> | | tag
+html.go:45 | </body> | | tstart
+html.go:45 | /body> | < | <
+html.go:20 | /body> | fail | identifier
+html.go:46 | </body> | | tend
+html.go:46 | body> | </ | </
+html.go:20 | > | body | identifier
+html.go:46 | | > | >
+PASS
+ok github.com/vektah/goparsify/html 0.118s
+```
+
+### debugging performance
If you build the parser with -tags debug it will instrument each parser and a call to DumpDebugStats() will show stats:
```
Any() 415.7136ms 87000 calls json.go:35
diff --git a/state.go b/state.go
index 3c7de43..384eb32 100644
--- a/state.go
+++ b/state.go
@@ -25,6 +25,8 @@ type State struct {
Input string
// An offset into the string, pointing to the current tip
Pos int
+ // Do not backtrack past this point
+ Cut int
// Error is a secondary return channel from parsers, but used so heavily
// in backtracking that it has been inlined to avoid allocations.
Error Error
@@ -88,6 +90,18 @@ func (s *State) Get() string {
return s.Input[s.Pos:]
}
+// Preview of the the next x characters
+func (s *State) Preview(x int) string {
+ if s.Pos > len(s.Input) {
+ return ""
+ }
+ if len(s.Input)-s.Pos >= x {
+ return s.Input[s.Pos : s.Pos+x]
+ }
+
+ return s.Input[s.Pos:]
+}
+
// ErrorHere raises an error at the current position.
func (s *State) ErrorHere(expected string) {
s.Error.pos = s.Pos