Add parse logging

This commit is contained in:
Adam Scarr 2017-08-10 21:04:14 +10:00
parent b64fcfaa61
commit af542eff9e
11 changed files with 306 additions and 57 deletions

View File

@ -45,6 +45,10 @@ func Any(parsers ...Parserish) Parser {
for _, parser := range parserfied { for _, parser := range parserfied {
node := parser(ps) node := parser(ps)
if ps.Errored() { if ps.Errored() {
if ps.Cut > startpos {
longestError = ps.Error
break
}
if ps.Error.pos > longestError.pos { if ps.Error.pos > longestError.pos {
longestError = ps.Error longestError = ps.Error
} }
@ -142,13 +146,13 @@ func Bind(parser Parserish, val interface{}) Parser {
func Map(parser Parserish, f func(n Result) Result) Parser { func Map(parser Parserish, f func(n Result) Result) Parser {
p := Parsify(parser) p := Parsify(parser)
return NewParser("Map()", func(ps *State) Result { return func(ps *State) Result {
node := p(ps) node := p(ps)
if ps.Errored() { if ps.Errored() {
return node return node
} }
return f(node) return f(node)
}) }
} }
func flatten(n Result) string { func flatten(n Result) string {
@ -169,7 +173,7 @@ func flatten(n Result) string {
// Merge all child Tokens together recursively // Merge all child Tokens together recursively
func Merge(parser Parserish) Parser { func Merge(parser Parserish) Parser {
return NewParser("Merge()", Map(parser, func(n Result) Result { return Map(parser, func(n Result) Result {
return Result{Token: flatten(n)} return Result{Token: flatten(n)}
})) })
} }

View File

@ -162,6 +162,15 @@ func TestBind(t *testing.T) {
}) })
} }
func TestCut(t *testing.T) {
// does backtracking happen anywhere else?
t.Run("test any", func(t *testing.T) {
_, ps := runParser("var world", Any(Seq("var", Cut, "hello"), "var world"))
require.Equal(t, "offset 4: expected hello", ps.Error.Error())
require.Equal(t, 0, ps.Pos)
})
}
func TestMerge(t *testing.T) { func TestMerge(t *testing.T) {
var bracer Parser var bracer Parser
bracer = Seq("(", Maybe(&bracer), ")") bracer = Seq("(", Maybe(&bracer), ")")

66
debug/frames.go Normal file
View File

@ -0,0 +1,66 @@
package debug
import (
"bufio"
"fmt"
"os"
"path/filepath"
"regexp"
"runtime"
"strings"
)
var varRegex = regexp.MustCompile(`(?:var)?\s*(\w*)\s*:?=`)
func getPackageName(f runtime.Frame) string {
parts := strings.Split(f.Func.Name(), ".")
pl := len(parts)
if parts[pl-2][0] == '(' {
return strings.Join(parts[0:pl-2], ".")
} else {
return strings.Join(parts[0:pl-1], ".")
}
}
func getVarName(filename string, lineNo int) string {
f, err := os.Open(filename)
if err != nil {
return ""
}
scanner := bufio.NewScanner(f)
for i := 0; i < lineNo; i++ {
scanner.Scan()
}
line := scanner.Text()
if matches := varRegex.FindStringSubmatch(line); matches != nil {
return matches[1]
}
return ""
}
// GetDefinition returns the name of the variable and location this parser was defined by walking up the stack
func GetDefinition() (varName string, location string) {
pc := make([]uintptr, 64)
n := runtime.Callers(3, pc)
frames := runtime.CallersFrames(pc[:n])
var frame runtime.Frame
more := true
for more {
frame, more = frames.Next()
pkg := getPackageName(frame)
if pkg == "github.com/vektah/goparsify" || pkg == "github.com/vektah/goparsify/debug" {
continue
}
varName := getVarName(frame.File, frame.Line)
if varName != "" {
return varName, fmt.Sprintf("%s:%d", filepath.Base(frame.File), frame.Line)
}
}
return "", ""
}

24
debug/frames_test.go Normal file
View File

@ -0,0 +1,24 @@
package debug
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestRegex(t *testing.T) {
tests := map[string]string{
"attrs": ` attrs = Map(Some(attr), func(node Result) Result {`,
"_value": ` _value = Any(_null, _true, _false, _string, _number, _array, _object)`,
"_object": `_object = Map(Seq("{", Cut, _properties, "}"), func(n Result) Result {`,
"expr": `var expr = Exact("foo")`,
"number": `number := NumberLit()`,
}
for expected, input := range tests {
t.Run(input, func(t *testing.T) {
matches := varRegex.FindStringSubmatch(input)
require.NotNil(t, matches)
require.Equal(t, expected, matches[1])
})
}
}

View File

@ -2,6 +2,8 @@
package goparsify package goparsify
import "io"
// NewParser should be called around the creation of every Parser. // NewParser should be called around the creation of every Parser.
// It does nothing normally and should incur no runtime overhead, but when building with -tags debug // It does nothing normally and should incur no runtime overhead, but when building with -tags debug
// it will instrument every parser to collect valuable timing information displayable with DumpDebugStats. // it will instrument every parser to collect valuable timing information displayable with DumpDebugStats.
@ -11,3 +13,9 @@ func NewParser(description string, p Parser) Parser {
// DumpDebugStats will print out the curring timings for each parser if built with -tags debug // DumpDebugStats will print out the curring timings for each parser if built with -tags debug
func DumpDebugStats() {} func DumpDebugStats() {}
// EnableLogging will write logs to the given writer as the next parse happens
func EnableLogging(w io.Writer) {}
// DisableLogging will stop writing logs
func DisableLogging() {}

View File

@ -3,77 +3,134 @@
package goparsify package goparsify
import ( import (
"bytes"
"flag"
"fmt" "fmt"
"runtime" "io"
"os"
"sort" "sort"
"strconv"
"strings" "strings"
"time" "time"
"github.com/vektah/goparsify/debug"
) )
var log io.Writer = nil
var parsers []*debugParser var parsers []*debugParser
var pendingOpenLog = ""
var activeParsers []*debugParser
var longestLocation = 0
type debugParser struct { type debugParser struct {
Description string Match string
Caller string Var string
Location string
Next Parser Next Parser
Time time.Duration Time time.Duration
Calls int Calls int
} }
func (dp *debugParser) Name() string {
if len(activeParsers) > 2 && activeParsers[len(activeParsers)-2].Var == dp.Var {
return dp.Match
}
return dp.Var
}
func (dp *debugParser) logf(ps *State, result *Result, format string, args ...interface{}) string {
buf := &bytes.Buffer{}
buf.WriteString(fmt.Sprintf("%"+strconv.Itoa(longestLocation)+"s | ", dp.Location))
buf.WriteString(fmt.Sprintf("%-15s", ps.Preview(15)))
buf.WriteString(" | ")
output := ""
if ps.Errored() {
output = "fail"
} else if result != nil {
output = result.Token
}
buf.WriteString(fmt.Sprintf("%-10s | ", output))
buf.WriteString(strings.Repeat(" ", len(activeParsers)-1))
buf.WriteString(fmt.Sprintf(format, args...))
buf.WriteRune('\n')
return buf.String()
}
func (dp *debugParser) logStart(ps *State) {
if log != nil {
if pendingOpenLog != "" {
fmt.Fprint(log, pendingOpenLog)
pendingOpenLog = ""
}
pendingOpenLog = dp.logf(ps, nil, dp.Name())
}
}
func (dp *debugParser) logEnd(ps *State, result *Result) {
if log != nil {
if pendingOpenLog != "" {
fmt.Fprintf(log, dp.logf(ps, result, dp.Name()))
pendingOpenLog = ""
}
}
}
func (dp *debugParser) Parse(ps *State) Result { func (dp *debugParser) Parse(ps *State) Result {
activeParsers = append(activeParsers, dp)
start := time.Now() start := time.Now()
dp.logStart(ps)
ret := dp.Next(ps) ret := dp.Next(ps)
dp.logEnd(ps, &ret)
dp.Time = dp.Time + time.Since(start) dp.Time = dp.Time + time.Since(start)
dp.Calls++ dp.Calls++
activeParsers = activeParsers[0 : len(activeParsers)-1]
return ret return ret
} }
func getPackageName(f *runtime.Func) string { func init() {
parts := strings.Split(f.Name(), ".") logs := flag.Bool("parselogs", false, "print detailed parsing logs")
pl := len(parts) flag.Parse()
if parts[pl-2][0] == '(' { if *logs {
return strings.Join(parts[0:pl-2], ".") EnableLogging(os.Stdout)
} else {
return strings.Join(parts[0:pl-1], ".")
} }
} }
// NewParser should be called around the creation of every Parser. // NewParser should be called around the creation of every Parser.
// It does nothing normally and should incur no runtime overhead, but when building with -tags debug // It does nothing normally and should incur no runtime overhead, but when building with -tags debug
// it will instrument every parser to collect valuable timing information displayable with DumpDebugStats. // it will instrument every parser to collect valuable timing and debug information.
func NewParser(description string, p Parser) Parser { func NewParser(name string, p Parser) Parser {
fpcs := make([]uintptr, 1) description, location := debug.GetDefinition()
caller := ""
for i := 1; i < 10; i++ {
n := runtime.Callers(i, fpcs)
if n != 0 {
frame := runtime.FuncForPC(fpcs[0] - 1)
pkg := getPackageName(frame)
if pkg != "github.com/vektah/goparsify" {
file, line := frame.FileLine(fpcs[0] - 1)
caller = fmt.Sprintf("%s:%d", file, line)
break
}
}
}
dp := &debugParser{ dp := &debugParser{
Description: description, Match: name,
Var: description,
Location: location,
Next: p, Next: p,
Caller: caller, }
if len(dp.Location) > longestLocation {
longestLocation = len(dp.Location)
} }
parsers = append(parsers, dp) parsers = append(parsers, dp)
return dp.Parse return dp.Parse
} }
// EnableLogging will write logs to the given writer as the next parse happens
func EnableLogging(w io.Writer) {
log = w
}
// DisableLogging will stop writing logs
func DisableLogging() {
log = nil
}
// DumpDebugStats will print out the curring timings for each parser if built with -tags debug // DumpDebugStats will print out the curring timings for each parser if built with -tags debug
func DumpDebugStats() { func DumpDebugStats() {
sort.Slice(parsers, func(i, j int) bool { sort.Slice(parsers, func(i, j int) bool {
@ -82,6 +139,6 @@ func DumpDebugStats() {
fmt.Println("Parser stats:") fmt.Println("Parser stats:")
for _, parser := range parsers { for _, parser := range parsers {
fmt.Printf("%20s\t%10s\t%10d\tcalls\t%s\n", parser.Description, parser.Time.String(), parser.Calls, parser.Caller) fmt.Printf("%20s\t%10s\t%10d\tcalls\t%s\n", parser.Name(), parser.Time.String(), parser.Calls, parser.Location)
} }
} }

View File

@ -42,16 +42,16 @@ var (
return Result{Result: attr} return Result{Result: attr}
}) })
tstart = Seq("<", identifier, attrs, ">") tstart = Seq("<", Cut, identifier, attrs, ">")
tend = Seq("</", identifier, ">") tend = Seq("</", Cut, identifier, ">")
) )
func init() { func init() {
tag = Map(Seq(tstart, elements, tend), func(node Result) Result { tag = Map(Seq(tstart, elements, tend), func(node Result) Result {
openTag := node.Child[0] openTag := node.Child[0]
return Result{Result: Tag{ return Result{Result: Tag{
Name: openTag.Child[1].Token, Name: openTag.Child[2].Token,
Attributes: openTag.Child[2].Result.(map[string]string), Attributes: openTag.Child[3].Result.(map[string]string),
Body: node.Child[1].Result.([]interface{}), Body: node.Child[1].Result.([]interface{}),
}} }}

View File

@ -1,6 +1,8 @@
package json package json
import . "github.com/vektah/goparsify" import (
. "github.com/vektah/goparsify"
)
var ( var (
_value Parser _value Parser
@ -11,18 +13,18 @@ var (
_number = NumberLit() _number = NumberLit()
_properties = Some(Seq(StringLit(`"`), ":", &_value), ",") _properties = Some(Seq(StringLit(`"`), ":", &_value), ",")
_array = Map(Seq("[", Some(&_value, ","), "]"), func(n Result) Result { _array = Map(Seq("[", Cut, Some(&_value, ","), "]"), func(n Result) Result {
ret := []interface{}{} ret := []interface{}{}
for _, child := range n.Child[1].Child { for _, child := range n.Child[2].Child {
ret = append(ret, child.Result) ret = append(ret, child.Result)
} }
return Result{Result: ret} return Result{Result: ret}
}) })
_object = Map(Seq("{", _properties, "}"), func(n Result) Result { _object = Map(Seq("{", Cut, _properties, "}"), func(n Result) Result {
ret := map[string]interface{}{} ret := map[string]interface{}{}
for _, prop := range n.Child[1].Child { for _, prop := range n.Child[2].Child {
ret[prop.Child[0].Result.(string)] = prop.Child[2].Result ret[prop.Child[0].Result.(string)] = prop.Child[2].Result
} }

View File

@ -52,7 +52,7 @@ type Parserish interface{}
func Parsify(p Parserish) Parser { func Parsify(p Parserish) Parser {
switch p := p.(type) { switch p := p.(type) {
case func(*State) Result: case func(*State) Result:
return NewParser("anonymous func", p) return p
case Parser: case Parser:
return p return p
case *Parser: case *Parser:
@ -84,6 +84,13 @@ func WS() Parser {
}) })
} }
// Cut prevents backtracking beyond this point. Usually used after keywords when you
// are sure this is the correct path. Improves performance and error reporting.
func Cut(ps *State) Result {
ps.Cut = ps.Pos
return Result{}
}
// Run applies some input to a parser and returns the result, failing if the input isnt fully consumed. // Run applies some input to a parser and returns the result, failing if the input isnt fully consumed.
// It is a convenience method for the most common way to invoke a parser. // It is a convenience method for the most common way to invoke a parser.
func Run(parser Parserish, input string) (result interface{}, err error) { func Run(parser Parserish, input string) (result interface{}, err error) {

View File

@ -3,11 +3,6 @@ goparsify [![CircleCI](https://circleci.com/gh/Vektah/goparsify/tree/master.svg?
A parser-combinator library for building easy to test, read and maintain parsers using functional composition. A parser-combinator library for building easy to test, read and maintain parsers using functional composition.
### todo
- fatal errors: Some way for a parser to say "Ive found a good match, the input is broken, stop here with an error"
- better errors: currently only the longest error is returned, but it would be nice to show all expected tokens that could follow.
### benchmarks ### benchmarks
I dont have many benchmarks set up yet, but the json parser is very promising. Nearly keeping up with the stdlib for raw speed: I dont have many benchmarks set up yet, but the json parser is very promising. Nearly keeping up with the stdlib for raw speed:
``` ```
@ -19,7 +14,70 @@ PASS
ok github.com/vektah/goparsify/json 10.840s ok github.com/vektah/goparsify/json 10.840s
``` ```
### debugging mode ### debugging parsers
When a parser isnt working as you intended you can build with debugging and enable logging to get a detailed log of exactly what the parser is doing.
1. First build with debug using `-tags debug`
2. enable logging by passing a runtime flag -parselogs or calling `EnableLogging(os.Stdout)` in your code.
This works great with tests, eg in the goparsify source tree
```
$ cd html
$ go test -tags debug -parselogs
html.go:50 | <body>hello <p | | tag
html.go:45 | <body>hello <p | | tstart
html.go:45 | body>hello <p c | < | <
html.go:20 | >hello <p color | body | identifier
html.go:35 | >hello <p color | | attrs
html.go:34 | >hello <p color | | attr
html.go:20 | >hello <p color | fail | identifier
html.go:45 | hello <p color= | > | >
html.go:26 | hello <p color= | | elements
html.go:25 | hello <p color= | | element
html.go:21 | <p color="blue" | hello | text
html.go:25 | <p color="blue" | | element
html.go:21 | <p color="blue" | fail | text
html.go:50 | <p color="blue" | | tag
html.go:45 | <p color="blue" | | tstart
html.go:45 | p color="blue"> | < | <
html.go:20 | color="blue">w | p | identifier
html.go:35 | color="blue">w | | attrs
html.go:34 | color="blue">w | | attr
html.go:20 | ="blue">world</ | color | identifier
html.go:34 | "blue">world</p | = | =
html.go:34 | >world</p></bod | | string literal
html.go:34 | >world</p></bod | | attr
html.go:20 | >world</p></bod | fail | identifier
html.go:45 | world</p></body | > | >
html.go:26 | world</p></body | | elements
html.go:25 | world</p></body | | element
html.go:21 | </p></body> | world | text
html.go:25 | </p></body> | | element
html.go:21 | </p></body> | fail | text
html.go:50 | </p></body> | | tag
html.go:45 | </p></body> | | tstart
html.go:45 | /p></body> | < | <
html.go:20 | /p></body> | fail | identifier
html.go:46 | </p></body> | | tend
html.go:46 | p></body> | </ | </
html.go:20 | ></body> | p | identifier
html.go:46 | </body> | > | >
html.go:25 | </body> | | element
html.go:21 | </body> | fail | text
html.go:50 | </body> | | tag
html.go:45 | </body> | | tstart
html.go:45 | /body> | < | <
html.go:20 | /body> | fail | identifier
html.go:46 | </body> | | tend
html.go:46 | body> | </ | </
html.go:20 | > | body | identifier
html.go:46 | | > | >
PASS
ok github.com/vektah/goparsify/html 0.118s
```
### debugging performance
If you build the parser with -tags debug it will instrument each parser and a call to DumpDebugStats() will show stats: If you build the parser with -tags debug it will instrument each parser and a call to DumpDebugStats() will show stats:
``` ```
Any() 415.7136ms 87000 calls json.go:35 Any() 415.7136ms 87000 calls json.go:35

View File

@ -25,6 +25,8 @@ type State struct {
Input string Input string
// An offset into the string, pointing to the current tip // An offset into the string, pointing to the current tip
Pos int Pos int
// Do not backtrack past this point
Cut int
// Error is a secondary return channel from parsers, but used so heavily // Error is a secondary return channel from parsers, but used so heavily
// in backtracking that it has been inlined to avoid allocations. // in backtracking that it has been inlined to avoid allocations.
Error Error Error Error
@ -88,6 +90,18 @@ func (s *State) Get() string {
return s.Input[s.Pos:] return s.Input[s.Pos:]
} }
// Preview of the the next x characters
func (s *State) Preview(x int) string {
if s.Pos > len(s.Input) {
return ""
}
if len(s.Input)-s.Pos >= x {
return s.Input[s.Pos : s.Pos+x]
}
return s.Input[s.Pos:]
}
// ErrorHere raises an error at the current position. // ErrorHere raises an error at the current position.
func (s *State) ErrorHere(expected string) { func (s *State) ErrorHere(expected string) {
s.Error.pos = s.Pos s.Error.pos = s.Pos