Add parse logging

This commit is contained in:
Adam Scarr 2017-08-10 21:04:14 +10:00
parent b64fcfaa61
commit af542eff9e
11 changed files with 306 additions and 57 deletions

View File

@ -45,6 +45,10 @@ func Any(parsers ...Parserish) Parser {
for _, parser := range parserfied {
node := parser(ps)
if ps.Errored() {
if ps.Cut > startpos {
longestError = ps.Error
break
}
if ps.Error.pos > longestError.pos {
longestError = ps.Error
}
@ -142,13 +146,13 @@ func Bind(parser Parserish, val interface{}) Parser {
func Map(parser Parserish, f func(n Result) Result) Parser {
p := Parsify(parser)
return NewParser("Map()", func(ps *State) Result {
return func(ps *State) Result {
node := p(ps)
if ps.Errored() {
return node
}
return f(node)
})
}
}
func flatten(n Result) string {
@ -169,7 +173,7 @@ func flatten(n Result) string {
// Merge all child Tokens together recursively
func Merge(parser Parserish) Parser {
return NewParser("Merge()", Map(parser, func(n Result) Result {
return Map(parser, func(n Result) Result {
return Result{Token: flatten(n)}
}))
})
}

View File

@ -162,6 +162,15 @@ func TestBind(t *testing.T) {
})
}
func TestCut(t *testing.T) {
// does backtracking happen anywhere else?
t.Run("test any", func(t *testing.T) {
_, ps := runParser("var world", Any(Seq("var", Cut, "hello"), "var world"))
require.Equal(t, "offset 4: expected hello", ps.Error.Error())
require.Equal(t, 0, ps.Pos)
})
}
func TestMerge(t *testing.T) {
var bracer Parser
bracer = Seq("(", Maybe(&bracer), ")")

66
debug/frames.go Normal file
View File

@ -0,0 +1,66 @@
package debug
import (
"bufio"
"fmt"
"os"
"path/filepath"
"regexp"
"runtime"
"strings"
)
var varRegex = regexp.MustCompile(`(?:var)?\s*(\w*)\s*:?=`)
func getPackageName(f runtime.Frame) string {
parts := strings.Split(f.Func.Name(), ".")
pl := len(parts)
if parts[pl-2][0] == '(' {
return strings.Join(parts[0:pl-2], ".")
} else {
return strings.Join(parts[0:pl-1], ".")
}
}
func getVarName(filename string, lineNo int) string {
f, err := os.Open(filename)
if err != nil {
return ""
}
scanner := bufio.NewScanner(f)
for i := 0; i < lineNo; i++ {
scanner.Scan()
}
line := scanner.Text()
if matches := varRegex.FindStringSubmatch(line); matches != nil {
return matches[1]
}
return ""
}
// GetDefinition returns the name of the variable and location this parser was defined by walking up the stack
func GetDefinition() (varName string, location string) {
pc := make([]uintptr, 64)
n := runtime.Callers(3, pc)
frames := runtime.CallersFrames(pc[:n])
var frame runtime.Frame
more := true
for more {
frame, more = frames.Next()
pkg := getPackageName(frame)
if pkg == "github.com/vektah/goparsify" || pkg == "github.com/vektah/goparsify/debug" {
continue
}
varName := getVarName(frame.File, frame.Line)
if varName != "" {
return varName, fmt.Sprintf("%s:%d", filepath.Base(frame.File), frame.Line)
}
}
return "", ""
}

24
debug/frames_test.go Normal file
View File

@ -0,0 +1,24 @@
package debug
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestRegex(t *testing.T) {
tests := map[string]string{
"attrs": ` attrs = Map(Some(attr), func(node Result) Result {`,
"_value": ` _value = Any(_null, _true, _false, _string, _number, _array, _object)`,
"_object": `_object = Map(Seq("{", Cut, _properties, "}"), func(n Result) Result {`,
"expr": `var expr = Exact("foo")`,
"number": `number := NumberLit()`,
}
for expected, input := range tests {
t.Run(input, func(t *testing.T) {
matches := varRegex.FindStringSubmatch(input)
require.NotNil(t, matches)
require.Equal(t, expected, matches[1])
})
}
}

View File

@ -2,6 +2,8 @@
package goparsify
import "io"
// NewParser should be called around the creation of every Parser.
// It does nothing normally and should incur no runtime overhead, but when building with -tags debug
// it will instrument every parser to collect valuable timing information displayable with DumpDebugStats.
@ -11,3 +13,9 @@ func NewParser(description string, p Parser) Parser {
// DumpDebugStats will print out the curring timings for each parser if built with -tags debug
func DumpDebugStats() {}
// EnableLogging will write logs to the given writer as the next parse happens
func EnableLogging(w io.Writer) {}
// DisableLogging will stop writing logs
func DisableLogging() {}

View File

@ -3,77 +3,134 @@
package goparsify
import (
"bytes"
"flag"
"fmt"
"runtime"
"io"
"os"
"sort"
"strconv"
"strings"
"time"
"github.com/vektah/goparsify/debug"
)
var log io.Writer = nil
var parsers []*debugParser
var pendingOpenLog = ""
var activeParsers []*debugParser
var longestLocation = 0
type debugParser struct {
Description string
Caller string
Next Parser
Time time.Duration
Calls int
Match string
Var string
Location string
Next Parser
Time time.Duration
Calls int
}
func (dp *debugParser) Name() string {
if len(activeParsers) > 2 && activeParsers[len(activeParsers)-2].Var == dp.Var {
return dp.Match
}
return dp.Var
}
func (dp *debugParser) logf(ps *State, result *Result, format string, args ...interface{}) string {
buf := &bytes.Buffer{}
buf.WriteString(fmt.Sprintf("%"+strconv.Itoa(longestLocation)+"s | ", dp.Location))
buf.WriteString(fmt.Sprintf("%-15s", ps.Preview(15)))
buf.WriteString(" | ")
output := ""
if ps.Errored() {
output = "fail"
} else if result != nil {
output = result.Token
}
buf.WriteString(fmt.Sprintf("%-10s | ", output))
buf.WriteString(strings.Repeat(" ", len(activeParsers)-1))
buf.WriteString(fmt.Sprintf(format, args...))
buf.WriteRune('\n')
return buf.String()
}
func (dp *debugParser) logStart(ps *State) {
if log != nil {
if pendingOpenLog != "" {
fmt.Fprint(log, pendingOpenLog)
pendingOpenLog = ""
}
pendingOpenLog = dp.logf(ps, nil, dp.Name())
}
}
func (dp *debugParser) logEnd(ps *State, result *Result) {
if log != nil {
if pendingOpenLog != "" {
fmt.Fprintf(log, dp.logf(ps, result, dp.Name()))
pendingOpenLog = ""
}
}
}
func (dp *debugParser) Parse(ps *State) Result {
activeParsers = append(activeParsers, dp)
start := time.Now()
dp.logStart(ps)
ret := dp.Next(ps)
dp.logEnd(ps, &ret)
dp.Time = dp.Time + time.Since(start)
dp.Calls++
activeParsers = activeParsers[0 : len(activeParsers)-1]
return ret
}
func getPackageName(f *runtime.Func) string {
parts := strings.Split(f.Name(), ".")
pl := len(parts)
func init() {
logs := flag.Bool("parselogs", false, "print detailed parsing logs")
flag.Parse()
if parts[pl-2][0] == '(' {
return strings.Join(parts[0:pl-2], ".")
} else {
return strings.Join(parts[0:pl-1], ".")
if *logs {
EnableLogging(os.Stdout)
}
}
// NewParser should be called around the creation of every Parser.
// It does nothing normally and should incur no runtime overhead, but when building with -tags debug
// it will instrument every parser to collect valuable timing information displayable with DumpDebugStats.
func NewParser(description string, p Parser) Parser {
fpcs := make([]uintptr, 1)
caller := ""
for i := 1; i < 10; i++ {
n := runtime.Callers(i, fpcs)
if n != 0 {
frame := runtime.FuncForPC(fpcs[0] - 1)
pkg := getPackageName(frame)
if pkg != "github.com/vektah/goparsify" {
file, line := frame.FileLine(fpcs[0] - 1)
caller = fmt.Sprintf("%s:%d", file, line)
break
}
}
}
// it will instrument every parser to collect valuable timing and debug information.
func NewParser(name string, p Parser) Parser {
description, location := debug.GetDefinition()
dp := &debugParser{
Description: description,
Next: p,
Caller: caller,
Match: name,
Var: description,
Location: location,
Next: p,
}
if len(dp.Location) > longestLocation {
longestLocation = len(dp.Location)
}
parsers = append(parsers, dp)
return dp.Parse
}
// EnableLogging will write logs to the given writer as the next parse happens
func EnableLogging(w io.Writer) {
log = w
}
// DisableLogging will stop writing logs
func DisableLogging() {
log = nil
}
// DumpDebugStats will print out the curring timings for each parser if built with -tags debug
func DumpDebugStats() {
sort.Slice(parsers, func(i, j int) bool {
@ -82,6 +139,6 @@ func DumpDebugStats() {
fmt.Println("Parser stats:")
for _, parser := range parsers {
fmt.Printf("%20s\t%10s\t%10d\tcalls\t%s\n", parser.Description, parser.Time.String(), parser.Calls, parser.Caller)
fmt.Printf("%20s\t%10s\t%10d\tcalls\t%s\n", parser.Name(), parser.Time.String(), parser.Calls, parser.Location)
}
}

View File

@ -42,16 +42,16 @@ var (
return Result{Result: attr}
})
tstart = Seq("<", identifier, attrs, ">")
tend = Seq("</", identifier, ">")
tstart = Seq("<", Cut, identifier, attrs, ">")
tend = Seq("</", Cut, identifier, ">")
)
func init() {
tag = Map(Seq(tstart, elements, tend), func(node Result) Result {
openTag := node.Child[0]
return Result{Result: Tag{
Name: openTag.Child[1].Token,
Attributes: openTag.Child[2].Result.(map[string]string),
Name: openTag.Child[2].Token,
Attributes: openTag.Child[3].Result.(map[string]string),
Body: node.Child[1].Result.([]interface{}),
}}

View File

@ -1,6 +1,8 @@
package json
import . "github.com/vektah/goparsify"
import (
. "github.com/vektah/goparsify"
)
var (
_value Parser
@ -11,18 +13,18 @@ var (
_number = NumberLit()
_properties = Some(Seq(StringLit(`"`), ":", &_value), ",")
_array = Map(Seq("[", Some(&_value, ","), "]"), func(n Result) Result {
_array = Map(Seq("[", Cut, Some(&_value, ","), "]"), func(n Result) Result {
ret := []interface{}{}
for _, child := range n.Child[1].Child {
for _, child := range n.Child[2].Child {
ret = append(ret, child.Result)
}
return Result{Result: ret}
})
_object = Map(Seq("{", _properties, "}"), func(n Result) Result {
_object = Map(Seq("{", Cut, _properties, "}"), func(n Result) Result {
ret := map[string]interface{}{}
for _, prop := range n.Child[1].Child {
for _, prop := range n.Child[2].Child {
ret[prop.Child[0].Result.(string)] = prop.Child[2].Result
}

View File

@ -52,7 +52,7 @@ type Parserish interface{}
func Parsify(p Parserish) Parser {
switch p := p.(type) {
case func(*State) Result:
return NewParser("anonymous func", p)
return p
case Parser:
return p
case *Parser:
@ -84,6 +84,13 @@ func WS() Parser {
})
}
// Cut prevents backtracking beyond this point. Usually used after keywords when you
// are sure this is the correct path. Improves performance and error reporting.
func Cut(ps *State) Result {
ps.Cut = ps.Pos
return Result{}
}
// Run applies some input to a parser and returns the result, failing if the input isnt fully consumed.
// It is a convenience method for the most common way to invoke a parser.
func Run(parser Parserish, input string) (result interface{}, err error) {

View File

@ -1,12 +1,7 @@
goparsify [![CircleCI](https://circleci.com/gh/Vektah/goparsify/tree/master.svg?style=shield)](https://circleci.com/gh/Vektah/goparsify/tree/master) [![godoc](http://b.repl.ca/v1/godoc-reference-blue.png)](https://godoc.org/github.com/Vektah/goparsify)
=========
A parser-combinator library for building easy to test, read and maintain parsers using functional composition.
### todo
- fatal errors: Some way for a parser to say "Ive found a good match, the input is broken, stop here with an error"
- better errors: currently only the longest error is returned, but it would be nice to show all expected tokens that could follow.
A parser-combinator library for building easy to test, read and maintain parsers using functional composition.
### benchmarks
I dont have many benchmarks set up yet, but the json parser is very promising. Nearly keeping up with the stdlib for raw speed:
@ -19,7 +14,70 @@ PASS
ok github.com/vektah/goparsify/json 10.840s
```
### debugging mode
### debugging parsers
When a parser isnt working as you intended you can build with debugging and enable logging to get a detailed log of exactly what the parser is doing.
1. First build with debug using `-tags debug`
2. enable logging by passing a runtime flag -parselogs or calling `EnableLogging(os.Stdout)` in your code.
This works great with tests, eg in the goparsify source tree
```
$ cd html
$ go test -tags debug -parselogs
html.go:50 | <body>hello <p | | tag
html.go:45 | <body>hello <p | | tstart
html.go:45 | body>hello <p c | < | <
html.go:20 | >hello <p color | body | identifier
html.go:35 | >hello <p color | | attrs
html.go:34 | >hello <p color | | attr
html.go:20 | >hello <p color | fail | identifier
html.go:45 | hello <p color= | > | >
html.go:26 | hello <p color= | | elements
html.go:25 | hello <p color= | | element
html.go:21 | <p color="blue" | hello | text
html.go:25 | <p color="blue" | | element
html.go:21 | <p color="blue" | fail | text
html.go:50 | <p color="blue" | | tag
html.go:45 | <p color="blue" | | tstart
html.go:45 | p color="blue"> | < | <
html.go:20 | color="blue">w | p | identifier
html.go:35 | color="blue">w | | attrs
html.go:34 | color="blue">w | | attr
html.go:20 | ="blue">world</ | color | identifier
html.go:34 | "blue">world</p | = | =
html.go:34 | >world</p></bod | | string literal
html.go:34 | >world</p></bod | | attr
html.go:20 | >world</p></bod | fail | identifier
html.go:45 | world</p></body | > | >
html.go:26 | world</p></body | | elements
html.go:25 | world</p></body | | element
html.go:21 | </p></body> | world | text
html.go:25 | </p></body> | | element
html.go:21 | </p></body> | fail | text
html.go:50 | </p></body> | | tag
html.go:45 | </p></body> | | tstart
html.go:45 | /p></body> | < | <
html.go:20 | /p></body> | fail | identifier
html.go:46 | </p></body> | | tend
html.go:46 | p></body> | </ | </
html.go:20 | ></body> | p | identifier
html.go:46 | </body> | > | >
html.go:25 | </body> | | element
html.go:21 | </body> | fail | text
html.go:50 | </body> | | tag
html.go:45 | </body> | | tstart
html.go:45 | /body> | < | <
html.go:20 | /body> | fail | identifier
html.go:46 | </body> | | tend
html.go:46 | body> | </ | </
html.go:20 | > | body | identifier
html.go:46 | | > | >
PASS
ok github.com/vektah/goparsify/html 0.118s
```
### debugging performance
If you build the parser with -tags debug it will instrument each parser and a call to DumpDebugStats() will show stats:
```
Any() 415.7136ms 87000 calls json.go:35

View File

@ -25,6 +25,8 @@ type State struct {
Input string
// An offset into the string, pointing to the current tip
Pos int
// Do not backtrack past this point
Cut int
// Error is a secondary return channel from parsers, but used so heavily
// in backtracking that it has been inlined to avoid allocations.
Error Error
@ -88,6 +90,18 @@ func (s *State) Get() string {
return s.Input[s.Pos:]
}
// Preview of the the next x characters
func (s *State) Preview(x int) string {
if s.Pos > len(s.Input) {
return ""
}
if len(s.Input)-s.Pos >= x {
return s.Input[s.Pos : s.Pos+x]
}
return s.Input[s.Pos:]
}
// ErrorHere raises an error at the current position.
func (s *State) ErrorHere(expected string) {
s.Error.pos = s.Pos