Add parse logging
This commit is contained in:
parent
b64fcfaa61
commit
af542eff9e
@ -45,6 +45,10 @@ func Any(parsers ...Parserish) Parser {
|
||||
for _, parser := range parserfied {
|
||||
node := parser(ps)
|
||||
if ps.Errored() {
|
||||
if ps.Cut > startpos {
|
||||
longestError = ps.Error
|
||||
break
|
||||
}
|
||||
if ps.Error.pos > longestError.pos {
|
||||
longestError = ps.Error
|
||||
}
|
||||
@ -142,13 +146,13 @@ func Bind(parser Parserish, val interface{}) Parser {
|
||||
func Map(parser Parserish, f func(n Result) Result) Parser {
|
||||
p := Parsify(parser)
|
||||
|
||||
return NewParser("Map()", func(ps *State) Result {
|
||||
return func(ps *State) Result {
|
||||
node := p(ps)
|
||||
if ps.Errored() {
|
||||
return node
|
||||
}
|
||||
return f(node)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func flatten(n Result) string {
|
||||
@ -169,7 +173,7 @@ func flatten(n Result) string {
|
||||
|
||||
// Merge all child Tokens together recursively
|
||||
func Merge(parser Parserish) Parser {
|
||||
return NewParser("Merge()", Map(parser, func(n Result) Result {
|
||||
return Map(parser, func(n Result) Result {
|
||||
return Result{Token: flatten(n)}
|
||||
}))
|
||||
})
|
||||
}
|
||||
|
@ -162,6 +162,15 @@ func TestBind(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestCut(t *testing.T) {
|
||||
// does backtracking happen anywhere else?
|
||||
t.Run("test any", func(t *testing.T) {
|
||||
_, ps := runParser("var world", Any(Seq("var", Cut, "hello"), "var world"))
|
||||
require.Equal(t, "offset 4: expected hello", ps.Error.Error())
|
||||
require.Equal(t, 0, ps.Pos)
|
||||
})
|
||||
}
|
||||
|
||||
func TestMerge(t *testing.T) {
|
||||
var bracer Parser
|
||||
bracer = Seq("(", Maybe(&bracer), ")")
|
||||
|
66
debug/frames.go
Normal file
66
debug/frames.go
Normal file
@ -0,0 +1,66 @@
|
||||
package debug
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var varRegex = regexp.MustCompile(`(?:var)?\s*(\w*)\s*:?=`)
|
||||
|
||||
// getPackageName returns the import path of the package that declares the
// function executing in frame f, or "" when the frame carries no function name.
//
// Symbol names look like "host.tld/dir/pkg.Func", "host.tld/dir/pkg.(*T).Method"
// or "host.tld/dir/pkg.Func.func1". Dots may appear in the directory part of the
// import path, so the package ends at the first dot that follows the last slash.
func getPackageName(f runtime.Frame) string {
	// Use Frame.Function rather than Frame.Func.Name(): Func is nil for
	// non-Go code and fully inlined functions.
	name := f.Function
	if name == "" {
		return ""
	}

	pathEnd := strings.LastIndex(name, "/") + 1
	dot := strings.Index(name[pathEnd:], ".")
	if dot < 0 {
		// No function part to strip; the whole name is the best answer available.
		return name
	}
	return name[:pathEnd+dot]
}
|
||||
|
||||
func getVarName(filename string, lineNo int) string {
|
||||
f, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(f)
|
||||
for i := 0; i < lineNo; i++ {
|
||||
scanner.Scan()
|
||||
}
|
||||
|
||||
line := scanner.Text()
|
||||
if matches := varRegex.FindStringSubmatch(line); matches != nil {
|
||||
return matches[1]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// GetDefinition returns the name of the variable and location this parser was defined by walking up the stack
|
||||
func GetDefinition() (varName string, location string) {
|
||||
pc := make([]uintptr, 64)
|
||||
n := runtime.Callers(3, pc)
|
||||
frames := runtime.CallersFrames(pc[:n])
|
||||
|
||||
var frame runtime.Frame
|
||||
more := true
|
||||
for more {
|
||||
frame, more = frames.Next()
|
||||
pkg := getPackageName(frame)
|
||||
if pkg == "github.com/vektah/goparsify" || pkg == "github.com/vektah/goparsify/debug" {
|
||||
continue
|
||||
}
|
||||
|
||||
varName := getVarName(frame.File, frame.Line)
|
||||
if varName != "" {
|
||||
return varName, fmt.Sprintf("%s:%d", filepath.Base(frame.File), frame.Line)
|
||||
}
|
||||
}
|
||||
|
||||
return "", ""
|
||||
}
|
24
debug/frames_test.go
Normal file
24
debug/frames_test.go
Normal file
@ -0,0 +1,24 @@
|
||||
package debug
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestRegex(t *testing.T) {
|
||||
tests := map[string]string{
|
||||
"attrs": ` attrs = Map(Some(attr), func(node Result) Result {`,
|
||||
"_value": ` _value = Any(_null, _true, _false, _string, _number, _array, _object)`,
|
||||
"_object": `_object = Map(Seq("{", Cut, _properties, "}"), func(n Result) Result {`,
|
||||
"expr": `var expr = Exact("foo")`,
|
||||
"number": `number := NumberLit()`,
|
||||
}
|
||||
for expected, input := range tests {
|
||||
t.Run(input, func(t *testing.T) {
|
||||
matches := varRegex.FindStringSubmatch(input)
|
||||
require.NotNil(t, matches)
|
||||
require.Equal(t, expected, matches[1])
|
||||
})
|
||||
}
|
||||
}
|
@ -2,6 +2,8 @@
|
||||
|
||||
package goparsify
|
||||
|
||||
import "io"
|
||||
|
||||
// NewParser should be called around the creation of every Parser.
|
||||
// It does nothing normally and should incur no runtime overhead, but when building with -tags debug
|
||||
// it will instrument every parser to collect valuable timing information displayable with DumpDebugStats.
|
||||
@ -11,3 +13,9 @@ func NewParser(description string, p Parser) Parser {
|
||||
|
||||
// DumpDebugStats will print out the current timings for each parser if built with -tags debug
|
||||
func DumpDebugStats() {}
|
||||
|
||||
// EnableLogging will write logs to the given writer as the next parse happens
|
||||
func EnableLogging(w io.Writer) {}
|
||||
|
||||
// DisableLogging will stop writing logs
|
||||
func DisableLogging() {}
|
||||
|
121
debugon.go
121
debugon.go
@ -3,77 +3,134 @@
|
||||
package goparsify
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"io"
|
||||
"os"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/vektah/goparsify/debug"
|
||||
)
|
||||
|
||||
var log io.Writer = nil
|
||||
var parsers []*debugParser
|
||||
var pendingOpenLog = ""
|
||||
var activeParsers []*debugParser
|
||||
var longestLocation = 0
|
||||
|
||||
type debugParser struct {
|
||||
Description string
|
||||
Caller string
|
||||
Match string
|
||||
Var string
|
||||
Location string
|
||||
Next Parser
|
||||
Time time.Duration
|
||||
Calls int
|
||||
}
|
||||
|
||||
func (dp *debugParser) Name() string {
|
||||
if len(activeParsers) > 2 && activeParsers[len(activeParsers)-2].Var == dp.Var {
|
||||
return dp.Match
|
||||
}
|
||||
return dp.Var
|
||||
}
|
||||
|
||||
func (dp *debugParser) logf(ps *State, result *Result, format string, args ...interface{}) string {
|
||||
buf := &bytes.Buffer{}
|
||||
buf.WriteString(fmt.Sprintf("%"+strconv.Itoa(longestLocation)+"s | ", dp.Location))
|
||||
buf.WriteString(fmt.Sprintf("%-15s", ps.Preview(15)))
|
||||
buf.WriteString(" | ")
|
||||
output := ""
|
||||
if ps.Errored() {
|
||||
output = "fail"
|
||||
} else if result != nil {
|
||||
output = result.Token
|
||||
}
|
||||
buf.WriteString(fmt.Sprintf("%-10s | ", output))
|
||||
buf.WriteString(strings.Repeat(" ", len(activeParsers)-1))
|
||||
buf.WriteString(fmt.Sprintf(format, args...))
|
||||
|
||||
buf.WriteRune('\n')
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
func (dp *debugParser) logStart(ps *State) {
|
||||
if log != nil {
|
||||
if pendingOpenLog != "" {
|
||||
fmt.Fprint(log, pendingOpenLog)
|
||||
pendingOpenLog = ""
|
||||
}
|
||||
pendingOpenLog = dp.logf(ps, nil, dp.Name())
|
||||
}
|
||||
}
|
||||
|
||||
func (dp *debugParser) logEnd(ps *State, result *Result) {
|
||||
if log != nil {
|
||||
if pendingOpenLog != "" {
|
||||
fmt.Fprintf(log, dp.logf(ps, result, dp.Name()))
|
||||
pendingOpenLog = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (dp *debugParser) Parse(ps *State) Result {
|
||||
activeParsers = append(activeParsers, dp)
|
||||
start := time.Now()
|
||||
|
||||
dp.logStart(ps)
|
||||
ret := dp.Next(ps)
|
||||
dp.logEnd(ps, &ret)
|
||||
|
||||
dp.Time = dp.Time + time.Since(start)
|
||||
dp.Calls++
|
||||
|
||||
activeParsers = activeParsers[0 : len(activeParsers)-1]
|
||||
return ret
|
||||
}
|
||||
|
||||
func getPackageName(f *runtime.Func) string {
|
||||
parts := strings.Split(f.Name(), ".")
|
||||
pl := len(parts)
|
||||
func init() {
|
||||
logs := flag.Bool("parselogs", false, "print detailed parsing logs")
|
||||
flag.Parse()
|
||||
|
||||
if parts[pl-2][0] == '(' {
|
||||
return strings.Join(parts[0:pl-2], ".")
|
||||
} else {
|
||||
return strings.Join(parts[0:pl-1], ".")
|
||||
if *logs {
|
||||
EnableLogging(os.Stdout)
|
||||
}
|
||||
}
|
||||
|
||||
// NewParser should be called around the creation of every Parser.
|
||||
// It does nothing normally and should incur no runtime overhead, but when building with -tags debug
|
||||
// it will instrument every parser to collect valuable timing information displayable with DumpDebugStats.
|
||||
func NewParser(description string, p Parser) Parser {
|
||||
fpcs := make([]uintptr, 1)
|
||||
caller := ""
|
||||
|
||||
for i := 1; i < 10; i++ {
|
||||
n := runtime.Callers(i, fpcs)
|
||||
|
||||
if n != 0 {
|
||||
frame := runtime.FuncForPC(fpcs[0] - 1)
|
||||
pkg := getPackageName(frame)
|
||||
|
||||
if pkg != "github.com/vektah/goparsify" {
|
||||
file, line := frame.FileLine(fpcs[0] - 1)
|
||||
caller = fmt.Sprintf("%s:%d", file, line)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// it will instrument every parser to collect valuable timing and debug information.
|
||||
func NewParser(name string, p Parser) Parser {
|
||||
description, location := debug.GetDefinition()
|
||||
|
||||
dp := &debugParser{
|
||||
Description: description,
|
||||
Match: name,
|
||||
Var: description,
|
||||
Location: location,
|
||||
Next: p,
|
||||
Caller: caller,
|
||||
}
|
||||
|
||||
if len(dp.Location) > longestLocation {
|
||||
longestLocation = len(dp.Location)
|
||||
}
|
||||
|
||||
parsers = append(parsers, dp)
|
||||
return dp.Parse
|
||||
}
|
||||
|
||||
// EnableLogging will write logs to the given writer as the next parse happens
|
||||
func EnableLogging(w io.Writer) {
|
||||
log = w
|
||||
}
|
||||
|
||||
// DisableLogging will stop writing logs
|
||||
func DisableLogging() {
|
||||
log = nil
|
||||
}
|
||||
|
||||
// DumpDebugStats will print out the current timings for each parser if built with -tags debug
|
||||
func DumpDebugStats() {
|
||||
sort.Slice(parsers, func(i, j int) bool {
|
||||
@ -82,6 +139,6 @@ func DumpDebugStats() {
|
||||
|
||||
fmt.Println("Parser stats:")
|
||||
for _, parser := range parsers {
|
||||
fmt.Printf("%20s\t%10s\t%10d\tcalls\t%s\n", parser.Description, parser.Time.String(), parser.Calls, parser.Caller)
|
||||
fmt.Printf("%20s\t%10s\t%10d\tcalls\t%s\n", parser.Name(), parser.Time.String(), parser.Calls, parser.Location)
|
||||
}
|
||||
}
|
||||
|
@ -42,16 +42,16 @@ var (
|
||||
return Result{Result: attr}
|
||||
})
|
||||
|
||||
tstart = Seq("<", identifier, attrs, ">")
|
||||
tend = Seq("</", identifier, ">")
|
||||
tstart = Seq("<", Cut, identifier, attrs, ">")
|
||||
tend = Seq("</", Cut, identifier, ">")
|
||||
)
|
||||
|
||||
func init() {
|
||||
tag = Map(Seq(tstart, elements, tend), func(node Result) Result {
|
||||
openTag := node.Child[0]
|
||||
return Result{Result: Tag{
|
||||
Name: openTag.Child[1].Token,
|
||||
Attributes: openTag.Child[2].Result.(map[string]string),
|
||||
Name: openTag.Child[2].Token,
|
||||
Attributes: openTag.Child[3].Result.(map[string]string),
|
||||
Body: node.Child[1].Result.([]interface{}),
|
||||
}}
|
||||
|
||||
|
12
json/json.go
12
json/json.go
@ -1,6 +1,8 @@
|
||||
package json
|
||||
|
||||
import . "github.com/vektah/goparsify"
|
||||
import (
|
||||
. "github.com/vektah/goparsify"
|
||||
)
|
||||
|
||||
var (
|
||||
_value Parser
|
||||
@ -11,18 +13,18 @@ var (
|
||||
_number = NumberLit()
|
||||
_properties = Some(Seq(StringLit(`"`), ":", &_value), ",")
|
||||
|
||||
_array = Map(Seq("[", Some(&_value, ","), "]"), func(n Result) Result {
|
||||
_array = Map(Seq("[", Cut, Some(&_value, ","), "]"), func(n Result) Result {
|
||||
ret := []interface{}{}
|
||||
for _, child := range n.Child[1].Child {
|
||||
for _, child := range n.Child[2].Child {
|
||||
ret = append(ret, child.Result)
|
||||
}
|
||||
return Result{Result: ret}
|
||||
})
|
||||
|
||||
_object = Map(Seq("{", _properties, "}"), func(n Result) Result {
|
||||
_object = Map(Seq("{", Cut, _properties, "}"), func(n Result) Result {
|
||||
ret := map[string]interface{}{}
|
||||
|
||||
for _, prop := range n.Child[1].Child {
|
||||
for _, prop := range n.Child[2].Child {
|
||||
ret[prop.Child[0].Result.(string)] = prop.Child[2].Result
|
||||
}
|
||||
|
||||
|
@ -52,7 +52,7 @@ type Parserish interface{}
|
||||
func Parsify(p Parserish) Parser {
|
||||
switch p := p.(type) {
|
||||
case func(*State) Result:
|
||||
return NewParser("anonymous func", p)
|
||||
return p
|
||||
case Parser:
|
||||
return p
|
||||
case *Parser:
|
||||
@ -84,6 +84,13 @@ func WS() Parser {
|
||||
})
|
||||
}
|
||||
|
||||
// Cut prevents backtracking beyond this point. Usually used after keywords when you
|
||||
// are sure this is the correct path. Improves performance and error reporting.
|
||||
func Cut(ps *State) Result {
|
||||
ps.Cut = ps.Pos
|
||||
return Result{}
|
||||
}
|
||||
|
||||
// Run applies some input to a parser and returns the result, failing if the input isn't fully consumed.
|
||||
// It is a convenience method for the most common way to invoke a parser.
|
||||
func Run(parser Parserish, input string) (result interface{}, err error) {
|
||||
|
70
readme.md
70
readme.md
@ -3,11 +3,6 @@ goparsify [![CircleCI](https://circleci.com/gh/Vektah/goparsify/tree/master.svg?
|
||||
|
||||
A parser-combinator library for building easy to test, read and maintain parsers using functional composition.
|
||||
|
||||
### todo
|
||||
- fatal errors: Some way for a parser to say "Ive found a good match, the input is broken, stop here with an error"
|
||||
- better errors: currently only the longest error is returned, but it would be nice to show all expected tokens that could follow.
|
||||
|
||||
|
||||
### benchmarks
|
||||
I don't have many benchmarks set up yet, but the JSON parser is very promising, nearly keeping up with the stdlib for raw speed:
|
||||
```
|
||||
@ -19,7 +14,70 @@ PASS
|
||||
ok github.com/vektah/goparsify/json 10.840s
|
||||
```
|
||||
|
||||
### debugging mode
|
||||
### debugging parsers
|
||||
|
||||
When a parser isn't working as you intended, you can build with debugging and enable logging to get a detailed log of exactly what the parser is doing.
|
||||
|
||||
1. First build with debug using `-tags debug`
|
||||
2. Enable logging by passing the runtime flag `-parselogs` or by calling `EnableLogging(os.Stdout)` in your code.
|
||||
|
||||
This works great with tests, e.g. in the goparsify source tree:
|
||||
```
|
||||
$ cd html
|
||||
$ go test -tags debug -parselogs
|
||||
html.go:50 | <body>hello <p | | tag
|
||||
html.go:45 | <body>hello <p | | tstart
|
||||
html.go:45 | body>hello <p c | < | <
|
||||
html.go:20 | >hello <p color | body | identifier
|
||||
html.go:35 | >hello <p color | | attrs
|
||||
html.go:34 | >hello <p color | | attr
|
||||
html.go:20 | >hello <p color | fail | identifier
|
||||
html.go:45 | hello <p color= | > | >
|
||||
html.go:26 | hello <p color= | | elements
|
||||
html.go:25 | hello <p color= | | element
|
||||
html.go:21 | <p color="blue" | hello | text
|
||||
html.go:25 | <p color="blue" | | element
|
||||
html.go:21 | <p color="blue" | fail | text
|
||||
html.go:50 | <p color="blue" | | tag
|
||||
html.go:45 | <p color="blue" | | tstart
|
||||
html.go:45 | p color="blue"> | < | <
|
||||
html.go:20 | color="blue">w | p | identifier
|
||||
html.go:35 | color="blue">w | | attrs
|
||||
html.go:34 | color="blue">w | | attr
|
||||
html.go:20 | ="blue">world</ | color | identifier
|
||||
html.go:34 | "blue">world</p | = | =
|
||||
html.go:34 | >world</p></bod | | string literal
|
||||
html.go:34 | >world</p></bod | | attr
|
||||
html.go:20 | >world</p></bod | fail | identifier
|
||||
html.go:45 | world</p></body | > | >
|
||||
html.go:26 | world</p></body | | elements
|
||||
html.go:25 | world</p></body | | element
|
||||
html.go:21 | </p></body> | world | text
|
||||
html.go:25 | </p></body> | | element
|
||||
html.go:21 | </p></body> | fail | text
|
||||
html.go:50 | </p></body> | | tag
|
||||
html.go:45 | </p></body> | | tstart
|
||||
html.go:45 | /p></body> | < | <
|
||||
html.go:20 | /p></body> | fail | identifier
|
||||
html.go:46 | </p></body> | | tend
|
||||
html.go:46 | p></body> | </ | </
|
||||
html.go:20 | ></body> | p | identifier
|
||||
html.go:46 | </body> | > | >
|
||||
html.go:25 | </body> | | element
|
||||
html.go:21 | </body> | fail | text
|
||||
html.go:50 | </body> | | tag
|
||||
html.go:45 | </body> | | tstart
|
||||
html.go:45 | /body> | < | <
|
||||
html.go:20 | /body> | fail | identifier
|
||||
html.go:46 | </body> | | tend
|
||||
html.go:46 | body> | </ | </
|
||||
html.go:20 | > | body | identifier
|
||||
html.go:46 | | > | >
|
||||
PASS
|
||||
ok github.com/vektah/goparsify/html 0.118s
|
||||
```
|
||||
|
||||
### debugging performance
|
||||
If you build the parser with -tags debug it will instrument each parser and a call to DumpDebugStats() will show stats:
|
||||
```
|
||||
Any() 415.7136ms 87000 calls json.go:35
|
||||
|
14
state.go
14
state.go
@ -25,6 +25,8 @@ type State struct {
|
||||
Input string
|
||||
// An offset into the string, pointing to the current tip
|
||||
Pos int
|
||||
// Do not backtrack past this point
|
||||
Cut int
|
||||
// Error is a secondary return channel from parsers, but used so heavily
|
||||
// in backtracking that it has been inlined to avoid allocations.
|
||||
Error Error
|
||||
@ -88,6 +90,18 @@ func (s *State) Get() string {
|
||||
return s.Input[s.Pos:]
|
||||
}
|
||||
|
||||
// Preview of the the next x characters
|
||||
func (s *State) Preview(x int) string {
|
||||
if s.Pos > len(s.Input) {
|
||||
return ""
|
||||
}
|
||||
if len(s.Input)-s.Pos >= x {
|
||||
return s.Input[s.Pos : s.Pos+x]
|
||||
}
|
||||
|
||||
return s.Input[s.Pos:]
|
||||
}
|
||||
|
||||
// ErrorHere raises an error at the current position.
|
||||
func (s *State) ErrorHere(expected string) {
|
||||
s.Error.pos = s.Pos
|
||||
|
Loading…
Reference in New Issue
Block a user