diff --git a/combinator.go b/combinator.go index f55977c..7a811bc 100644 --- a/combinator.go +++ b/combinator.go @@ -45,6 +45,10 @@ func Any(parsers ...Parserish) Parser { for _, parser := range parserfied { node := parser(ps) if ps.Errored() { + if ps.Cut > startpos { + longestError = ps.Error + break + } if ps.Error.pos > longestError.pos { longestError = ps.Error } @@ -142,13 +146,13 @@ func Bind(parser Parserish, val interface{}) Parser { func Map(parser Parserish, f func(n Result) Result) Parser { p := Parsify(parser) - return NewParser("Map()", func(ps *State) Result { + return func(ps *State) Result { node := p(ps) if ps.Errored() { return node } return f(node) - }) + } } func flatten(n Result) string { @@ -169,7 +173,7 @@ func flatten(n Result) string { // Merge all child Tokens together recursively func Merge(parser Parserish) Parser { - return NewParser("Merge()", Map(parser, func(n Result) Result { + return Map(parser, func(n Result) Result { return Result{Token: flatten(n)} - })) + }) } diff --git a/combinator_test.go b/combinator_test.go index 283bb2a..5ac227f 100644 --- a/combinator_test.go +++ b/combinator_test.go @@ -162,6 +162,15 @@ func TestBind(t *testing.T) { }) } +func TestCut(t *testing.T) { + // does backtracking happen anywhere else? + t.Run("test any", func(t *testing.T) { + _, ps := runParser("var world", Any(Seq("var", Cut, "hello"), "var world")) + require.Equal(t, "offset 4: expected hello", ps.Error.Error()) + require.Equal(t, 0, ps.Pos) + }) +} + func TestMerge(t *testing.T) { var bracer Parser bracer = Seq("(", Maybe(&bracer), ")") diff --git a/debug/frames.go b/debug/frames.go new file mode 100644 index 0000000..88a48f0 --- /dev/null +++ b/debug/frames.go @@ -0,0 +1,66 @@ +package debug + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "regexp" + "runtime" + "strings" +) + +var varRegex = regexp.MustCompile(`(?:var)?\s*(\w*)\s*:?=`) + +func getPackageName(f runtime.Frame) string { + parts := strings.Split(f.Func.Name(), ".") + pl := len(parts) + + if parts[pl-2][0] == '(' { + return strings.Join(parts[0:pl-2], ".") + } else { + return strings.Join(parts[0:pl-1], ".") + } +} + +func getVarName(filename string, lineNo int) string { + f, err := os.Open(filename) + if err != nil { + return "" + } + + scanner := bufio.NewScanner(f) + for i := 0; i < lineNo; i++ { + scanner.Scan() + } + + line := scanner.Text() + if matches := varRegex.FindStringSubmatch(line); matches != nil { + return matches[1] + } + return "" +} + +// GetDefinition returns the name of the variable and location this parser was defined by walking up the stack +func GetDefinition() (varName string, location string) { + pc := make([]uintptr, 64) + n := runtime.Callers(3, pc) + frames := runtime.CallersFrames(pc[:n]) + + var frame runtime.Frame + more := true + for more { + frame, more = frames.Next() + pkg := getPackageName(frame) + if pkg == "github.com/vektah/goparsify" || pkg == "github.com/vektah/goparsify/debug" { + continue + } + + varName := getVarName(frame.File, frame.Line) + if varName != "" { + return varName, fmt.Sprintf("%s:%d", filepath.Base(frame.File), frame.Line) + } + } + + return "", "" +} diff --git a/debug/frames_test.go b/debug/frames_test.go new file mode 100644 index 0000000..40c1086 --- /dev/null +++ b/debug/frames_test.go @@ -0,0 +1,24 @@ +package debug + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestRegex(t *testing.T) { + tests := map[string]string{ + "attrs": ` attrs = Map(Some(attr), func(node Result) Result {`, + "_value": ` _value = Any(_null, _true, _false, _string, _number, _array, _object)`, + "_object": `_object = Map(Seq("{", Cut, _properties, "}"), func(n Result) Result {`, + "expr": `var expr = Exact("foo")`, + "number": `number := NumberLit()`, + } + for expected, input := range tests { + t.Run(input, func(t *testing.T) { + matches := varRegex.FindStringSubmatch(input) + require.NotNil(t, matches) + require.Equal(t, expected, matches[1]) + }) + } +} diff --git a/debugoff.go b/debugoff.go index ba0ef33..7afcce6 100644 --- a/debugoff.go +++ b/debugoff.go @@ -2,6 +2,8 @@ package goparsify +import "io" + // NewParser should be called around the creation of every Parser. // It does nothing normally and should incur no runtime overhead, but when building with -tags debug // it will instrument every parser to collect valuable timing information displayable with DumpDebugStats. @@ -11,3 +13,9 @@ func NewParser(description string, p Parser) Parser { // DumpDebugStats will print out the curring timings for each parser if built with -tags debug func DumpDebugStats() {} + +// EnableLogging will write logs to the given writer as the next parse happens +func EnableLogging(w io.Writer) {} + +// DisableLogging will stop writing logs +func DisableLogging() {} diff --git a/debugon.go b/debugon.go index cf94f75..4e68db3 100644 --- a/debugon.go +++ b/debugon.go @@ -3,77 +3,134 @@ package goparsify import ( + "bytes" + "flag" "fmt" - "runtime" + "io" + "os" "sort" + "strconv" "strings" "time" + + "github.com/vektah/goparsify/debug" ) +var log io.Writer = nil var parsers []*debugParser +var pendingOpenLog = "" +var activeParsers []*debugParser +var longestLocation = 0 type debugParser struct { - Description string - Caller string - Next Parser - Time time.Duration - Calls int + Match string + Var string + Location string + Next Parser + Time time.Duration + Calls int +} + +func (dp *debugParser) Name() string { + if len(activeParsers) > 2 && activeParsers[len(activeParsers)-2].Var == dp.Var { + return dp.Match + } + return dp.Var +} + +func (dp *debugParser) logf(ps *State, result *Result, format string, args ...interface{}) string { + buf := &bytes.Buffer{} + buf.WriteString(fmt.Sprintf("%"+strconv.Itoa(longestLocation)+"s | ", dp.Location)) + buf.WriteString(fmt.Sprintf("%-15s", ps.Preview(15))) + buf.WriteString(" | ") + output := "" + if ps.Errored() { + output = "fail" + } else if result != nil { + output = result.Token + } + buf.WriteString(fmt.Sprintf("%-10s | ", output)) + buf.WriteString(strings.Repeat(" ", len(activeParsers)-1)) + buf.WriteString(fmt.Sprintf(format, args...)) + + buf.WriteRune('\n') + return buf.String() +} + +func (dp *debugParser) logStart(ps *State) { + if log != nil { + if pendingOpenLog != "" { + fmt.Fprint(log, pendingOpenLog) + pendingOpenLog = "" + } + pendingOpenLog = dp.logf(ps, nil, dp.Name()) + } +} + +func (dp *debugParser) logEnd(ps *State, result *Result) { + if log != nil { + if pendingOpenLog != "" { + fmt.Fprintf(log, dp.logf(ps, result, dp.Name())) + pendingOpenLog = "" + } + } } func (dp *debugParser) Parse(ps *State) Result { + activeParsers = append(activeParsers, dp) start := time.Now() + dp.logStart(ps) ret := dp.Next(ps) + dp.logEnd(ps, &ret) dp.Time = dp.Time + time.Since(start) dp.Calls++ + activeParsers = activeParsers[0 : len(activeParsers)-1] return ret } -func getPackageName(f *runtime.Func) string { - parts := strings.Split(f.Name(), ".") - pl := len(parts) +func init() { + logs := flag.Bool("parselogs", false, "print detailed parsing logs") + flag.Parse() - if parts[pl-2][0] == '(' { - return strings.Join(parts[0:pl-2], ".") - } else { - return strings.Join(parts[0:pl-1], ".") + if *logs { + EnableLogging(os.Stdout) } } // NewParser should be called around the creation of every Parser. // It does nothing normally and should incur no runtime overhead, but when building with -tags debug -// it will instrument every parser to collect valuable timing information displayable with DumpDebugStats. -func NewParser(description string, p Parser) Parser { - fpcs := make([]uintptr, 1) - caller := "" - - for i := 1; i < 10; i++ { - n := runtime.Callers(i, fpcs) - - if n != 0 { - frame := runtime.FuncForPC(fpcs[0] - 1) - pkg := getPackageName(frame) - - if pkg != "github.com/vektah/goparsify" { - file, line := frame.FileLine(fpcs[0] - 1) - caller = fmt.Sprintf("%s:%d", file, line) - break - } - } - } +// it will instrument every parser to collect valuable timing and debug information. +func NewParser(name string, p Parser) Parser { + description, location := debug.GetDefinition() dp := &debugParser{ - Description: description, - Next: p, - Caller: caller, + Match: name, + Var: description, + Location: location, + Next: p, + } + + if len(dp.Location) > longestLocation { + longestLocation = len(dp.Location) } parsers = append(parsers, dp) return dp.Parse } +// EnableLogging will write logs to the given writer as the next parse happens +func EnableLogging(w io.Writer) { + log = w +} + +// DisableLogging will stop writing logs +func DisableLogging() { + log = nil +} + // DumpDebugStats will print out the curring timings for each parser if built with -tags debug func DumpDebugStats() { sort.Slice(parsers, func(i, j int) bool { @@ -82,6 +139,6 @@ func DumpDebugStats() { fmt.Println("Parser stats:") for _, parser := range parsers { - fmt.Printf("%20s\t%10s\t%10d\tcalls\t%s\n", parser.Description, parser.Time.String(), parser.Calls, parser.Caller) + fmt.Printf("%20s\t%10s\t%10d\tcalls\t%s\n", parser.Name(), parser.Time.String(), parser.Calls, parser.Location) } } diff --git a/html/html.go b/html/html.go index 2bbe921..0def587 100644 --- a/html/html.go +++ b/html/html.go @@ -42,16 +42,16 @@ var ( return Result{Result: attr} }) - tstart = Seq("<", identifier, attrs, ">") - tend = Seq("") + tstart = Seq("<", Cut, identifier, attrs, ">") + tend = Seq("") ) func init() { tag = Map(Seq(tstart, elements, tend), func(node Result) Result { openTag := node.Child[0] return Result{Result: Tag{ - Name: openTag.Child[1].Token, - Attributes: openTag.Child[2].Result.(map[string]string), + Name: openTag.Child[2].Token, + Attributes: openTag.Child[3].Result.(map[string]string), Body: node.Child[1].Result.([]interface{}), }} diff --git a/json/json.go b/json/json.go index 2748108..0d78cf5 100644 --- a/json/json.go +++ b/json/json.go @@ -1,6 +1,8 @@ package json -import . "github.com/vektah/goparsify" +import ( + . "github.com/vektah/goparsify" +) var ( _value Parser @@ -11,18 +13,18 @@ var ( _number = NumberLit() _properties = Some(Seq(StringLit(`"`), ":", &_value), ",") - _array = Map(Seq("[", Some(&_value, ","), "]"), func(n Result) Result { + _array = Map(Seq("[", Cut, Some(&_value, ","), "]"), func(n Result) Result { ret := []interface{}{} - for _, child := range n.Child[1].Child { + for _, child := range n.Child[2].Child { ret = append(ret, child.Result) } return Result{Result: ret} }) - _object = Map(Seq("{", _properties, "}"), func(n Result) Result { + _object = Map(Seq("{", Cut, _properties, "}"), func(n Result) Result { ret := map[string]interface{}{} - for _, prop := range n.Child[1].Child { + for _, prop := range n.Child[2].Child { ret[prop.Child[0].Result.(string)] = prop.Child[2].Result } diff --git a/parser.go b/parser.go index 7590ffa..7c3f866 100644 --- a/parser.go +++ b/parser.go @@ -52,7 +52,7 @@ type Parserish interface{} func Parsify(p Parserish) Parser { switch p := p.(type) { case func(*State) Result: - return NewParser("anonymous func", p) + return p case Parser: return p case *Parser: @@ -84,6 +84,13 @@ func WS() Parser { }) } +// Cut prevents backtracking beyond this point. Usually used after keywords when you +// are sure this is the correct path. Improves performance and error reporting. +func Cut(ps *State) Result { + ps.Cut = ps.Pos + return Result{} +} + // Run applies some input to a parser and returns the result, failing if the input isnt fully consumed. // It is a convenience method for the most common way to invoke a parser. func Run(parser Parserish, input string) (result interface{}, err error) { diff --git a/readme.md b/readme.md index 967e5c5..9e712b0 100644 --- a/readme.md +++ b/readme.md @@ -1,12 +1,7 @@ goparsify [![CircleCI](https://circleci.com/gh/Vektah/goparsify/tree/master.svg?style=shield)](https://circleci.com/gh/Vektah/goparsify/tree/master) [![godoc](http://b.repl.ca/v1/godoc-reference-blue.png)](https://godoc.org/github.com/Vektah/goparsify) ========= -A parser-combinator library for building easy to test, read and maintain parsers using functional composition. - -### todo - - fatal errors: Some way for a parser to say "Ive found a good match, the input is broken, stop here with an error" - - better errors: currently only the longest error is returned, but it would be nice to show all expected tokens that could follow. - +A parser-combinator library for building easy to test, read and maintain parsers using functional composition. ### benchmarks I dont have many benchmarks set up yet, but the json parser is very promising. Nearly keeping up with the stdlib for raw speed: @@ -19,7 +14,70 @@ PASS ok github.com/vektah/goparsify/json 10.840s ``` -### debugging mode +### debugging parsers + +When a parser isnt working as you intended you can build with debugging and enable logging to get a detailed log of exactly what the parser is doing. + +1. First build with debug using `-tags debug` +2. enable logging by passing a runtime flag -parselogs or calling `EnableLogging(os.Stdout)` in your code. + +This works great with tests, eg in the goparsify source tree +``` +$ cd html +$ go test -tags debug -parselogs +html.go:50 | hello

hello

hello

hello

hello

hello

hello

| > +html.go:26 | hello

| < | < +html.go:20 | color="blue">w | p | identifier +html.go:35 | color="blue">w | | attrs +html.go:34 | color="blue">w | | attr +html.go:20 | ="blue">worldworld

world

world

world

| > +html.go:26 | world

| world | text +html.go:25 |

| | element +html.go:21 |

| fail | text +html.go:50 |

| | tag +html.go:45 |

| | tstart +html.go:45 | /p> | < | < +html.go:20 | /p> | fail | identifier +html.go:46 |

| | tend +html.go:46 | p> | | p | identifier +html.go:46 | | > | > +html.go:25 | | | element +html.go:21 | | fail | text +html.go:50 | | | tag +html.go:45 | | | tstart +html.go:45 | /body> | < | < +html.go:20 | /body> | fail | identifier +html.go:46 | | | tend +html.go:46 | body> | | body | identifier +html.go:46 | | > | > +PASS +ok github.com/vektah/goparsify/html 0.118s +``` + +### debugging performance If you build the parser with -tags debug it will instrument each parser and a call to DumpDebugStats() will show stats: ``` Any() 415.7136ms 87000 calls json.go:35 diff --git a/state.go b/state.go index 3c7de43..384eb32 100644 --- a/state.go +++ b/state.go @@ -25,6 +25,8 @@ type State struct { Input string // An offset into the string, pointing to the current tip Pos int + // Do not backtrack past this point + Cut int // Error is a secondary return channel from parsers, but used so heavily // in backtracking that it has been inlined to avoid allocations. Error Error @@ -88,6 +90,18 @@ func (s *State) Get() string { return s.Input[s.Pos:] } +// Preview of the the next x characters +func (s *State) Preview(x int) string { + if s.Pos > len(s.Input) { + return "" + } + if len(s.Input)-s.Pos >= x { + return s.Input[s.Pos : s.Pos+x] + } + + return s.Input[s.Pos:] +} + // ErrorHere raises an error at the current position. func (s *State) ErrorHere(expected string) { s.Error.pos = s.Pos