From 47badae641b9cd8862f327864d2143a57b8e30af Mon Sep 17 00:00:00 2001 From: Adam Scarr Date: Wed, 9 Aug 2017 21:18:14 +1000 Subject: Add godoc --- parser.go | 87 +++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 28 deletions(-) (limited to 'parser.go') diff --git a/parser.go b/parser.go index 5802b9d..7b45004 100644 --- a/parser.go +++ b/parser.go @@ -1,18 +1,28 @@ package goparsify import ( + "errors" "fmt" "strings" "unicode/utf8" ) -type Node struct { +// Result is the output of a parser. Usually only one of its fields will be set and should be though of +// more as a union type. having it avoids interface{} littered all through the parsing code and makes +// the it easy to do the two most common operations, getting a token and finding a child. +type Result struct { Token string - Child []Node + Child []Result Result interface{} } -type Parser func(*State) Node +// Parser is the workhorse of parsify. A parser takes a State and returns a result, consuming some +// of the State in the process. +// Given state is shared there are a few rules that should be followed: +// - A parser that errors must set state.Error +// - A parser that errors must not change state.Pos +// - A parser that consumed some input should advance state.Pos +type Parser func(*State) Result // Parserish types are any type that can be turned into a Parser by Parsify // These currently include *Parser and string literals. @@ -30,17 +40,22 @@ type Parser func(*State) Node // ``` type Parserish interface{} +// Parsify takes a Parserish and makes a Parser out of it. It should be called by +// any Parser that accepts a Parser as an argument. It should never be called during +// instead call it during parser creation so there is no runtime cost. +// +// See Parserish for details. func Parsify(p Parserish) Parser { switch p := p.(type) { case nil: return nil - case func(*State) Node: + case func(*State) Result: return NewParser("anonymous func", p) case Parser: return p case *Parser: // Todo: Maybe capture this stack and on nil show it? Is there a good error library to do this? - return func(ptr *State) Node { + return func(ptr *State) Result { return (*p)(ptr) } case string: @@ -50,6 +65,7 @@ func Parsify(p Parserish) Parser { } } +// ParsifyAll calls Parsify on all parsers func ParsifyAll(parsers ...Parserish) []Parser { ret := make([]Parser, len(parsers)) for i, parser := range parsers { @@ -58,53 +74,61 @@ func ParsifyAll(parsers ...Parserish) []Parser { return ret } +// WS will consume whitespace, it should only be needed when AutoWS is turned off func WS() Parser { - return NewParser("AutoWS", func(ps *State) Node { + return NewParser("AutoWS", func(ps *State) Result { ps.WS() - return Node{} + return Result{} }) } -func ParseString(parser Parserish, input string) (result interface{}, remaining string, err error) { +// Run applies some input to a parser and returns the result, failing if the input isnt fully consumed. +// It is a convenience method for the most common way to invoke a parser. +func Run(parser Parserish, input string) (result interface{}, err error) { p := Parsify(parser) - ps := InputString(input) + ps := NewState(input) ret := p(ps) ps.AutoWS() - if ps.Error.Expected != "" { - return nil, ps.Get(), ps.Error + if ps.Error.expected != "" { + return ret.Result, ps.Error + } + + if ps.Get() != "" { + return ret.Result, errors.New("left unparsed: " + ps.Get()) } - return ret.Result, ps.Get(), nil + return ret.Result, nil } +// Exact will fully match the exact string supplied, or error. The match will be stored in .Token func Exact(match string) Parser { if len(match) == 1 { matchByte := match[0] - return NewParser(match, func(ps *State) Node { + return NewParser(match, func(ps *State) Result { ps.AutoWS() if ps.Pos >= len(ps.Input) || ps.Input[ps.Pos] != matchByte { ps.ErrorHere(match) - return Node{} + return Result{} } ps.Advance(1) - return Node{Token: match} + return Result{Token: match} }) } - return NewParser(match, func(ps *State) Node { + return NewParser(match, func(ps *State) Result { ps.AutoWS() if !strings.HasPrefix(ps.Get(), match) { ps.ErrorHere(match) - return Node{} + return Result{} } ps.Advance(len(match)) - return Node{Token: match} + return Result{Token: match} }) } @@ -125,9 +149,9 @@ func parseRepetition(defaultMin, defaultMax int, repetition ...int) (min int, ma } // parseMatcher turns a string in the format a-f01234A-F into: -// - a set string of matches string(01234) +// - an alphabet of matches string(01234) // - a set of ranges [][]rune{{'a', 'f'}, {'A', 'F'}} -func parseMatcher(matcher string) (matches string, ranges [][]rune) { +func parseMatcher(matcher string) (alphabet string, ranges [][]rune) { runes := []rune(matcher) for i := 0; i < len(runes); i++ { @@ -141,29 +165,36 @@ func parseMatcher(matcher string) (matches string, ranges [][]rune) { ranges = append(ranges, []rune{end, start}) } } else if i+1 < len(runes) && runes[i] == '\\' { - matches += string(runes[i+1]) + alphabet += string(runes[i+1]) } else { - matches += string(runes[i]) + alphabet += string(runes[i]) } } - return matches, ranges + return alphabet, ranges } +// Chars is the swiss army knife of character matches. It can match: +// - ranges: Chars("a-z") will match one or more lowercase letter +// - alphabets: Chars("abcd") will match one or more of the letters abcd in any order +// - min and max: Chars("a-z0-9", 4, 6) will match 4-6 lowercase alphanumeric characters +// the above can be combined in any order func Chars(matcher string, repetition ...int) Parser { return NewParser("["+matcher+"]", charsImpl(matcher, false, repetition...)) } +// NotChars accepts the full range of input from Chars, but it will stop when any +// character matches. func NotChars(matcher string, repetition ...int) Parser { return NewParser("!["+matcher+"]", charsImpl(matcher, true, repetition...)) } func charsImpl(matcher string, stopOn bool, repetition ...int) Parser { min, max := parseRepetition(1, -1, repetition...) - matches, ranges := parseMatcher(matcher) + alphabet, ranges := parseMatcher(matcher) - return func(ps *State) Node { + return func(ps *State) Result { ps.AutoWS() matched := 0 for ps.Pos+matched < len(ps.Input) { @@ -173,7 +204,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser { r, w := utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:]) - anyMatched := strings.ContainsRune(matches, r) + anyMatched := strings.ContainsRune(alphabet, r) if !anyMatched { for _, rng := range ranges { if r >= rng[0] && r <= rng[1] { @@ -191,11 +222,11 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser { if matched < min { ps.ErrorHere(matcher) - return Node{} + return Result{} } result := ps.Input[ps.Pos : ps.Pos+matched] ps.Advance(matched) - return Node{Token: result} + return Result{Token: result} } } -- cgit v1.2.3