summaryrefslogtreecommitdiff
path: root/parser.go
diff options
context:
space:
mode:
Diffstat (limited to 'parser.go')
-rw-r--r--parser.go87
1 files changed, 59 insertions, 28 deletions
diff --git a/parser.go b/parser.go
index 5802b9d..7b45004 100644
--- a/parser.go
+++ b/parser.go
@@ -1,18 +1,28 @@
package goparsify
import (
+ "errors"
"fmt"
"strings"
"unicode/utf8"
)
-type Node struct {
+// Result is the output of a parser. Usually only one of its fields will be set and should be though of
+// more as a union type. having it avoids interface{} littered all through the parsing code and makes
+// the it easy to do the two most common operations, getting a token and finding a child.
+type Result struct {
Token string
- Child []Node
+ Child []Result
Result interface{}
}
-type Parser func(*State) Node
+// Parser is the workhorse of parsify. A parser takes a State and returns a result, consuming some
+// of the State in the process.
+// Given state is shared there are a few rules that should be followed:
+// - A parser that errors must set state.Error
+// - A parser that errors must not change state.Pos
+// - A parser that consumed some input should advance state.Pos
+type Parser func(*State) Result
// Parserish types are any type that can be turned into a Parser by Parsify
// These currently include *Parser and string literals.
@@ -30,17 +40,22 @@ type Parser func(*State) Node
// ```
type Parserish interface{}
+// Parsify takes a Parserish and makes a Parser out of it. It should be called by
+// any Parser that accepts a Parser as an argument. It should never be called during
+// instead call it during parser creation so there is no runtime cost.
+//
+// See Parserish for details.
func Parsify(p Parserish) Parser {
switch p := p.(type) {
case nil:
return nil
- case func(*State) Node:
+ case func(*State) Result:
return NewParser("anonymous func", p)
case Parser:
return p
case *Parser:
// Todo: Maybe capture this stack and on nil show it? Is there a good error library to do this?
- return func(ptr *State) Node {
+ return func(ptr *State) Result {
return (*p)(ptr)
}
case string:
@@ -50,6 +65,7 @@ func Parsify(p Parserish) Parser {
}
}
+// ParsifyAll calls Parsify on all parsers
func ParsifyAll(parsers ...Parserish) []Parser {
ret := make([]Parser, len(parsers))
for i, parser := range parsers {
@@ -58,53 +74,61 @@ func ParsifyAll(parsers ...Parserish) []Parser {
return ret
}
+// WS will consume whitespace, it should only be needed when AutoWS is turned off
func WS() Parser {
- return NewParser("AutoWS", func(ps *State) Node {
+ return NewParser("AutoWS", func(ps *State) Result {
ps.WS()
- return Node{}
+ return Result{}
})
}
-func ParseString(parser Parserish, input string) (result interface{}, remaining string, err error) {
+// Run applies some input to a parser and returns the result, failing if the input isnt fully consumed.
+// It is a convenience method for the most common way to invoke a parser.
+func Run(parser Parserish, input string) (result interface{}, err error) {
p := Parsify(parser)
- ps := InputString(input)
+ ps := NewState(input)
ret := p(ps)
ps.AutoWS()
- if ps.Error.Expected != "" {
- return nil, ps.Get(), ps.Error
+ if ps.Error.expected != "" {
+ return ret.Result, ps.Error
+ }
+
+ if ps.Get() != "" {
+ return ret.Result, errors.New("left unparsed: " + ps.Get())
}
- return ret.Result, ps.Get(), nil
+ return ret.Result, nil
}
+// Exact will fully match the exact string supplied, or error. The match will be stored in .Token
func Exact(match string) Parser {
if len(match) == 1 {
matchByte := match[0]
- return NewParser(match, func(ps *State) Node {
+ return NewParser(match, func(ps *State) Result {
ps.AutoWS()
if ps.Pos >= len(ps.Input) || ps.Input[ps.Pos] != matchByte {
ps.ErrorHere(match)
- return Node{}
+ return Result{}
}
ps.Advance(1)
- return Node{Token: match}
+ return Result{Token: match}
})
}
- return NewParser(match, func(ps *State) Node {
+ return NewParser(match, func(ps *State) Result {
ps.AutoWS()
if !strings.HasPrefix(ps.Get(), match) {
ps.ErrorHere(match)
- return Node{}
+ return Result{}
}
ps.Advance(len(match))
- return Node{Token: match}
+ return Result{Token: match}
})
}
@@ -125,9 +149,9 @@ func parseRepetition(defaultMin, defaultMax int, repetition ...int) (min int, ma
}
// parseMatcher turns a string in the format a-f01234A-F into:
-// - a set string of matches string(01234)
+// - an alphabet of matches string(01234)
// - a set of ranges [][]rune{{'a', 'f'}, {'A', 'F'}}
-func parseMatcher(matcher string) (matches string, ranges [][]rune) {
+func parseMatcher(matcher string) (alphabet string, ranges [][]rune) {
runes := []rune(matcher)
for i := 0; i < len(runes); i++ {
@@ -141,29 +165,36 @@ func parseMatcher(matcher string) (matches string, ranges [][]rune) {
ranges = append(ranges, []rune{end, start})
}
} else if i+1 < len(runes) && runes[i] == '\\' {
- matches += string(runes[i+1])
+ alphabet += string(runes[i+1])
} else {
- matches += string(runes[i])
+ alphabet += string(runes[i])
}
}
- return matches, ranges
+ return alphabet, ranges
}
+// Chars is the swiss army knife of character matches. It can match:
+// - ranges: Chars("a-z") will match one or more lowercase letter
+// - alphabets: Chars("abcd") will match one or more of the letters abcd in any order
+// - min and max: Chars("a-z0-9", 4, 6) will match 4-6 lowercase alphanumeric characters
+// the above can be combined in any order
func Chars(matcher string, repetition ...int) Parser {
return NewParser("["+matcher+"]", charsImpl(matcher, false, repetition...))
}
+// NotChars accepts the full range of input from Chars, but it will stop when any
+// character matches.
func NotChars(matcher string, repetition ...int) Parser {
return NewParser("!["+matcher+"]", charsImpl(matcher, true, repetition...))
}
func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
min, max := parseRepetition(1, -1, repetition...)
- matches, ranges := parseMatcher(matcher)
+ alphabet, ranges := parseMatcher(matcher)
- return func(ps *State) Node {
+ return func(ps *State) Result {
ps.AutoWS()
matched := 0
for ps.Pos+matched < len(ps.Input) {
@@ -173,7 +204,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
r, w := utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:])
- anyMatched := strings.ContainsRune(matches, r)
+ anyMatched := strings.ContainsRune(alphabet, r)
if !anyMatched {
for _, rng := range ranges {
if r >= rng[0] && r <= rng[1] {
@@ -191,11 +222,11 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
if matched < min {
ps.ErrorHere(matcher)
- return Node{}
+ return Result{}
}
result := ps.Input[ps.Pos : ps.Pos+matched]
ps.Advance(matched)
- return Node{Token: result}
+ return Result{Token: result}
}
}