188 lines
4.0 KiB
Go
188 lines
4.0 KiB
Go
package parsec
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// Parser is the function type shared by all parsers in this file: it receives a
// Pointer and returns a Node together with a Pointer.
//
// The leaf parsers defined here (Exact, Char, CharRun, CharRunUntil, Range)
// return the result of NewError and the unchanged Pointer when they do not
// match, and the result of NewToken plus an advanced Pointer when they do.
type Parser func(Pointer) (Node, Pointer)
|
|
|
|
// Parserish types are any type that can be turned into a Parser by Parsify
|
|
// These currently include Parser, func(Pointer) (Node, Pointer), *Parser and string literals.
|
|
//
|
|
// This makes recursive grammars cleaner and allows string literals to be used directly in most contexts.
|
|
// eg, matching balanced paren:
|
|
// ```go
|
|
// var group Parser
|
|
// group = And("(", Maybe(&group), ")")
|
|
// ```
|
|
// vs
|
|
// ```go
|
|
// group := ParserPtr{}
|
|
// group.P = And(Exact("("), Maybe(group.Parse), Exact(")"))
|
|
// ```
|
|
// Parserish is the empty interface, so any value is accepted at compile time;
// Parsify inspects the dynamic type at run time and panics for a type it
// cannot convert into a Parser.
type Parserish interface{}
|
|
|
|
func Parsify(p Parserish) Parser {
|
|
switch p := p.(type) {
|
|
case func(Pointer) (Node, Pointer):
|
|
return Parser(p)
|
|
case Parser:
|
|
return p
|
|
case *Parser:
|
|
// Todo: Maybe capture this stack and on nil show it? Is there a good error library to do this?
|
|
return func(ptr Pointer) (Node, Pointer) {
|
|
return (*p)(ptr)
|
|
}
|
|
case string:
|
|
return Exact(p)
|
|
default:
|
|
panic(fmt.Errorf("cant turn a `%T` into a parser", p))
|
|
}
|
|
}
|
|
|
|
func ParsifyAll(parsers ...Parserish) []Parser {
|
|
ret := make([]Parser, len(parsers))
|
|
for i, parser := range parsers {
|
|
ret[i] = Parsify(parser)
|
|
}
|
|
return ret
|
|
}
|
|
|
|
func ParseString(parser Parserish, input string) (result Node, remaining string, err error) {
|
|
p := Parsify(parser)
|
|
result, pointer := p(Pointer{input, 0})
|
|
|
|
if err, isErr := result.(Error); isErr {
|
|
return nil, pointer.Get(), err
|
|
}
|
|
|
|
return result, pointer.Get(), nil
|
|
}
|
|
|
|
func Exact(match string) Parser {
|
|
return func(p Pointer) (Node, Pointer) {
|
|
if !strings.HasPrefix(p.Get(), match) {
|
|
return NewError(p.pos, "Expected "+match), p
|
|
}
|
|
|
|
return NewToken(p.pos, match), p.Advance(len(match))
|
|
}
|
|
}
|
|
|
|
func Char(match string) Parser {
|
|
return func(p Pointer) (Node, Pointer) {
|
|
r, w := utf8.DecodeRuneInString(p.Get())
|
|
|
|
if !strings.ContainsRune(match, r) {
|
|
return NewError(p.pos, "Expected one of "+string(match)), p
|
|
|
|
}
|
|
return NewToken(p.pos, string(r)), p.Advance(w)
|
|
}
|
|
}
|
|
|
|
func CharRun(match string) Parser {
|
|
return func(p Pointer) (Node, Pointer) {
|
|
matched := 0
|
|
for p.pos+matched < len(p.input) {
|
|
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
|
|
if !strings.ContainsRune(match, r) {
|
|
break
|
|
}
|
|
matched += w
|
|
}
|
|
|
|
if matched == 0 {
|
|
return NewError(p.pos, "Expected some of "+match), p
|
|
}
|
|
|
|
return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched)
|
|
}
|
|
}
|
|
|
|
func CharRunUntil(match string) Parser {
|
|
return func(p Pointer) (Node, Pointer) {
|
|
matched := 0
|
|
for p.pos+matched < len(p.input) {
|
|
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
|
|
if strings.ContainsRune(match, r) {
|
|
break
|
|
}
|
|
matched += w
|
|
}
|
|
|
|
if matched == 0 {
|
|
return NewError(p.pos, "Expected some of "+match), p
|
|
}
|
|
|
|
return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched)
|
|
}
|
|
}
|
|
|
|
func Range(r string, repetition ...int) Parser {
|
|
min := int(1)
|
|
max := int(-1)
|
|
switch len(repetition) {
|
|
case 0:
|
|
case 1:
|
|
min = repetition[0]
|
|
case 2:
|
|
min = repetition[0]
|
|
max = repetition[1]
|
|
default:
|
|
panic(fmt.Errorf("Dont know what %d repetion args mean", len(repetition)))
|
|
}
|
|
|
|
runes := []rune(r)
|
|
if len(runes)%3 != 0 {
|
|
panic("ranges should be in the form a-z0-9")
|
|
}
|
|
|
|
var ranges [][]rune
|
|
for i := 0; i < len(runes); i += 3 {
|
|
start := runes[i]
|
|
end := runes[i+2]
|
|
if start <= end {
|
|
ranges = append(ranges, []rune{start, end})
|
|
} else {
|
|
ranges = append(ranges, []rune{end, start})
|
|
}
|
|
}
|
|
|
|
return func(p Pointer) (Node, Pointer) {
|
|
matched := 0
|
|
for p.pos+matched < len(p.input) {
|
|
if max != -1 && matched >= max {
|
|
break
|
|
}
|
|
|
|
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
|
|
|
|
anyMatched := false
|
|
for _, rng := range ranges {
|
|
if r >= rng[0] && r <= rng[1] {
|
|
anyMatched = true
|
|
}
|
|
}
|
|
if !anyMatched {
|
|
break
|
|
}
|
|
|
|
matched += w
|
|
}
|
|
|
|
if matched < min {
|
|
return NewError(p.pos+matched, fmt.Sprintf("Expected at least %d more of %s", min-matched, r)), p
|
|
}
|
|
|
|
return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched)
|
|
}
|
|
}
|
|
|
|
func WS(p Pointer) (Node, Pointer) {
|
|
_, p2 := CharRun("\t\n\v\f\r \x85\xA0")(p)
|
|
return nil, p2
|
|
}
|