Initial commit

This commit is contained in:
Adam Scarr 2017-08-06 14:31:35 +10:00
commit 68cde88125
10 changed files with 814 additions and 0 deletions

9
.editorconfig Normal file
View File

@ -0,0 +1,9 @@
[*]
end_of_line = lf
insert_final_newline = true
charset = utf-8
trim_trailing_whitespace = true
[*.go]
indent_style = tab
indent_size = 4

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
.idea
*.iml

114
combinator.go Normal file
View File

@ -0,0 +1,114 @@
package parsec
// Nil is a parser that always succeeds without consuming anything: it yields
// a nil node and hands back the pointer it was given.
func Nil(p Pointer) (Node, Pointer) {
	var none Node
	return none, p
}
// Never is a parser that always fails. It reports a "Never matches" error at
// the current position and returns the pointer unchanged.
func Never(p Pointer) (Node, Pointer) {
	return NewError(p.pos, "Never matches"), p
}
// And builds a parser that applies every given parser in order, each one
// starting where the previous one stopped.
//
// On success it returns a Sequence (positioned at the starting offset) of all
// non-nil results together with the final pointer. As soon as one parser
// yields an Error, that error is returned along with the original pointer.
// With no parsers at all, And returns Nil.
func And(parsers ...Parserish) Parser {
	if len(parsers) == 0 {
		return Nil
	}

	steps := ParsifyAll(parsers...)

	return func(p Pointer) (Node, Pointer) {
		matched := make([]Node, 0, len(steps))
		cursor := p

		for _, step := range steps {
			var result Node
			result, cursor = step(cursor)

			switch {
			case result == nil:
				// Parsers such as WS match without producing a node; skip them.
			case IsError(result):
				return result, p
			default:
				matched = append(matched, result)
			}
		}

		return NewSequence(p.pos, matched...), cursor
	}
}
// Any builds a parser that tries each given parser from the same starting
// pointer and returns the result of the first one that does not produce an
// Error (a nil node counts as a match).
//
// If every alternative fails, the error with the greatest position is
// returned (the earliest alternative wins ties) together with the original
// pointer. With no parsers at all, Any returns Nil.
func Any(parsers ...Parserish) Parser {
	if len(parsers) == 0 {
		return Nil
	}

	alternatives := ParsifyAll(parsers...)

	return func(p Pointer) (Node, Pointer) {
		var (
			furthest Error
			failed   bool
		)

		for _, alt := range alternatives {
			result, next := alt(p)

			failure, isErr := result.(Error)
			if !isErr {
				return result, next
			}

			// Remember the failure that got furthest into the input.
			if !failed || failure.pos > furthest.pos {
				furthest = failure
				failed = true
			}
		}

		return furthest, p
	}
}
// Kleene matches opScan zero or more times. If a separator is supplied, the
// first sepScan entry is matched after every opScan match. The Never parser is
// passed as the terminator, so repetition only ends when opScan or the
// separator fails.
func Kleene(opScan Parserish, sepScan ...Parserish) Parser {
	const minMatches = 0
	return manyImpl(minMatches, opScan, Never, sepScan...)
}
// KleeneUntil matches opScan zero or more times, stopping as soon as
// untilScan matches at the current position. If a separator is supplied, the
// first sepScan entry is matched after every opScan match.
func KleeneUntil(opScan Parserish, untilScan Parserish, sepScan ...Parserish) Parser {
	const minMatches = 0
	return manyImpl(minMatches, opScan, untilScan, sepScan...)
}
// Many matches opScan one or more times; it is an error if opScan does not
// match at least once. If a separator is supplied, the first sepScan entry is
// matched after every opScan match.
func Many(opScan Parserish, sepScan ...Parserish) Parser {
	const minMatches = 1
	return manyImpl(minMatches, opScan, Never, sepScan...)
}
// ManyUntil matches opScan one or more times, stopping as soon as untilScan
// matches at the current position. It is an error if untilScan matches before
// opScan has matched once. If a separator is supplied, the first sepScan
// entry is matched after every opScan match.
func ManyUntil(opScan Parserish, untilScan Parserish, sepScan ...Parserish) Parser {
	const minMatches = 1
	return manyImpl(minMatches, opScan, untilScan, sepScan...)
}
// manyImpl is the shared implementation behind Kleene, KleeneUntil, Many and
// ManyUntil.
//
// The returned parser repeats the following until one step stops it:
//  1. if until matches at the current position, stop (an "Unexpected input"
//     error if fewer than min results were collected);
//  2. run op; on error, stop (returning that error if fewer than min results
//     were collected), otherwise collect the result;
//  3. run the separator (sep[0], or Nil when none was given); on error, stop.
//
// On success it returns a Sequence, positioned at the starting offset, of the
// collected results and the pointer reached. On error the original pointer is
// returned. A separator consumed before a failing op stays consumed.
//
// An iteration that leaves the pointer exactly where it started would repeat
// identically forever (e.g. an op such as WS or Nil that succeeds without
// consuming input), so the loop also stops after such an iteration.
func manyImpl(min int, op Parserish, until Parserish, sep ...Parserish) Parser {
	opParser := Parsify(op)
	untilParser := Parsify(until)
	sepParser := Nil
	if len(sep) > 0 {
		sepParser = Parsify(sep[0])
	}

	return func(p Pointer) (Node, Pointer) {
		var node Node
		nodes := make([]Node, 0)
		newP := p

		for {
			iterStart := newP

			// The terminator is only probed; whatever it consumed is discarded.
			if stop, _ := untilParser(newP); !IsError(stop) {
				if len(nodes) < min {
					return NewError(newP.pos, "Unexpected input"), p
				}
				break
			}

			if node, newP = opParser(newP); IsError(node) {
				if len(nodes) < min {
					return node, p
				}
				break
			}
			nodes = append(nodes, node)

			if node, newP = sepParser(newP); IsError(node) {
				break
			}

			// No progress means the next iteration would be identical: bail
			// out instead of spinning forever.
			if newP == iterStart {
				break
			}
		}

		return NewSequence(p.pos, nodes...), newP
	}
}

215
combinator_test.go Normal file
View File

@ -0,0 +1,215 @@
package parsec
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestNil checks that Nil yields a nil node and leaves the pointer untouched.
func TestNil(t *testing.T) {
	start := Pointer{"hello world", 0}

	got, end := Nil(start)

	require.Equal(t, nil, got)
	require.Equal(t, start, end)
}
// TestAnd covers a successful sequence, error propagation and the
// zero-parser case of And.
func TestAnd(t *testing.T) {
	input := Pointer{"hello world", 0}

	t.Run("matches sequence", func(t *testing.T) {
		parser := And("hello", WS, "world")
		got, rest := parser(input)

		want := NewSequence(0, NewToken(0, "hello"), NewToken(6, "world"))
		require.Equal(t, want, got)
		require.Equal(t, 0, rest.Remaining())
	})

	t.Run("returns errors", func(t *testing.T) {
		parser := And("hello", WS, "there")
		got, rest := parser(input)

		// The error points at the failing token, the pointer is rewound.
		require.Equal(t, NewError(6, "Expected there"), got)
		require.Equal(t, 0, rest.pos)
	})

	t.Run("No parsers", func(t *testing.T) {
		assertNilParser(t, And())
	})
}
// TestAny covers first-match selection, furthest-error reporting, nil
// matches and the zero-parser case of Any.
func TestAny(t *testing.T) {
	input := Pointer{"hello world!", 0}

	t.Run("Matches any", func(t *testing.T) {
		got, rest := Any("hello", "world")(input)

		require.Equal(t, NewToken(0, "hello"), got)
		require.Equal(t, 5, rest.pos)
	})

	t.Run("Returns longest error", func(t *testing.T) {
		parser := Any(
			Exact("nope"),
			And(Exact("hello"), WS, Exact("world"), Exact(".")),
			And(Exact("hello"), WS, Exact("brother")),
		)
		got, rest := parser(input)

		require.Equal(t, NewError(11, "Expected ."), got)
		require.Equal(t, 0, rest.pos)
	})

	t.Run("Accepts nil matches", func(t *testing.T) {
		got, rest := Any(Exact("ffffff"), WS)(input)

		require.Equal(t, nil, got)
		require.Equal(t, 0, rest.pos)
	})

	t.Run("No parsers", func(t *testing.T) {
		assertNilParser(t, Any())
	})
}
// TestKleene exercises Kleene with a separator, without one, and with an
// operand that stops matching part-way through the input.
func TestKleene(t *testing.T) {
	input := Pointer{"a,b,c,d,e,", 0}

	t.Run("Matches sequence with sep", func(t *testing.T) {
		got, rest := Kleene(CharRun("abcdefg"), Exact(","))(input)

		want := NewSequence(0,
			NewToken(0, "a"),
			NewToken(2, "b"),
			NewToken(4, "c"),
			NewToken(6, "d"),
			NewToken(8, "e"),
		)
		require.Equal(t, want, got)
		require.Equal(t, 10, rest.pos)
	})

	t.Run("Matches sequence without sep", func(t *testing.T) {
		got, rest := Kleene(Any(CharRun("abcdefg"), Exact(",")))(input)

		want := NewSequence(0,
			NewToken(0, "a"),
			NewToken(1, ","),
			NewToken(2, "b"),
			NewToken(3, ","),
			NewToken(4, "c"),
			NewToken(5, ","),
			NewToken(6, "d"),
			NewToken(7, ","),
			NewToken(8, "e"),
			NewToken(9, ","),
		)
		require.Equal(t, want, got)
		require.Equal(t, 10, rest.pos)
	})

	t.Run("Stops on error", func(t *testing.T) {
		got, rest := Kleene(CharRun("abc"), Exact(","))(input)

		want := NewSequence(0,
			NewToken(0, "a"),
			NewToken(2, "b"),
			NewToken(4, "c"),
		)
		require.Equal(t, want, got)
		require.Equal(t, 6, rest.pos)
		require.Equal(t, "d,e,", rest.Get())
	})
}
// TestMany exercises Many with a separator, without one, with an operand that
// stops matching part-way, and with an operand that never matches.
func TestMany(t *testing.T) {
	input := Pointer{"a,b,c,d,e,", 0}

	t.Run("Matches sequence with sep", func(t *testing.T) {
		got, rest := Many(CharRun("abcdefg"), Exact(","))(input)

		want := NewSequence(0,
			NewToken(0, "a"),
			NewToken(2, "b"),
			NewToken(4, "c"),
			NewToken(6, "d"),
			NewToken(8, "e"),
		)
		require.Equal(t, want, got)
		require.Equal(t, 10, rest.pos)
	})

	t.Run("Matches sequence without sep", func(t *testing.T) {
		got, rest := Many(Any(CharRun("abcdefg"), Exact(",")))(input)

		want := NewSequence(0,
			NewToken(0, "a"),
			NewToken(1, ","),
			NewToken(2, "b"),
			NewToken(3, ","),
			NewToken(4, "c"),
			NewToken(5, ","),
			NewToken(6, "d"),
			NewToken(7, ","),
			NewToken(8, "e"),
			NewToken(9, ","),
		)
		require.Equal(t, want, got)
		require.Equal(t, 10, rest.pos)
	})

	t.Run("Stops on error", func(t *testing.T) {
		got, rest := Many(CharRun("abc"), Exact(","))(input)

		want := NewSequence(0,
			NewToken(0, "a"),
			NewToken(2, "b"),
			NewToken(4, "c"),
		)
		require.Equal(t, want, got)
		require.Equal(t, 6, rest.pos)
		require.Equal(t, "d,e,", rest.Get())
	})

	t.Run("Returns error if nothing matches", func(t *testing.T) {
		got, rest := Many(CharRun("def"), Exact(","))(input)

		require.Equal(t, NewError(0, "Expected some of def"), got)
		require.Equal(t, 0, rest.pos)
		require.Equal(t, "a,b,c,d,e,", rest.Get())
	})
}
// TestKleeneUntil checks that KleeneUntil stops at the terminator and also
// stops when the separator no longer matches.
func TestKleeneUntil(t *testing.T) {
	input := Pointer{"a,b,c,d,e,fg", 0}

	t.Run("Matches sequence with sep", func(t *testing.T) {
		parser := KleeneUntil(CharRun("abcde"), CharRun("d"), Exact(","))
		got, rest := parser(input)

		want := NewSequence(0,
			NewToken(0, "a"),
			NewToken(2, "b"),
			NewToken(4, "c"),
		)
		require.Equal(t, want, got)
		require.Equal(t, 6, rest.pos)
	})

	t.Run("Breaks if separator does not match", func(t *testing.T) {
		parser := KleeneUntil(Char("abcdefg"), Char("y"), Exact(","))
		got, rest := parser(input)

		want := NewSequence(0,
			NewToken(0, "a"),
			NewToken(2, "b"),
			NewToken(4, "c"),
			NewToken(6, "d"),
			NewToken(8, "e"),
			NewToken(10, "f"),
		)
		require.Equal(t, want, got)
		require.Equal(t, 11, rest.pos)
	})
}
// TestManyUntil checks that ManyUntil stops at the terminator and fails when
// the terminator matches before anything was collected.
func TestManyUntil(t *testing.T) {
	input := Pointer{"a,b,c,d,e,", 0}

	t.Run("Matches sequence until", func(t *testing.T) {
		parser := ManyUntil(CharRun("abcdefg"), Char("d"), Exact(","))
		got, rest := parser(input)

		want := NewSequence(0,
			NewToken(0, "a"),
			NewToken(2, "b"),
			NewToken(4, "c"),
		)
		require.Equal(t, want, got)
		require.Equal(t, 6, rest.pos)
	})

	t.Run("Returns error until matches early", func(t *testing.T) {
		parser := ManyUntil(CharRun("abc"), Exact("a"), Exact(","))
		got, rest := parser(input)

		require.Equal(t, NewError(0, "Unexpected input"), got)
		require.Equal(t, 0, rest.pos)
		require.Equal(t, "a,b,c,d,e,", rest.Get())
	})
}
// assertNilParser asserts that parser behaves like Nil: it produces a nil
// node and returns the pointer it was given.
func assertNilParser(t *testing.T, parser Parser) {
	start := Pointer{"fff", 0}

	got, end := parser(start)

	require.Equal(t, nil, got)
	require.Equal(t, start, end)
}

35
examples/html.go Normal file
View File

@ -0,0 +1,35 @@
package main
import (
"fmt"
. "github.com/vektah/goparsify"
)
// html parses a small HTML-like subset starting at p: an element is either a
// run of text (anything up to '<' or '>') or a tag made of an opening tag,
// nested elements and a closing tag. Attributes are currently limited to the
// form name="test".
func html(p Pointer) (Node, Pointer) {
	// Punctuation.
	var (
		lt = Exact("<")
		gt = Exact(">")
		eq = Exact("=")
		sl = Exact("/")
	)

	name := And(Char(Range("a-z")), CharRun(Range("a-zA-Z0-9")))
	text := CharRunUntil("<>")

	// tag refers to itself through element, so it is declared first and
	// handed over by pointer; it is assigned once its parts exist.
	var tag Parser
	element := Any(text, &tag)
	children := Kleene(element)

	//attr := And(identifier, equal, String())
	attribute := And(name, eq, Exact(`"test"`))
	attributes := Kleene(And(attribute, WS))

	opening := And(lt, name, attributes, gt)
	closing := And(lt, sl, name, gt)
	tag = And(opening, children, closing)

	return element(p)
}
// main parses a fixed sample document and prints the resulting node in
// Go-syntax form.
func main() {
	const sample = "<h1>hello world</h1>"

	tree, _ := html(Input(sample))
	fmt.Printf("%#v\n", tree)
}

43
nodes.go Normal file
View File

@ -0,0 +1,43 @@
package parsec
// Node is the result type produced by parsers. Token, Error and Sequence in
// this file all implement it.
type Node interface {
// Pos returns the position associated with the node.
Pos() int
}
// Token is a Node carrying a string value together with the position it was
// created with.
type Token struct {
	pos   int
	Value string
}

// Pos returns the position stored in the token.
func (t Token) Pos() int {
	return t.pos
}

// NewToken builds a Token with the given position and value.
func NewToken(pos int, value string) Token {
	return Token{pos: pos, Value: value}
}
// Error is a Node describing a failed match: a message plus the position it
// was created with.
type Error struct {
	pos   int
	Error string
}

// Pos returns the position stored in the error.
func (e Error) Pos() int {
	return e.pos
}

// NewError builds an Error with the given position and message.
func NewError(pos int, message string) Error {
	return Error{pos: pos, Error: message}
}
// IsError reports whether n holds an Error value. A nil node is not an error.
func IsError(n Node) bool {
	switch n.(type) {
	case Error:
		return true
	default:
		return false
	}
}
// Sequence is a Node grouping an ordered list of child nodes together with
// the position it was created with.
type Sequence struct {
	pos   int
	Nodes []Node
}

// Pos returns the position stored in the sequence.
func (s Sequence) Pos() int {
	return s.pos
}

// NewSequence builds a Sequence with the given position and child nodes.
func NewSequence(pos int, n ...Node) Sequence {
	return Sequence{pos: pos, Nodes: n}
}

122
parser.go Normal file
View File

@ -0,0 +1,122 @@
package parsec
import (
"fmt"
)
// Parser is a function that tries to match input at the given Pointer and
// returns the resulting Node along with a Pointer. The parsers defined in
// this package report a failed match as an Error node paired with the
// pointer they were given, and a successful match with the advanced pointer.
type Parser func(Pointer) (Node, Pointer)
// Parserish is any value that Parsify can turn into a Parser. Parsify
// currently accepts a Parser, a plain func(Pointer) (Node, Pointer), a
// *Parser and a string (matched literally via Exact); anything else makes
// Parsify panic.
//
// This makes recursive grammars cleaner and allows string literals to be used
// directly in most contexts. For example, matching balanced parens:
// ```go
// var group Parser
// group = And("(", Maybe(&group), ")")
// ```
// as opposed to threading an explicit wrapper type through the grammar:
// ```go
// var group ParserPtr{}
// group.P = And(Exact("("), Maybe(group.Parse), Exact(")"))
// ```
type Parserish interface{}
// Parsify converts a Parserish value into a Parser.
//
// Functions with the parser signature and Parser values are used directly, a
// string becomes an Exact match for that string, and a *Parser becomes a
// wrapper that dereferences the pointer each time it is invoked, so the
// target may be assigned after Parsify is called. Any other type panics.
func Parsify(p Parserish) Parser {
	switch v := p.(type) {
	case func(Pointer) (Node, Pointer):
		return Parser(v)

	case Parser:
		return v

	case *Parser:
		// Todo: Maybe capture this stack and on nil show it? Is there a good error library to do this?
		return func(ptr Pointer) (Node, Pointer) {
			target := *v
			return target(ptr)
		}

	case string:
		return Exact(v)

	default:
		panic(fmt.Errorf("cant turn a `%T` into a parser", v))
	}
}
// ParsifyAll converts every argument with Parsify and returns the resulting
// parsers in the same order.
func ParsifyAll(parsers ...Parserish) []Parser {
	converted := make([]Parser, 0, len(parsers))
	for _, candidate := range parsers {
		converted = append(converted, Parsify(candidate))
	}
	return converted
}
// Exact returns a parser that matches the literal string match. On success
// it yields a Token holding match and advances by len(match) bytes; otherwise
// it yields an "Expected <match>" error and the unchanged pointer.
func Exact(match string) Parser {
	return func(p Pointer) (Node, Pointer) {
		if p.HasPrefix(match) {
			return NewToken(p.pos, match), p.Advance(len(match))
		}
		return NewError(p.pos, "Expected "+match), p
	}
}
// Char returns a parser that matches a single character out of the set given
// by match. On success it yields a Token holding that character; otherwise it
// yields an "Expected one of <match>" error and the unchanged pointer.
func Char(match string) Parser {
	return func(p Pointer) (Node, Pointer) {
		accepted, rest := p.Accept(match)
		if accepted != "" {
			return NewToken(p.pos, accepted), rest
		}
		return NewError(p.pos, "Expected one of "+match), p
	}
}
// CharRun returns a parser that matches one or more consecutive characters
// taken from the set given by match. On success it yields a Token holding the
// whole run; if nothing matches it yields an "Expected some of <match>" error
// and the unchanged pointer.
func CharRun(match string) Parser {
	return func(p Pointer) (Node, Pointer) {
		run, rest := p.AcceptRun(match)
		if run != "" {
			return NewToken(p.pos, run), rest
		}
		return NewError(p.pos, "Expected some of "+match), p
	}
}
// CharRunUntil returns a parser that matches one or more consecutive
// characters up to, but not including, the first character contained in
// match (or the end of input). On success it yields a Token holding the run;
// if the run is empty it yields an "Expected some of <match>" error and the
// unchanged pointer.
func CharRunUntil(match string) Parser {
	return func(p Pointer) (Node, Pointer) {
		run, rest := p.AcceptUntil(match)
		if run != "" {
			return NewToken(p.pos, run), rest
		}
		return NewError(p.pos, "Expected some of "+match), p
	}
}
// Range expands a compact range description such as "a-z0-9" into the string
// of every character it covers ("abc...xyz0123456789").
//
// The description is read in groups of three runes: start, a separator rune
// (conventionally '-', not validated), and end. Both ends are inclusive and a
// reversed pair ("z-a") is expanded in ascending order. Range panics if the
// number of runes is not a multiple of three.
func Range(r string) string {
	runes := []rune(r)
	if len(runes)%3 != 0 {
		panic("ranges should be in the form a-z0-9")
	}

	// Collect into a rune slice and convert once, instead of growing a string
	// one character at a time.
	var expanded []rune
	for i := 0; i < len(runes); i += 3 {
		lo, hi := runes[i], runes[i+2]
		if lo > hi {
			lo, hi = hi, lo
		}
		for c := lo; c <= hi; c++ {
			expanded = append(expanded, c)
		}
	}
	return string(expanded)
}
// WS skips any run of whitespace at the current position: tab, newline,
// vertical tab, form feed, carriage return, space, U+0085 (NEL) and U+00A0
// (NBSP). It always succeeds, returning a nil node and the pointer just past
// the whitespace (the same pointer if there was none).
func WS(p Pointer) (Node, Pointer) {
	// NEL and NBSP must be written with \u escapes so the set contains their
	// UTF-8 encodings. "\x85\xA0" would instead put two raw, invalid bytes in
	// the set, which never match the real code points but do match U+FFFD
	// and any malformed byte in the input.
	_, p2 := p.AcceptRun("\t\n\v\f\r \u0085\u00A0")
	return nil, p2
}

107
parser_test.go Normal file
View File

@ -0,0 +1,107 @@
package parsec
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestParsify checks the conversions Parsify performs for strings, parsers
// and pointers to parsers.
func TestParsify(t *testing.T) {
	input := Pointer{"ffooo", 0}
	want := NewToken(0, "ff")

	t.Run("strings", func(t *testing.T) {
		got, _ := Parsify("ff")(input)
		require.Equal(t, want, got)
	})

	t.Run("parsers", func(t *testing.T) {
		got, _ := Parsify(CharRun("f"))(input)
		require.Equal(t, want, got)
	})

	t.Run("*parsers", func(t *testing.T) {
		// The target is assigned only after Parsify has wrapped the pointer.
		var target Parser
		wrapped := Parsify(&target)
		target = CharRun("f")

		got, _ := wrapped(input)
		require.Equal(t, want, got)
	})
}
// TestExact checks both the matching and the failing path of Exact.
func TestExact(t *testing.T) {
	input := Pointer{"fooo", 0}

	t.Run("success", func(t *testing.T) {
		got, rest := Exact("fo")(input)

		require.Equal(t, NewToken(0, "fo"), got)
		require.Equal(t, input.Advance(2), rest)
	})

	t.Run("error", func(t *testing.T) {
		got, rest := Exact("bar")(input)

		require.Equal(t, NewError(0, "Expected bar"), got)
		require.Equal(t, 0, rest.pos)
	})
}
// TestChar checks both the matching and the failing path of Char.
func TestChar(t *testing.T) {
	input := Pointer{"foobar", 0}

	t.Run("success", func(t *testing.T) {
		got, rest := Char("fo")(input)

		require.Equal(t, NewToken(0, "f"), got)
		require.Equal(t, input.Advance(1), rest)
	})

	t.Run("error", func(t *testing.T) {
		got, rest := Char("bar")(input)

		require.Equal(t, NewError(0, "Expected one of bar"), got)
		require.Equal(t, 0, rest.pos)
	})
}
// TestCharRun checks both the matching and the failing path of CharRun.
func TestCharRun(t *testing.T) {
	input := Pointer{"foobar", 0}

	t.Run("success", func(t *testing.T) {
		got, rest := CharRun("fo")(input)

		require.Equal(t, NewToken(0, "foo"), got)
		require.Equal(t, input.Advance(3), rest)
	})

	t.Run("error", func(t *testing.T) {
		got, rest := CharRun("bar")(input)

		require.Equal(t, NewError(0, "Expected some of bar"), got)
		require.Equal(t, 0, rest.pos)
	})
}
// TestCharUntil checks both the matching and the failing path of
// CharRunUntil.
func TestCharUntil(t *testing.T) {
	input := Pointer{"foobar", 0}

	t.Run("success", func(t *testing.T) {
		got, rest := CharRunUntil("z")(input)

		require.Equal(t, NewToken(0, "foobar"), got)
		require.Equal(t, input.Advance(6), rest)
	})

	t.Run("error", func(t *testing.T) {
		got, rest := CharRunUntil("f")(input)

		require.Equal(t, NewError(0, "Expected some of f"), got)
		require.Equal(t, 0, rest.pos)
	})
}
// TestWS checks that WS skips leading whitespace and yields a nil node.
func TestWS(t *testing.T) {
	// Two leading spaces, so that WS is expected to advance by exactly two
	// bytes (a single space would make the Advance(2) expectation fail).
	p := Pointer{"  fooo", 0}

	node, p2 := WS(p)

	require.Equal(t, nil, node)
	require.Equal(t, p.Advance(2), p2)
}
func TestRange(t *testing.T) {
require.Equal(t, "abcdefg", Range("a-g"))
require.Equal(t, "01234abcd", Range("0-4a-d"))
}

81
pointer.go Normal file
View File

@ -0,0 +1,81 @@
package parsec
import (
"strings"
"unicode/utf8"
)
const (
	// EOF is the rune Pointer.Next returns when no input remains.
	EOF rune = -1
)

// Input returns a Pointer positioned at the start of s.
func Input(s string) Pointer {
	return Pointer{s, 0}
}

// Pointer is a cursor into an input string. It is a plain value: methods
// never modify the receiver and return a new Pointer instead.
//
// pos is a byte offset. Advance does not clamp it, so a Pointer may sit past
// the end of the input; every read method treats such a pointer as being at
// the end of input rather than panicking.
type Pointer struct {
	input string
	pos   int
}

// Advance returns a Pointer moved forward by i bytes.
func (p Pointer) Advance(i int) Pointer {
	return Pointer{p.input, p.pos + i}
}

// rest returns the unread part of the input, or "" when the position is at
// or beyond the end.
func (p Pointer) rest() string {
	if p.pos >= len(p.input) {
		return ""
	}
	return p.input[p.pos:]
}

// Get returns the unread part of the input.
func (p Pointer) Get() string {
	return p.rest()
}

// Remaining returns the number of unread bytes, never less than zero.
func (p Pointer) Remaining() int {
	remaining := len(p.input) - p.pos
	if remaining < 0 {
		return 0
	}
	return remaining
}

// Next decodes the rune at the current position and returns it together with
// a Pointer advanced past it. At the end of input it returns EOF and the
// unchanged Pointer.
func (p Pointer) Next() (rune, Pointer) {
	if p.pos >= len(p.input) {
		return EOF, p
	}
	r, w := utf8.DecodeRuneInString(p.input[p.pos:])
	return r, p.Advance(w)
}

// HasPrefix reports whether the unread input starts with s.
func (p Pointer) HasPrefix(s string) bool {
	return strings.HasPrefix(p.rest(), s)
}

// Accept consumes the next rune if it is contained in valid, returning it as
// a string together with the advanced Pointer. Otherwise it returns "" and
// the unchanged Pointer.
func (p Pointer) Accept(valid string) (string, Pointer) {
	c, newP := p.Next()
	if strings.ContainsRune(valid, c) {
		return string(c), newP
	}
	return "", p
}

// acceptWhile consumes the longest leading run of runes for which keep
// returns true and returns that run with the Pointer advanced past it.
func (p Pointer) acceptWhile(keep func(rune) bool) (string, Pointer) {
	rest := p.rest()
	matched := 0
	for matched < len(rest) {
		r, w := utf8.DecodeRuneInString(rest[matched:])
		if !keep(r) {
			break
		}
		matched += w
	}
	return rest[:matched], p.Advance(matched)
}

// AcceptRun consumes the longest leading run of runes that are all contained
// in valid. The run may be empty.
func (p Pointer) AcceptRun(valid string) (string, Pointer) {
	return p.acceptWhile(func(r rune) bool {
		return strings.ContainsRune(valid, r)
	})
}

// AcceptUntil consumes runes up to, but not including, the first rune
// contained in invalid, or up to the end of input. The run may be empty.
func (p Pointer) AcceptUntil(invalid string) (string, Pointer) {
	return p.acceptWhile(func(r rune) bool {
		return !strings.ContainsRune(invalid, r)
	})
}

86
pointer_test.go Normal file
View File

@ -0,0 +1,86 @@
package parsec
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestPointer exercises each Pointer method against the input "fooo".
func TestPointer(t *testing.T) {
	start := Pointer{"fooo", 0}

	t.Run("Advances", func(t *testing.T) {
		moved := start.Advance(2)

		require.Equal(t, Pointer{"fooo", 2}, moved)
		// The receiver itself must not change.
		require.Equal(t, Pointer{"fooo", 0}, start)
		require.Equal(t, Pointer{"fooo", 3}, moved.Advance(1))
	})

	t.Run("Get", func(t *testing.T) {
		require.Equal(t, "fooo", start.Get())
		require.Equal(t, "ooo", start.Advance(1).Get())
	})

	t.Run("Remaining", func(t *testing.T) {
		require.Equal(t, 4, start.Remaining())
		require.Equal(t, 0, start.Advance(4).Remaining())
		require.Equal(t, 0, start.Advance(10).Remaining())
	})

	t.Run("Next takes one character", func(t *testing.T) {
		r, after := start.Next()

		require.Equal(t, start.Advance(1), after)
		require.Equal(t, 'f', r)
	})

	t.Run("Next handles EOF", func(t *testing.T) {
		r, after := start.Advance(5).Next()

		require.Equal(t, start.Advance(5), after)
		require.Equal(t, EOF, r)
	})

	t.Run("HasPrefix", func(t *testing.T) {
		require.True(t, start.HasPrefix("fo"))
		require.False(t, start.HasPrefix("ooo"))
		require.True(t, start.Advance(1).HasPrefix("ooo"))
		require.False(t, start.Advance(1).HasPrefix("oooo"))
	})

	t.Run("Accept", func(t *testing.T) {
		got, after := start.Accept("abcdef")
		require.Equal(t, "f", got)
		require.Equal(t, start.Advance(1), after)

		got, after = start.Accept("ooooo")
		require.Equal(t, "", got)
		require.Equal(t, start.Advance(0), after)

		got, after = start.Advance(4).Accept("ooooo")
		require.Equal(t, "", got)
		require.Equal(t, start.Advance(4), after)
	})

	t.Run("AcceptRun", func(t *testing.T) {
		got, afterF := start.AcceptRun("f")
		require.Equal(t, "f", got)
		require.Equal(t, start.Advance(1), afterF)

		got, afterAll := start.AcceptRun("fo")
		require.Equal(t, "fooo", got)
		require.Equal(t, start.Advance(4), afterAll)

		got, afterEnd := afterAll.AcceptRun("fo")
		require.Equal(t, "", got)
		require.Equal(t, start.Advance(4), afterEnd)
	})

	t.Run("AcceptUntil", func(t *testing.T) {
		got, afterF := start.AcceptUntil("o")
		require.Equal(t, "f", got)
		require.Equal(t, start.Advance(1), afterF)

		got, afterOs := afterF.AcceptRun("o")
		require.Equal(t, "ooo", got)
		require.Equal(t, start.Advance(4), afterOs)
	})
}