Clean up Pointer

This commit is contained in:
Adam Scarr 2017-08-06 15:43:23 +10:00
parent 68cde88125
commit 8b343d6360
7 changed files with 186 additions and 171 deletions

View File

@ -20,7 +20,7 @@ func TestAnd(t *testing.T) {
t.Run("matches sequence", func(t *testing.T) {
node, p2 := And("hello", WS, "world")(p)
require.Equal(t, NewSequence(0, NewToken(0, "hello"), NewToken(6, "world")), node)
require.Equal(t, 0, p2.Remaining())
require.Equal(t, "", p2.Get())
})
t.Run("returns errors", func(t *testing.T) {

View File

@ -7,11 +7,7 @@ import (
)
func html(p Pointer) (Node, Pointer) {
opentag := Exact("<")
closetag := Exact(">")
equal := Exact("=")
slash := Exact("/")
identifier := And(Char(Range("a-z")), CharRun(Range("a-zA-Z0-9")))
identifier := And(Range("a-z", 1, 1), Range("a-zA-Z0-9"))
text := CharRunUntil("<>")
var tag Parser
@ -19,17 +15,20 @@ func html(p Pointer) (Node, Pointer) {
element := Any(text, &tag)
elements := Kleene(element)
//attr := And(identifier, equal, String())
attr := And(identifier, equal, Exact(`"test"`))
attr := And(identifier, "=", `"test"`)
attrws := And(attr, WS)
attrs := Kleene(attrws)
tstart := And(opentag, identifier, attrs, closetag)
tend := And(opentag, slash, identifier, closetag)
tstart := And("<", identifier, attrs, ">")
tend := And("</", identifier, ">")
tag = And(tstart, elements, tend)
return element(p)
}
// main parses the sample document "<h1>hello world</h1>" with the html parser
// and prints the resulting node using the Go-syntax (%#v) verb.
func main() {
node, _ := html(Input("<h1>hello world</h1>"))
fmt.Printf("%#v\n", node)
result, _, err := ParseString(html, "<h1>hello world</h1>")
// A parse error is fatal for this example program.
if err != nil {
panic(err)
}
fmt.Printf("%#v\n", result)
}

View File

@ -1,5 +1,7 @@
package parsec
import "fmt"
type Node interface {
Pos() int
}
@ -17,10 +19,11 @@ func NewToken(pos int, value string) Token {
type Error struct {
pos int
Error string
Message string
}
func (e Error) Pos() int { return e.pos }
func (e Error) Error() string { return fmt.Sprintf("offset %d: %s", e.pos, e.Message) }
func NewError(pos int, message string) Error {
return Error{pos, message}

115
parser.go
View File

@ -2,6 +2,8 @@ package parsec
import (
"fmt"
"strings"
"unicode/utf8"
)
type Parser func(Pointer) (Node, Pointer)
@ -48,9 +50,20 @@ func ParsifyAll(parsers ...Parserish) []Parser {
return ret
}
// ParseString applies parser to input, starting at offset 0.
//
// parser is first converted with Parsify. If the node produced is an Error,
// ParseString returns a nil result and that Error; otherwise it returns the
// node with a nil error. In both cases remaining is the text from the
// returned Pointer's position onward.
func ParseString(parser Parserish, input string) (result Node, remaining string, err error) {
	start := Pointer{input, 0}
	node, end := Parsify(parser)(start)

	parseErr, failed := node.(Error)
	if !failed {
		return node, end.Get(), nil
	}
	return nil, end.Get(), parseErr
}
func Exact(match string) Parser {
return func(p Pointer) (Node, Pointer) {
if !p.HasPrefix(match) {
if !strings.HasPrefix(p.Get(), match) {
return NewError(p.pos, "Expected "+match), p
}
@ -60,63 +73,115 @@ func Exact(match string) Parser {
// Char returns a Parser that accepts exactly one rune, provided that rune
// occurs in match. On success the node is a Token holding that rune and the
// Pointer is moved past it; otherwise the node is an Error
// ("Expected one of ...") and the original Pointer is returned unchanged.
func Char(match string) Parser {
return func(p Pointer) (Node, Pointer) {
r, p2 := p.Accept(match)
if r == "" {
return NewError(p.pos, "Expected one of "+string(match)), p
}
r, w := utf8.DecodeRuneInString(p.Get())
return NewToken(p.pos, string(r)), p2
if !strings.ContainsRune(match, r) {
return NewError(p.pos, "Expected one of "+string(match)), p
}
return NewToken(p.pos, string(r)), p.Advance(w)
}
}
// CharRun returns a Parser that consumes the longest run of consecutive runes
// that all occur in match. A non-empty run yields a Token containing the run
// and a Pointer advanced past it; an empty run yields an Error
// ("Expected some of ...") and the original Pointer.
func CharRun(match string) Parser {
return func(p Pointer) (Node, Pointer) {
s, p2 := p.AcceptRun(match)
if s == "" {
// matched is the length of the run in bytes (each step adds the rune width w).
matched := 0
for p.pos+matched < len(p.input) {
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
if !strings.ContainsRune(match, r) {
break
}
matched += w
}
if matched == 0 {
return NewError(p.pos, "Expected some of "+match), p
}
return NewToken(p.pos, s), p2
return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched)
}
}
// CharRunUntil returns a Parser that consumes runes up to, but not including,
// the first rune that occurs in match (or up to the end of the input). A
// non-empty run yields a Token containing it and a Pointer advanced past it;
// an empty run yields an Error and the original Pointer.
//
// NOTE(review): the error text reads "Expected some of "+match even though
// match lists the terminating runes here, not the accepted ones — confirm the
// wording is intended.
func CharRunUntil(match string) Parser {
return func(p Pointer) (Node, Pointer) {
s, p2 := p.AcceptUntil(match)
if s == "" {
// matched is the length of the run in bytes (each step adds the rune width w).
matched := 0
for p.pos+matched < len(p.input) {
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
if strings.ContainsRune(match, r) {
break
}
matched += w
}
if matched == 0 {
return NewError(p.pos, "Expected some of "+match), p
}
return NewToken(p.pos, s), p2
return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched)
}
}
func Range(r string) string {
// Range returns a Parser that matches a run of input runes, each of which
// falls inside one of the inclusive ranges described by r (for example
// "a-z0-9"). r is read three runes at a time: the first and third are the
// range bounds (swapped if given in descending order) and the middle rune is
// not inspected.
//
// The optional repetition arguments bound the length of the run: none means a
// minimum of 1 with no maximum, one value sets the minimum, and two values set
// the minimum and the maximum. Any other number of values panics, as does an r
// whose rune count is not a multiple of three.
//
// On success the node is a Token holding the matched text; if fewer than the
// minimum were matched the node is an Error and the original Pointer is
// returned.
//
// NOTE(review): the bounds are compared against the number of bytes matched,
// not the number of runes — confirm that is intended for multi-byte input.
func Range(r string, repetition ...int) Parser {
min := int(1)
max := int(-1)
switch len(repetition) {
case 0:
case 1:
min = repetition[0]
case 2:
min = repetition[0]
max = repetition[1]
default:
panic(fmt.Errorf("Dont know what %d repetion args mean", len(repetition)))
}
runes := []rune(r)
if len(runes)%3 != 0 {
panic("ranges should be in the form a-z0-9")
}
match := ""
var ranges [][]rune
for i := 0; i < len(runes); i += 3 {
start := runes[i]
end := runes[i+2]
if start > end {
tmp := start
start = end
end = tmp
}
for c := start; c <= end; c++ {
match += string(c)
if start <= end {
ranges = append(ranges, []rune{start, end})
} else {
ranges = append(ranges, []rune{end, start})
}
}
return match
return func(p Pointer) (Node, Pointer) {
matched := 0
for p.pos+matched < len(p.input) {
// max == -1 means "no upper bound"; matched grows by the byte width of each rune.
if max != -1 && matched >= max {
break
}
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
anyMatched := false
for _, rng := range ranges {
if r >= rng[0] && r <= rng[1] {
anyMatched = true
}
}
if !anyMatched {
break
}
matched += w
}
if matched < min {
return NewError(p.pos+matched, fmt.Sprintf("Expected at least %d more of %s", min-matched, r)), p
}
return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched)
}
}
// WS skips a run of the whitespace characters listed in the literal below and
// returns a nil node together with the resulting Pointer. The outcome of the
// underlying run matcher is discarded, so WS never reports an error.
//
// NOTE(review): "\x85" and "\xA0" are single-byte escapes, not the runes
// U+0085 / U+00A0 — confirm the literal matches the intended whitespace set.
func WS(p Pointer) (Node, Pointer) {
_, p2 := p.AcceptRun("\t\n\v\f\r \x85\xA0")
_, p2 := CharRun("\t\n\v\f\r \x85\xA0")(p)
return nil, p2
}

View File

@ -19,6 +19,13 @@ func TestParsify(t *testing.T) {
require.Equal(t, NewToken(0, "ff"), node)
})
t.Run("parser funcs", func(t *testing.T) {
node, _ := Parsify(func(p Pointer) (Node, Pointer) {
return NewToken(0, "hello"), p
})(p)
require.Equal(t, NewToken(0, "hello"), node)
})
t.Run("*parsers", func(t *testing.T) {
var parser Parser
parserfied := Parsify(&parser)
@ -27,6 +34,20 @@ func TestParsify(t *testing.T) {
node, _ := parserfied(p)
require.Equal(t, NewToken(0, "ff"), node)
})
require.Panics(t, func() {
Parsify(1)
})
}
// TestParsifyAll checks that ParsifyAll turns each argument into its own
// parser: run against "ffooo", the "ff" parser yields a token and the "gg"
// parser yields an error.
func TestParsifyAll(t *testing.T) {
	parsers := ParsifyAll("ff", "gg")
	input := Pointer{"ffooo", 0}

	matched, _ := parsers[0](input)
	require.Equal(t, NewToken(0, "ff"), matched)

	failed, _ := parsers[1](input)
	require.Equal(t, NewError(0, "Expected gg"), failed)
}
func TestExact(t *testing.T) {
@ -102,6 +123,58 @@ func TestWS(t *testing.T) {
}
// TestRange exercises Range: full, partial and repetition-limited matches,
// the error reported when too few runes match, and the panics for a malformed
// range spec or too many repetition arguments.
func TestRange(t *testing.T) {
require.Equal(t, "abcdefg", Range("a-g"))
require.Equal(t, "01234abcd", Range("0-4a-d"))
t.Run("full match", func(t *testing.T) {
node, p := Range("a-z")(Pointer{"foobar", 0})
require.Equal(t, NewToken(0, "foobar"), node)
require.Equal(t, "", p.Get())
})
t.Run("partial match", func(t *testing.T) {
// "d-a" is written in descending order on purpose.
node, p := Range("1-4d-a")(Pointer{"a1b2c3d4efg", 0})
require.Equal(t, NewToken(0, "a1b2c3d4"), node)
require.Equal(t, "efg", p.Get())
})
t.Run("limited match", func(t *testing.T) {
node, p := Range("1-4d-a", 1, 2)(Pointer{"a1b2c3d4efg", 0})
require.Equal(t, NewToken(0, "a1"), node)
require.Equal(t, "b2c3d4efg", p.Get())
})
t.Run("no match", func(t *testing.T) {
node, p := Range("0-9")(Pointer{"ffffff", 0})
require.Equal(t, NewError(0, "Expected at least 1 more of 0-9"), node)
require.Equal(t, 0, p.pos)
})
t.Run("no match with min", func(t *testing.T) {
node, p := Range("0-9", 4)(Pointer{"ffffff", 0})
require.Equal(t, NewError(0, "Expected at least 4 more of 0-9"), node)
require.Equal(t, 0, p.pos)
})
// A spec whose length is not a multiple of three is rejected.
require.Panics(t, func() {
Range("abcd")
})
// More than two repetition arguments are rejected.
require.Panics(t, func() {
Range("a-b", 1, 2, 3)
})
}
// TestParseString covers both outcomes of ParseString: a successful parse
// that leaves unconsumed input, and a failed parse that returns a nil result,
// the untouched input and a formatted error.
func TestParseString(t *testing.T) {
	const input = "hello world"

	t.Run("partial match", func(t *testing.T) {
		node, rest, err := ParseString("hello", input)

		require.Equal(t, NewToken(0, "hello"), node)
		require.Equal(t, " world", rest)
		require.NoError(t, err)
	})

	t.Run("error", func(t *testing.T) {
		node, rest, err := ParseString("world", input)

		require.Equal(t, nil, node)
		require.Equal(t, "hello world", rest)
		require.Error(t, err)
		require.Equal(t, "offset 0: Expected world", err.Error())
	})
}

View File

@ -1,18 +1,5 @@
package parsec
import (
"strings"
"unicode/utf8"
)
const (
EOF rune = -1
)
// Input wraps s in a Pointer positioned at offset 0.
func Input(s string) Pointer {
	start := Pointer{input: s, pos: 0}
	return start
}
type Pointer struct {
input string
pos int
@ -23,59 +10,8 @@ func (p Pointer) Advance(i int) Pointer {
}
// Get returns the unread part of the input: everything from the current
// offset to the end. An offset beyond the end of the input yields "".
func (p Pointer) Get() string {
	if p.pos <= len(p.input) {
		return p.input[p.pos:]
	}
	return ""
}
// Remaining reports how many bytes of input lie at or after the current
// offset. It never returns a negative number: an offset beyond the end of the
// input counts as zero bytes remaining.
func (p Pointer) Remaining() int {
	if left := len(p.input) - p.pos; left > 0 {
		return left
	}
	return 0
}
// Next decodes the rune at the current offset and returns it together with a
// Pointer advanced by that rune's encoded width. When the offset is at or
// beyond the end of the input it returns EOF and the Pointer unchanged.
func (p Pointer) Next() (rune, Pointer) {
	if p.pos < len(p.input) {
		char, size := utf8.DecodeRuneInString(p.input[p.pos:])
		return char, p.Advance(size)
	}
	return EOF, p
}
// HasPrefix reports whether the unread input (everything from the current
// offset onward) begins with s.
//
// An offset beyond the end of the input is treated as having no unread input
// instead of triggering a slice-bounds panic, so only the empty prefix matches
// in that state.
func (p Pointer) HasPrefix(s string) bool {
	rest := ""
	if p.pos <= len(p.input) {
		rest = p.input[p.pos:]
	}
	return strings.HasPrefix(rest, s)
}
// Accept reads the next rune and, if it occurs in valid, returns it as a
// string together with the Pointer positioned after it. Otherwise it returns
// "" and the original Pointer.
func (p Pointer) Accept(valid string) (string, Pointer) {
	char, after := p.Next()
	if !strings.ContainsRune(valid, char) {
		return "", p
	}
	return string(char), after
}
// AcceptRun consumes the longest run of consecutive runes, starting at the
// current offset, that all occur in valid. It returns the consumed text (which
// may be empty) and a Pointer advanced past it.
func (p Pointer) AcceptRun(valid string) (string, Pointer) {
	// end is the byte offset just past the last accepted rune.
	end := p.pos
	for end < len(p.input) {
		char, size := utf8.DecodeRuneInString(p.input[end:])
		if !strings.ContainsRune(valid, char) {
			break
		}
		end += size
	}
	return p.input[p.pos:end], p.Advance(end - p.pos)
}
// AcceptUntil consumes runes from the current offset up to, but not
// including, the first rune that occurs in invalid, or up to the end of the
// input if there is none. It returns the consumed text (which may be empty)
// and a Pointer advanced past it.
func (p Pointer) AcceptUntil(invalid string) (string, Pointer) {
	// stop is the byte offset of the first rejected rune (or the end of input).
	stop := p.pos
	for stop < len(p.input) {
		char, size := utf8.DecodeRuneInString(p.input[stop:])
		if strings.ContainsRune(invalid, char) {
			break
		}
		stop += size
	}
	return p.input[p.pos:stop], p.Advance(stop - p.pos)
}

View File

@ -19,68 +19,7 @@ func TestPointer(t *testing.T) {
t.Run("Get", func(t *testing.T) {
require.Equal(t, "fooo", p.Get())
require.Equal(t, "ooo", p.Advance(1).Get())
})
t.Run("Remaining", func(t *testing.T) {
require.Equal(t, 4, p.Remaining())
require.Equal(t, 0, p.Advance(4).Remaining())
require.Equal(t, 0, p.Advance(10).Remaining())
})
t.Run("Next takes one character", func(t *testing.T) {
s, p2 := p.Next()
require.Equal(t, p.Advance(1), p2)
require.Equal(t, 'f', s)
})
t.Run("Next handles EOF", func(t *testing.T) {
s, p2 := p.Advance(5).Next()
require.Equal(t, p.Advance(5), p2)
require.Equal(t, EOF, s)
})
t.Run("HasPrefix", func(t *testing.T) {
require.True(t, p.HasPrefix("fo"))
require.False(t, p.HasPrefix("ooo"))
require.True(t, p.Advance(1).HasPrefix("ooo"))
require.False(t, p.Advance(1).HasPrefix("oooo"))
})
t.Run("Accept", func(t *testing.T) {
s, p2 := p.Accept("abcdef")
require.Equal(t, "f", s)
require.Equal(t, p.Advance(1), p2)
s, p2 = p.Accept("ooooo")
require.Equal(t, "", s)
require.Equal(t, p.Advance(0), p2)
s, p2 = p.Advance(4).Accept("ooooo")
require.Equal(t, "", s)
require.Equal(t, p.Advance(4), p2)
})
t.Run("AcceptRun", func(t *testing.T) {
s, p2 := p.AcceptRun("f")
require.Equal(t, "f", s)
require.Equal(t, p.Advance(1), p2)
s, p3 := p.AcceptRun("fo")
require.Equal(t, "fooo", s)
require.Equal(t, p.Advance(4), p3)
s, p4 := p3.AcceptRun("fo")
require.Equal(t, "", s)
require.Equal(t, p.Advance(4), p4)
})
t.Run("AcceptUntil", func(t *testing.T) {
s, p2 := p.AcceptUntil("o")
require.Equal(t, "f", s)
require.Equal(t, p.Advance(1), p2)
s, p3 := p2.AcceptRun("o")
require.Equal(t, "ooo", s)
require.Equal(t, p.Advance(4), p3)
require.Equal(t, "", p.Advance(4).Get())
require.Equal(t, "", p.Advance(10).Get())
})
}