Clean up Pointer
This commit is contained in:
parent
68cde88125
commit
8b343d6360
@ -20,7 +20,7 @@ func TestAnd(t *testing.T) {
|
||||
t.Run("matches sequence", func(t *testing.T) {
|
||||
node, p2 := And("hello", WS, "world")(p)
|
||||
require.Equal(t, NewSequence(0, NewToken(0, "hello"), NewToken(6, "world")), node)
|
||||
require.Equal(t, 0, p2.Remaining())
|
||||
require.Equal(t, "", p2.Get())
|
||||
})
|
||||
|
||||
t.Run("returns errors", func(t *testing.T) {
|
||||
|
@ -7,11 +7,7 @@ import (
|
||||
)
|
||||
|
||||
func html(p Pointer) (Node, Pointer) {
|
||||
opentag := Exact("<")
|
||||
closetag := Exact(">")
|
||||
equal := Exact("=")
|
||||
slash := Exact("/")
|
||||
identifier := And(Char(Range("a-z")), CharRun(Range("a-zA-Z0-9")))
|
||||
identifier := And(Range("a-z", 1, 1), Range("a-zA-Z0-9"))
|
||||
text := CharRunUntil("<>")
|
||||
|
||||
var tag Parser
|
||||
@ -19,17 +15,20 @@ func html(p Pointer) (Node, Pointer) {
|
||||
element := Any(text, &tag)
|
||||
elements := Kleene(element)
|
||||
//attr := And(identifier, equal, String())
|
||||
attr := And(identifier, equal, Exact(`"test"`))
|
||||
attr := And(identifier, "=", `"test"`)
|
||||
attrws := And(attr, WS)
|
||||
attrs := Kleene(attrws)
|
||||
tstart := And(opentag, identifier, attrs, closetag)
|
||||
tend := And(opentag, slash, identifier, closetag)
|
||||
tstart := And("<", identifier, attrs, ">")
|
||||
tend := And("</", identifier, ">")
|
||||
tag = And(tstart, elements, tend)
|
||||
|
||||
return element(p)
|
||||
}
|
||||
|
||||
func main() {
|
||||
node, _ := html(Input("<h1>hello world</h1>"))
|
||||
fmt.Printf("%#v\n", node)
|
||||
result, _, err := ParseString(html, "<h1>hello world</h1>")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Printf("%#v\n", result)
|
||||
}
|
||||
|
9
nodes.go
9
nodes.go
@ -1,5 +1,7 @@
|
||||
package parsec
|
||||
|
||||
import "fmt"
|
||||
|
||||
type Node interface {
|
||||
Pos() int
|
||||
}
|
||||
@ -16,11 +18,12 @@ func NewToken(pos int, value string) Token {
|
||||
}
|
||||
|
||||
// Error is the Node produced when a parser fails. It also satisfies the
// built-in error interface through its Error method.
//
// The text field is named Message because a field called Error would collide
// with the Error method.
type Error struct {
	pos     int    // input offset reported by Pos
	Message string // description of what was expected
}

// Pos returns the input offset stored in the error.
func (e Error) Pos() int {
	return e.pos
}

// Error formats the failure as "offset <pos>: <Message>".
func (e Error) Error() string {
	return fmt.Sprintf("offset %d: %s", e.pos, e.Message)
}
|
||||
|
||||
func NewError(pos int, message string) Error {
|
||||
return Error{pos, message}
|
||||
|
115
parser.go
115
parser.go
@ -2,6 +2,8 @@ package parsec
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type Parser func(Pointer) (Node, Pointer)
|
||||
@ -48,9 +50,20 @@ func ParsifyAll(parsers ...Parserish) []Parser {
|
||||
return ret
|
||||
}
|
||||
|
||||
func ParseString(parser Parserish, input string) (result Node, remaining string, err error) {
|
||||
p := Parsify(parser)
|
||||
result, pointer := p(Pointer{input, 0})
|
||||
|
||||
if err, isErr := result.(Error); isErr {
|
||||
return nil, pointer.Get(), err
|
||||
}
|
||||
|
||||
return result, pointer.Get(), nil
|
||||
}
|
||||
|
||||
func Exact(match string) Parser {
|
||||
return func(p Pointer) (Node, Pointer) {
|
||||
if !p.HasPrefix(match) {
|
||||
if !strings.HasPrefix(p.Get(), match) {
|
||||
return NewError(p.pos, "Expected "+match), p
|
||||
}
|
||||
|
||||
@ -60,63 +73,115 @@ func Exact(match string) Parser {
|
||||
|
||||
func Char(match string) Parser {
|
||||
return func(p Pointer) (Node, Pointer) {
|
||||
r, p2 := p.Accept(match)
|
||||
if r == "" {
|
||||
return NewError(p.pos, "Expected one of "+string(match)), p
|
||||
}
|
||||
r, w := utf8.DecodeRuneInString(p.Get())
|
||||
|
||||
return NewToken(p.pos, string(r)), p2
|
||||
if !strings.ContainsRune(match, r) {
|
||||
return NewError(p.pos, "Expected one of "+string(match)), p
|
||||
|
||||
}
|
||||
return NewToken(p.pos, string(r)), p.Advance(w)
|
||||
}
|
||||
}
|
||||
|
||||
func CharRun(match string) Parser {
|
||||
return func(p Pointer) (Node, Pointer) {
|
||||
s, p2 := p.AcceptRun(match)
|
||||
if s == "" {
|
||||
matched := 0
|
||||
for p.pos+matched < len(p.input) {
|
||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
|
||||
if !strings.ContainsRune(match, r) {
|
||||
break
|
||||
}
|
||||
matched += w
|
||||
}
|
||||
|
||||
if matched == 0 {
|
||||
return NewError(p.pos, "Expected some of "+match), p
|
||||
}
|
||||
|
||||
return NewToken(p.pos, s), p2
|
||||
return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched)
|
||||
}
|
||||
}
|
||||
|
||||
func CharRunUntil(match string) Parser {
|
||||
return func(p Pointer) (Node, Pointer) {
|
||||
s, p2 := p.AcceptUntil(match)
|
||||
if s == "" {
|
||||
matched := 0
|
||||
for p.pos+matched < len(p.input) {
|
||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
|
||||
if strings.ContainsRune(match, r) {
|
||||
break
|
||||
}
|
||||
matched += w
|
||||
}
|
||||
|
||||
if matched == 0 {
|
||||
return NewError(p.pos, "Expected some of "+match), p
|
||||
}
|
||||
|
||||
return NewToken(p.pos, s), p2
|
||||
return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched)
|
||||
}
|
||||
}
|
||||
|
||||
func Range(r string) string {
|
||||
func Range(r string, repetition ...int) Parser {
|
||||
min := int(1)
|
||||
max := int(-1)
|
||||
switch len(repetition) {
|
||||
case 0:
|
||||
case 1:
|
||||
min = repetition[0]
|
||||
case 2:
|
||||
min = repetition[0]
|
||||
max = repetition[1]
|
||||
default:
|
||||
panic(fmt.Errorf("Dont know what %d repetion args mean", len(repetition)))
|
||||
}
|
||||
|
||||
runes := []rune(r)
|
||||
if len(runes)%3 != 0 {
|
||||
panic("ranges should be in the form a-z0-9")
|
||||
}
|
||||
|
||||
match := ""
|
||||
|
||||
var ranges [][]rune
|
||||
for i := 0; i < len(runes); i += 3 {
|
||||
start := runes[i]
|
||||
end := runes[i+2]
|
||||
if start > end {
|
||||
tmp := start
|
||||
start = end
|
||||
end = tmp
|
||||
}
|
||||
for c := start; c <= end; c++ {
|
||||
match += string(c)
|
||||
if start <= end {
|
||||
ranges = append(ranges, []rune{start, end})
|
||||
} else {
|
||||
ranges = append(ranges, []rune{end, start})
|
||||
}
|
||||
}
|
||||
|
||||
return match
|
||||
return func(p Pointer) (Node, Pointer) {
|
||||
matched := 0
|
||||
for p.pos+matched < len(p.input) {
|
||||
if max != -1 && matched >= max {
|
||||
break
|
||||
}
|
||||
|
||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
|
||||
|
||||
anyMatched := false
|
||||
for _, rng := range ranges {
|
||||
if r >= rng[0] && r <= rng[1] {
|
||||
anyMatched = true
|
||||
}
|
||||
}
|
||||
if !anyMatched {
|
||||
break
|
||||
}
|
||||
|
||||
matched += w
|
||||
}
|
||||
|
||||
if matched < min {
|
||||
return NewError(p.pos+matched, fmt.Sprintf("Expected at least %d more of %s", min-matched, r)), p
|
||||
}
|
||||
|
||||
return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched)
|
||||
}
|
||||
}
|
||||
|
||||
func WS(p Pointer) (Node, Pointer) {
|
||||
_, p2 := p.AcceptRun("\t\n\v\f\r \x85\xA0")
|
||||
|
||||
_, p2 := CharRun("\t\n\v\f\r \x85\xA0")(p)
|
||||
return nil, p2
|
||||
}
|
||||
|
@ -19,6 +19,13 @@ func TestParsify(t *testing.T) {
|
||||
require.Equal(t, NewToken(0, "ff"), node)
|
||||
})
|
||||
|
||||
t.Run("parser funcs", func(t *testing.T) {
|
||||
node, _ := Parsify(func(p Pointer) (Node, Pointer) {
|
||||
return NewToken(0, "hello"), p
|
||||
})(p)
|
||||
require.Equal(t, NewToken(0, "hello"), node)
|
||||
})
|
||||
|
||||
t.Run("*parsers", func(t *testing.T) {
|
||||
var parser Parser
|
||||
parserfied := Parsify(&parser)
|
||||
@ -27,6 +34,20 @@ func TestParsify(t *testing.T) {
|
||||
node, _ := parserfied(p)
|
||||
require.Equal(t, NewToken(0, "ff"), node)
|
||||
})
|
||||
|
||||
require.Panics(t, func() {
|
||||
Parsify(1)
|
||||
})
|
||||
}
|
||||
|
||||
func TestParsifyAll(t *testing.T) {
|
||||
parsers := ParsifyAll("ff", "gg")
|
||||
|
||||
result, _ := parsers[0](Pointer{"ffooo", 0})
|
||||
require.Equal(t, NewToken(0, "ff"), result)
|
||||
|
||||
result, _ = parsers[1](Pointer{"ffooo", 0})
|
||||
require.Equal(t, NewError(0, "Expected gg"), result)
|
||||
}
|
||||
|
||||
func TestExact(t *testing.T) {
|
||||
@ -102,6 +123,58 @@ func TestWS(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestRange(t *testing.T) {
|
||||
require.Equal(t, "abcdefg", Range("a-g"))
|
||||
require.Equal(t, "01234abcd", Range("0-4a-d"))
|
||||
t.Run("full match", func(t *testing.T) {
|
||||
node, p := Range("a-z")(Pointer{"foobar", 0})
|
||||
require.Equal(t, NewToken(0, "foobar"), node)
|
||||
require.Equal(t, "", p.Get())
|
||||
})
|
||||
|
||||
t.Run("partial match", func(t *testing.T) {
|
||||
node, p := Range("1-4d-a")(Pointer{"a1b2c3d4efg", 0})
|
||||
require.Equal(t, NewToken(0, "a1b2c3d4"), node)
|
||||
require.Equal(t, "efg", p.Get())
|
||||
})
|
||||
|
||||
t.Run("limited match", func(t *testing.T) {
|
||||
node, p := Range("1-4d-a", 1, 2)(Pointer{"a1b2c3d4efg", 0})
|
||||
require.Equal(t, NewToken(0, "a1"), node)
|
||||
require.Equal(t, "b2c3d4efg", p.Get())
|
||||
})
|
||||
|
||||
t.Run("no match", func(t *testing.T) {
|
||||
node, p := Range("0-9")(Pointer{"ffffff", 0})
|
||||
require.Equal(t, NewError(0, "Expected at least 1 more of 0-9"), node)
|
||||
require.Equal(t, 0, p.pos)
|
||||
})
|
||||
|
||||
t.Run("no match with min", func(t *testing.T) {
|
||||
node, p := Range("0-9", 4)(Pointer{"ffffff", 0})
|
||||
require.Equal(t, NewError(0, "Expected at least 4 more of 0-9"), node)
|
||||
require.Equal(t, 0, p.pos)
|
||||
})
|
||||
|
||||
require.Panics(t, func() {
|
||||
Range("abcd")
|
||||
})
|
||||
|
||||
require.Panics(t, func() {
|
||||
Range("a-b", 1, 2, 3)
|
||||
})
|
||||
}
|
||||
|
||||
func TestParseString(t *testing.T) {
|
||||
t.Run("partial match", func(t *testing.T) {
|
||||
result, remaining, err := ParseString("hello", "hello world")
|
||||
require.Equal(t, NewToken(0, "hello"), result)
|
||||
require.Equal(t, " world", remaining)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("error", func(t *testing.T) {
|
||||
result, remaining, err := ParseString("world", "hello world")
|
||||
require.Equal(t, nil, result)
|
||||
require.Equal(t, "hello world", remaining)
|
||||
require.Error(t, err)
|
||||
require.Equal(t, "offset 0: Expected world", err.Error())
|
||||
})
|
||||
}
|
||||
|
70
pointer.go
70
pointer.go
@ -1,18 +1,5 @@
|
||||
package parsec
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
const (
|
||||
EOF rune = -1
|
||||
)
|
||||
|
||||
func Input(s string) Pointer {
|
||||
return Pointer{s, 0}
|
||||
}
|
||||
|
||||
type Pointer struct {
|
||||
input string
|
||||
pos int
|
||||
@ -23,59 +10,8 @@ func (p Pointer) Advance(i int) Pointer {
|
||||
}
|
||||
|
||||
func (p Pointer) Get() string {
|
||||
if p.pos > len(p.input) {
|
||||
return ""
|
||||
}
|
||||
return p.input[p.pos:]
|
||||
}
|
||||
|
||||
func (p Pointer) Remaining() int {
|
||||
remaining := len(p.input) - p.pos
|
||||
if remaining < 0 {
|
||||
return 0
|
||||
}
|
||||
return remaining
|
||||
}
|
||||
|
||||
func (p Pointer) Next() (rune, Pointer) {
|
||||
if int(p.pos) >= len(p.input) {
|
||||
return EOF, p
|
||||
}
|
||||
r, w := utf8.DecodeRuneInString(p.input[p.pos:])
|
||||
return r, p.Advance(w)
|
||||
}
|
||||
|
||||
func (p Pointer) HasPrefix(s string) bool {
|
||||
return strings.HasPrefix(p.input[p.pos:], s)
|
||||
}
|
||||
|
||||
func (p Pointer) Accept(valid string) (string, Pointer) {
|
||||
c, newP := p.Next()
|
||||
if strings.ContainsRune(valid, c) {
|
||||
return string(c), newP
|
||||
}
|
||||
return "", p
|
||||
}
|
||||
|
||||
func (p Pointer) AcceptRun(valid string) (string, Pointer) {
|
||||
matched := 0
|
||||
for p.pos+matched < len(p.input) {
|
||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
|
||||
if !strings.ContainsRune(valid, r) {
|
||||
break
|
||||
}
|
||||
matched += w
|
||||
}
|
||||
|
||||
return p.input[p.pos : p.pos+matched], p.Advance(matched)
|
||||
}
|
||||
|
||||
func (p Pointer) AcceptUntil(invalid string) (string, Pointer) {
|
||||
matched := 0
|
||||
for p.pos+matched < len(p.input) {
|
||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
|
||||
if strings.ContainsRune(invalid, r) {
|
||||
break
|
||||
}
|
||||
matched += w
|
||||
}
|
||||
|
||||
return p.input[p.pos : p.pos+matched], p.Advance(matched)
|
||||
}
|
||||
|
@ -19,68 +19,7 @@ func TestPointer(t *testing.T) {
|
||||
t.Run("Get", func(t *testing.T) {
|
||||
require.Equal(t, "fooo", p.Get())
|
||||
require.Equal(t, "ooo", p.Advance(1).Get())
|
||||
})
|
||||
|
||||
t.Run("Remaining", func(t *testing.T) {
|
||||
require.Equal(t, 4, p.Remaining())
|
||||
require.Equal(t, 0, p.Advance(4).Remaining())
|
||||
require.Equal(t, 0, p.Advance(10).Remaining())
|
||||
})
|
||||
|
||||
t.Run("Next takes one character", func(t *testing.T) {
|
||||
s, p2 := p.Next()
|
||||
require.Equal(t, p.Advance(1), p2)
|
||||
require.Equal(t, 'f', s)
|
||||
})
|
||||
|
||||
t.Run("Next handles EOF", func(t *testing.T) {
|
||||
s, p2 := p.Advance(5).Next()
|
||||
require.Equal(t, p.Advance(5), p2)
|
||||
require.Equal(t, EOF, s)
|
||||
})
|
||||
|
||||
t.Run("HasPrefix", func(t *testing.T) {
|
||||
require.True(t, p.HasPrefix("fo"))
|
||||
require.False(t, p.HasPrefix("ooo"))
|
||||
require.True(t, p.Advance(1).HasPrefix("ooo"))
|
||||
require.False(t, p.Advance(1).HasPrefix("oooo"))
|
||||
})
|
||||
|
||||
t.Run("Accept", func(t *testing.T) {
|
||||
s, p2 := p.Accept("abcdef")
|
||||
require.Equal(t, "f", s)
|
||||
require.Equal(t, p.Advance(1), p2)
|
||||
|
||||
s, p2 = p.Accept("ooooo")
|
||||
require.Equal(t, "", s)
|
||||
require.Equal(t, p.Advance(0), p2)
|
||||
|
||||
s, p2 = p.Advance(4).Accept("ooooo")
|
||||
require.Equal(t, "", s)
|
||||
require.Equal(t, p.Advance(4), p2)
|
||||
})
|
||||
|
||||
t.Run("AcceptRun", func(t *testing.T) {
|
||||
s, p2 := p.AcceptRun("f")
|
||||
require.Equal(t, "f", s)
|
||||
require.Equal(t, p.Advance(1), p2)
|
||||
|
||||
s, p3 := p.AcceptRun("fo")
|
||||
require.Equal(t, "fooo", s)
|
||||
require.Equal(t, p.Advance(4), p3)
|
||||
|
||||
s, p4 := p3.AcceptRun("fo")
|
||||
require.Equal(t, "", s)
|
||||
require.Equal(t, p.Advance(4), p4)
|
||||
})
|
||||
|
||||
t.Run("AcceptUntil", func(t *testing.T) {
|
||||
s, p2 := p.AcceptUntil("o")
|
||||
require.Equal(t, "f", s)
|
||||
require.Equal(t, p.Advance(1), p2)
|
||||
|
||||
s, p3 := p2.AcceptRun("o")
|
||||
require.Equal(t, "ooo", s)
|
||||
require.Equal(t, p.Advance(4), p3)
|
||||
require.Equal(t, "", p.Advance(4).Get())
|
||||
require.Equal(t, "", p.Advance(10).Get())
|
||||
})
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user