diff --git a/combinator_test.go b/combinator_test.go
index 8c0bab0..2bec734 100644
--- a/combinator_test.go
+++ b/combinator_test.go
@@ -20,7 +20,7 @@ func TestAnd(t *testing.T) {
t.Run("matches sequence", func(t *testing.T) {
node, p2 := And("hello", WS, "world")(p)
require.Equal(t, NewSequence(0, NewToken(0, "hello"), NewToken(6, "world")), node)
- require.Equal(t, 0, p2.Remaining())
+ require.Equal(t, "", p2.Get())
})
t.Run("returns errors", func(t *testing.T) {
diff --git a/examples/html.go b/examples/html.go
index 88a3913..d1a290a 100644
--- a/examples/html.go
+++ b/examples/html.go
@@ -7,11 +7,7 @@ import (
)
+// html parses one element: either a run of text or a complete tag, where a
+// tag is an opening tag, any number of nested elements and a closing tag.
func html(p Pointer) (Node, Pointer) {
- opentag := Exact("<")
- closetag := Exact(">")
- equal := Exact("=")
- slash := Exact("/")
- identifier := And(Char(Range("a-z")), CharRun(Range("a-zA-Z0-9")))
+	identifier := And(Range("a-z", 1, 1), Range("a-zA-Z0-9"))
text := CharRunUntil("<>")
var tag Parser
@@ -19,17 +15,20 @@ func html(p Pointer) (Node, Pointer) {
element := Any(text, &tag)
elements := Kleene(element)
//attr := And(identifier, equal, String())
- attr := And(identifier, equal, Exact(`"test"`))
+	attr := And(identifier, "=", `"test"`)
attrws := And(attr, WS)
attrs := Kleene(attrws)
- tstart := And(opentag, identifier, attrs, closetag)
- tend := And(opentag, slash, identifier, closetag)
+	tstart := And("<", identifier, attrs, ">")
+	// A closing tag is "</" name ">", mirroring the removed
+	// And(opentag, slash, identifier, closetag). An empty prefix would match
+	// at any position and never consume the "</".
+	tend := And("</", identifier, ">")
tag = And(tstart, elements, tend)
return element(p)
}
+// main parses a sample input with the html parser and prints the resulting
+// node, panicking if the parser reports an error.
func main() {
- node, _ := html(Input("
hello world
"))
- fmt.Printf("%#v\n", node)
+	// NOTE(review): the sample literal arrived split across two lines, which is
+	// not a valid interpreted string, and it appears to have lost its markup.
+	// Only the visible text is kept here - restore the original HTML sample.
+	result, _, err := ParseString(html, "hello world\n")
+	if err != nil {
+		panic(err)
+	}
+	fmt.Printf("%#v\n", result)
}
diff --git a/nodes.go b/nodes.go
index c51db6f..410fa7e 100644
--- a/nodes.go
+++ b/nodes.go
@@ -1,5 +1,7 @@
package parsec
+import "fmt"
+
type Node interface {
Pos() int
}
@@ -16,11 +18,12 @@ func NewToken(pos int, value string) Token {
}
+// Error is a node describing a failed match. It also satisfies the built-in
+// error interface through its Error method.
type Error struct {
- pos int
- Error string
+	pos     int    // offset reported by Pos and included in the Error text
+	Message string // description of what went wrong
}
-func (e Error) Pos() int { return e.pos }
+
+// Pos returns the offset stored in the error.
+func (e Error) Pos() int {
+	return e.pos
+}
+
+// Error formats the error as "offset <pos>: <message>".
+func (e Error) Error() string {
+	return fmt.Sprintf("offset %d: %s", e.pos, e.Message)
+}
func NewError(pos int, message string) Error {
return Error{pos, message}
diff --git a/parser.go b/parser.go
index 8066526..82b5d56 100644
--- a/parser.go
+++ b/parser.go
@@ -2,6 +2,8 @@ package parsec
import (
"fmt"
+ "strings"
+ "unicode/utf8"
)
type Parser func(Pointer) (Node, Pointer)
@@ -48,9 +50,20 @@ func ParsifyAll(parsers ...Parserish) []Parser {
return ret
}
+// ParseString converts parser with Parsify and runs it against input,
+// starting at offset 0.
+//
+// When the parser produces an Error node, that node is returned as err and
+// result is nil; otherwise result is the produced node and err is nil. In both
+// cases remaining is the text left at the pointer the parser returned.
+func ParseString(parser Parserish, input string) (result Node, remaining string, err error) {
+	node, end := Parsify(parser)(Pointer{input, 0})
+
+	if failure, ok := node.(Error); ok {
+		return nil, end.Get(), failure
+	}
+
+	return node, end.Get(), nil
+}
+
func Exact(match string) Parser {
return func(p Pointer) (Node, Pointer) {
- if !p.HasPrefix(match) {
+ if !strings.HasPrefix(p.Get(), match) {
return NewError(p.pos, "Expected "+match), p
}
@@ -60,63 +73,115 @@ func Exact(match string) Parser {
+// Char builds a parser that matches exactly one character out of match.
+//
+// On success it returns a Token holding that character and a pointer advanced
+// past it. Otherwise, including at end of input, it returns an Error and the
+// pointer unchanged.
func Char(match string) Parser {
return func(p Pointer) (Node, Pointer) {
- r, p2 := p.Accept(match)
- if r == "" {
- return NewError(p.pos, "Expected one of "+string(match)), p
- }
- return NewToken(p.pos, string(r)), p2
+		r, w := utf8.DecodeRuneInString(p.Get())
+
+		// At end of input DecodeRuneInString reports (RuneError, 0). Reject
+		// that explicitly: if match contains U+FFFD or invalid bytes,
+		// ContainsRune would accept RuneError and produce a zero-width token
+		// that never advances the pointer.
+		if w == 0 || !strings.ContainsRune(match, r) {
+			return NewError(p.pos, "Expected one of "+match), p
+		}
+		return NewToken(p.pos, string(r)), p.Advance(w)
}
}
+// CharRun builds a parser that matches the longest run of characters that all
+// appear in match. An empty run is an Error and leaves the pointer unchanged.
func CharRun(match string) Parser {
return func(p Pointer) (Node, Pointer) {
- s, p2 := p.AcceptRun(match)
- if s == "" {
+		// end is the byte offset just past the last accepted character.
+		end := p.pos
+		for end < len(p.input) {
+			r, size := utf8.DecodeRuneInString(p.input[end:])
+			if !strings.ContainsRune(match, r) {
+				break
+			}
+			end += size
+		}
+
+		if end == p.pos {
return NewError(p.pos, "Expected some of "+match), p
}
- return NewToken(p.pos, s), p2
+		return NewToken(p.pos, p.input[p.pos:end]), p.Advance(end - p.pos)
}
}
+// CharRunUntil builds a parser that matches the longest run of characters up
+// to, but not including, the first character that appears in match. An empty
+// run is an Error and leaves the pointer unchanged.
func CharRunUntil(match string) Parser {
return func(p Pointer) (Node, Pointer) {
- s, p2 := p.AcceptUntil(match)
- if s == "" {
+		// end is the byte offset of the first stop character, or the end of
+		// the input when none is found.
+		end := p.pos
+		for end < len(p.input) {
+			r, size := utf8.DecodeRuneInString(p.input[end:])
+			if strings.ContainsRune(match, r) {
+				break
+			}
+			end += size
+		}
+
+		if end == p.pos {
return NewError(p.pos, "Expected some of "+match), p
}
- return NewToken(p.pos, s), p2
+		return NewToken(p.pos, p.input[p.pos:end]), p.Advance(end - p.pos)
}
}
-func Range(r string) string {
+// Range builds a parser that matches a run of characters drawn from the
+// character ranges described by r.
+//
+// r is a sequence of three-rune groups such as "a-z0-9". The first and third
+// rune of each group are the inclusive bounds and may be given in either
+// order. Range panics if r is not a whole number of such groups.
+//
+// repetition optionally bounds the length of the run, counted in characters
+// (runes), not bytes:
+//   - no values: at least one character, no upper bound
+//   - one value: that minimum, no upper bound
+//   - two values: that minimum and maximum; a maximum of -1 means unbounded
+//
+// Any other number of values panics. The parser returns a Token with the
+// matched text, or an Error and the unchanged pointer when fewer than the
+// minimum number of characters match.
+func Range(r string, repetition ...int) Parser {
+	// Named minRunes/maxRunes rather than min/max so the built-in functions
+	// of the same name are not shadowed.
+	minRunes, maxRunes := 1, -1
+	switch len(repetition) {
+	case 0:
+	case 1:
+		minRunes = repetition[0]
+	case 2:
+		minRunes = repetition[0]
+		maxRunes = repetition[1]
+	default:
+		panic(fmt.Errorf("Dont know what %d repetion args mean", len(repetition)))
+	}
+
runes := []rune(r)
if len(runes)%3 != 0 {
panic("ranges should be in the form a-z0-9")
}
- match := ""
-
+	var ranges [][]rune
for i := 0; i < len(runes); i += 3 {
start := runes[i]
end := runes[i+2]
- if start > end {
- tmp := start
- start = end
- end = tmp
- }
- for c := start; c <= end; c++ {
- match += string(c)
+		if start <= end {
+			ranges = append(ranges, []rune{start, end})
+		} else {
+			ranges = append(ranges, []rune{end, start})
}
}
- return match
+	return func(p Pointer) (Node, Pointer) {
+		// width is the number of bytes consumed and count the number of
+		// characters. The repetition bounds apply to count; comparing them
+		// with width would miscount multi-byte characters.
+		width, count := 0, 0
+		for p.pos+width < len(p.input) {
+			if maxRunes != -1 && count >= maxRunes {
+				break
+			}
+
+			c, w := utf8.DecodeRuneInString(p.input[p.pos+width:])
+
+			inRange := false
+			for _, rng := range ranges {
+				if c >= rng[0] && c <= rng[1] {
+					inRange = true
+					break
+				}
+			}
+			if !inRange {
+				break
+			}
+
+			width += w
+			count++
+		}
+
+		if count < minRunes {
+			return NewError(p.pos+width, fmt.Sprintf("Expected at least %d more of %s", minRunes-count, r)), p
+		}
+
+		return NewToken(p.pos, p.input[p.pos:p.pos+width]), p.Advance(width)
+	}
}
+// WS skips a run of whitespace characters. It always returns a nil node; the
+// returned pointer is past the whitespace, or unchanged when there is none.
func WS(p Pointer) (Node, Pointer) {
- _, p2 := p.AcceptRun("\t\n\v\f\r \x85\xA0")
-
+	// U+0085 (NEL) and U+00A0 (NBSP) must be written as \u escapes. "\x85\xA0"
+	// puts two raw bytes in the string, which are invalid UTF-8: they never
+	// match the real characters, and they make every invalid byte in the input
+	// count as whitespace.
+	_, p2 := CharRun("\t\n\v\f\r \u0085\u00A0")(p)
return nil, p2
}
diff --git a/parser_test.go b/parser_test.go
index 7c22a76..83a95f6 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -19,6 +19,13 @@ func TestParsify(t *testing.T) {
require.Equal(t, NewToken(0, "ff"), node)
})
+ t.Run("parser funcs", func(t *testing.T) {
+ node, _ := Parsify(func(p Pointer) (Node, Pointer) {
+ return NewToken(0, "hello"), p
+ })(p)
+ require.Equal(t, NewToken(0, "hello"), node)
+ })
+
t.Run("*parsers", func(t *testing.T) {
var parser Parser
parserfied := Parsify(&parser)
@@ -27,6 +34,20 @@ func TestParsify(t *testing.T) {
node, _ := parserfied(p)
require.Equal(t, NewToken(0, "ff"), node)
})
+
+ require.Panics(t, func() {
+ Parsify(1)
+ })
+}
+
+// TestParsifyAll checks that each argument is converted independently: the
+// first parser matches the input and the second reports an error.
+func TestParsifyAll(t *testing.T) {
+	input := Pointer{"ffooo", 0}
+	parsers := ParsifyAll("ff", "gg")
+
+	matched, _ := parsers[0](input)
+	require.Equal(t, NewToken(0, "ff"), matched)
+
+	failed, _ := parsers[1](input)
+	require.Equal(t, NewError(0, "Expected gg"), failed)
}
func TestExact(t *testing.T) {
@@ -102,6 +123,58 @@ func TestWS(t *testing.T) {
}
func TestRange(t *testing.T) {
- require.Equal(t, "abcdefg", Range("a-g"))
- require.Equal(t, "01234abcd", Range("0-4a-d"))
+	// Inputs shared by several subtests.
+	const (
+		mixed   = "a1b2c3d4efg"
+		letters = "ffffff"
+	)
+
+	t.Run("full match", func(t *testing.T) {
+		got, rest := Range("a-z")(Pointer{"foobar", 0})
+		require.Equal(t, NewToken(0, "foobar"), got)
+		require.Equal(t, "", rest.Get())
+	})
+
+	t.Run("partial match", func(t *testing.T) {
+		// "d-a" is deliberately reversed; bounds are accepted in either order.
+		got, rest := Range("1-4d-a")(Pointer{mixed, 0})
+		require.Equal(t, NewToken(0, "a1b2c3d4"), got)
+		require.Equal(t, "efg", rest.Get())
+	})
+
+	t.Run("limited match", func(t *testing.T) {
+		got, rest := Range("1-4d-a", 1, 2)(Pointer{mixed, 0})
+		require.Equal(t, NewToken(0, "a1"), got)
+		require.Equal(t, "b2c3d4efg", rest.Get())
+	})
+
+	t.Run("no match", func(t *testing.T) {
+		got, rest := Range("0-9")(Pointer{letters, 0})
+		require.Equal(t, NewError(0, "Expected at least 1 more of 0-9"), got)
+		require.Equal(t, 0, rest.pos)
+	})
+
+	t.Run("no match with min", func(t *testing.T) {
+		got, rest := Range("0-9", 4)(Pointer{letters, 0})
+		require.Equal(t, NewError(0, "Expected at least 4 more of 0-9"), got)
+		require.Equal(t, 0, rest.pos)
+	})
+
+	// A spec that is not a whole number of three-rune groups is rejected.
+	require.Panics(t, func() {
+		Range("abcd")
+	})
+
+	// More than two repetition arguments are rejected.
+	require.Panics(t, func() {
+		Range("a-b", 1, 2, 3)
+	})
+}
+
+// TestParseString covers both outcomes of ParseString: a match that leaves
+// unparsed input behind, and a failure surfaced as an error value.
+func TestParseString(t *testing.T) {
+	const input = "hello world"
+
+	t.Run("partial match", func(t *testing.T) {
+		node, rest, err := ParseString("hello", input)
+		require.Equal(t, NewToken(0, "hello"), node)
+		require.Equal(t, " world", rest)
+		require.NoError(t, err)
+	})
+
+	t.Run("error", func(t *testing.T) {
+		node, rest, err := ParseString("world", input)
+		require.Equal(t, nil, node)
+		require.Equal(t, "hello world", rest)
+		require.Error(t, err)
+		require.Equal(t, "offset 0: Expected world", err.Error())
+	})
}
diff --git a/pointer.go b/pointer.go
index 92b2bcb..7727833 100644
--- a/pointer.go
+++ b/pointer.go
@@ -1,18 +1,5 @@
package parsec
-import (
- "strings"
- "unicode/utf8"
-)
-
-const (
- EOF rune = -1
-)
-
-func Input(s string) Pointer {
- return Pointer{s, 0}
-}
-
type Pointer struct {
input string
pos int
@@ -23,59 +10,8 @@ func (p Pointer) Advance(i int) Pointer {
}
+// Get returns the input from the current position onwards, or "" when the
+// position lies beyond the end of the input.
func (p Pointer) Get() string {
+	// Slicing past the end would panic, so report an empty remainder instead.
+	if len(p.input) < p.pos {
+		return ""
+	}
return p.input[p.pos:]
}
-
-func (p Pointer) Remaining() int {
- remaining := len(p.input) - p.pos
- if remaining < 0 {
- return 0
- }
- return remaining
-}
-
-func (p Pointer) Next() (rune, Pointer) {
- if int(p.pos) >= len(p.input) {
- return EOF, p
- }
- r, w := utf8.DecodeRuneInString(p.input[p.pos:])
- return r, p.Advance(w)
-}
-
-func (p Pointer) HasPrefix(s string) bool {
- return strings.HasPrefix(p.input[p.pos:], s)
-}
-
-func (p Pointer) Accept(valid string) (string, Pointer) {
- c, newP := p.Next()
- if strings.ContainsRune(valid, c) {
- return string(c), newP
- }
- return "", p
-}
-
-func (p Pointer) AcceptRun(valid string) (string, Pointer) {
- matched := 0
- for p.pos+matched < len(p.input) {
- r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
- if !strings.ContainsRune(valid, r) {
- break
- }
- matched += w
- }
-
- return p.input[p.pos : p.pos+matched], p.Advance(matched)
-}
-
-func (p Pointer) AcceptUntil(invalid string) (string, Pointer) {
- matched := 0
- for p.pos+matched < len(p.input) {
- r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:])
- if strings.ContainsRune(invalid, r) {
- break
- }
- matched += w
- }
-
- return p.input[p.pos : p.pos+matched], p.Advance(matched)
-}
diff --git a/pointer_test.go b/pointer_test.go
index e8076d7..6b432f0 100644
--- a/pointer_test.go
+++ b/pointer_test.go
@@ -19,68 +19,7 @@ func TestPointer(t *testing.T) {
t.Run("Get", func(t *testing.T) {
require.Equal(t, "fooo", p.Get())
require.Equal(t, "ooo", p.Advance(1).Get())
- })
-
- t.Run("Remaining", func(t *testing.T) {
- require.Equal(t, 4, p.Remaining())
- require.Equal(t, 0, p.Advance(4).Remaining())
- require.Equal(t, 0, p.Advance(10).Remaining())
- })
-
- t.Run("Next takes one character", func(t *testing.T) {
- s, p2 := p.Next()
- require.Equal(t, p.Advance(1), p2)
- require.Equal(t, 'f', s)
- })
-
- t.Run("Next handles EOF", func(t *testing.T) {
- s, p2 := p.Advance(5).Next()
- require.Equal(t, p.Advance(5), p2)
- require.Equal(t, EOF, s)
- })
-
- t.Run("HasPrefix", func(t *testing.T) {
- require.True(t, p.HasPrefix("fo"))
- require.False(t, p.HasPrefix("ooo"))
- require.True(t, p.Advance(1).HasPrefix("ooo"))
- require.False(t, p.Advance(1).HasPrefix("oooo"))
- })
-
- t.Run("Accept", func(t *testing.T) {
- s, p2 := p.Accept("abcdef")
- require.Equal(t, "f", s)
- require.Equal(t, p.Advance(1), p2)
-
- s, p2 = p.Accept("ooooo")
- require.Equal(t, "", s)
- require.Equal(t, p.Advance(0), p2)
-
- s, p2 = p.Advance(4).Accept("ooooo")
- require.Equal(t, "", s)
- require.Equal(t, p.Advance(4), p2)
- })
-
- t.Run("AcceptRun", func(t *testing.T) {
- s, p2 := p.AcceptRun("f")
- require.Equal(t, "f", s)
- require.Equal(t, p.Advance(1), p2)
-
- s, p3 := p.AcceptRun("fo")
- require.Equal(t, "fooo", s)
- require.Equal(t, p.Advance(4), p3)
-
- s, p4 := p3.AcceptRun("fo")
- require.Equal(t, "", s)
- require.Equal(t, p.Advance(4), p4)
- })
-
- t.Run("AcceptUntil", func(t *testing.T) {
- s, p2 := p.AcceptUntil("o")
- require.Equal(t, "f", s)
- require.Equal(t, p.Advance(1), p2)
-
- s, p3 := p2.AcceptRun("o")
- require.Equal(t, "ooo", s)
- require.Equal(t, p.Advance(4), p3)
+ require.Equal(t, "", p.Advance(4).Get())
+ require.Equal(t, "", p.Advance(10).Get())
})
}