diff --git a/combinator_test.go b/combinator_test.go index 8c0bab0..2bec734 100644 --- a/combinator_test.go +++ b/combinator_test.go @@ -20,7 +20,7 @@ func TestAnd(t *testing.T) { t.Run("matches sequence", func(t *testing.T) { node, p2 := And("hello", WS, "world")(p) require.Equal(t, NewSequence(0, NewToken(0, "hello"), NewToken(6, "world")), node) - require.Equal(t, 0, p2.Remaining()) + require.Equal(t, "", p2.Get()) }) t.Run("returns errors", func(t *testing.T) { diff --git a/examples/html.go b/examples/html.go index 88a3913..d1a290a 100644 --- a/examples/html.go +++ b/examples/html.go @@ -7,11 +7,7 @@ import ( ) func html(p Pointer) (Node, Pointer) { - opentag := Exact("<") - closetag := Exact(">") - equal := Exact("=") - slash := Exact("/") - identifier := And(Char(Range("a-z")), CharRun(Range("a-zA-Z0-9"))) + identifier := And(Range("a-z", 1, 1), Range("a-zA-Z0-9")) text := CharRunUntil("<>") var tag Parser @@ -19,17 +15,20 @@ func html(p Pointer) (Node, Pointer) { element := Any(text, &tag) elements := Kleene(element) //attr := And(identifier, equal, String()) - attr := And(identifier, equal, Exact(`"test"`)) + attr := And(identifier, "=", `"test"`) attrws := And(attr, WS) attrs := Kleene(attrws) - tstart := And(opentag, identifier, attrs, closetag) - tend := And(opentag, slash, identifier, closetag) + tstart := And("<", identifier, attrs, ">") + tend := And("") tag = And(tstart, elements, tend) return element(p) } func main() { - node, _ := html(Input("

hello world

")) - fmt.Printf("%#v\n", node) + result, _, err := ParseString(html, "

hello world

") + if err != nil { + panic(err) + } + fmt.Printf("%#v\n", result) } diff --git a/nodes.go b/nodes.go index c51db6f..410fa7e 100644 --- a/nodes.go +++ b/nodes.go @@ -1,5 +1,7 @@ package parsec +import "fmt" + type Node interface { Pos() int } @@ -16,11 +18,12 @@ func NewToken(pos int, value string) Token { } type Error struct { - pos int - Error string + pos int + Message string } -func (e Error) Pos() int { return e.pos } +func (e Error) Pos() int { return e.pos } +func (e Error) Error() string { return fmt.Sprintf("offset %d: %s", e.pos, e.Message) } func NewError(pos int, message string) Error { return Error{pos, message} diff --git a/parser.go b/parser.go index 8066526..82b5d56 100644 --- a/parser.go +++ b/parser.go @@ -2,6 +2,8 @@ package parsec import ( "fmt" + "strings" + "unicode/utf8" ) type Parser func(Pointer) (Node, Pointer) @@ -48,9 +50,20 @@ func ParsifyAll(parsers ...Parserish) []Parser { return ret } +func ParseString(parser Parserish, input string) (result Node, remaining string, err error) { + p := Parsify(parser) + result, pointer := p(Pointer{input, 0}) + + if err, isErr := result.(Error); isErr { + return nil, pointer.Get(), err + } + + return result, pointer.Get(), nil +} + func Exact(match string) Parser { return func(p Pointer) (Node, Pointer) { - if !p.HasPrefix(match) { + if !strings.HasPrefix(p.Get(), match) { return NewError(p.pos, "Expected "+match), p } @@ -60,63 +73,115 @@ func Exact(match string) Parser { func Char(match string) Parser { return func(p Pointer) (Node, Pointer) { - r, p2 := p.Accept(match) - if r == "" { - return NewError(p.pos, "Expected one of "+string(match)), p - } + r, w := utf8.DecodeRuneInString(p.Get()) - return NewToken(p.pos, string(r)), p2 + if !strings.ContainsRune(match, r) { + return NewError(p.pos, "Expected one of "+string(match)), p + + } + return NewToken(p.pos, string(r)), p.Advance(w) } } func CharRun(match string) Parser { return func(p Pointer) (Node, Pointer) { - s, p2 := p.AcceptRun(match) - if s == "" { + matched := 0 + for p.pos+matched < len(p.input) { + r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:]) + if !strings.ContainsRune(match, r) { + break + } + matched += w + } + + if matched == 0 { return NewError(p.pos, "Expected some of "+match), p } - return NewToken(p.pos, s), p2 + return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched) } } func CharRunUntil(match string) Parser { return func(p Pointer) (Node, Pointer) { - s, p2 := p.AcceptUntil(match) - if s == "" { + matched := 0 + for p.pos+matched < len(p.input) { + r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:]) + if strings.ContainsRune(match, r) { + break + } + matched += w + } + + if matched == 0 { return NewError(p.pos, "Expected some of "+match), p } - return NewToken(p.pos, s), p2 + return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched) } } -func Range(r string) string { +func Range(r string, repetition ...int) Parser { + min := int(1) + max := int(-1) + switch len(repetition) { + case 0: + case 1: + min = repetition[0] + case 2: + min = repetition[0] + max = repetition[1] + default: + panic(fmt.Errorf("Dont know what %d repetion args mean", len(repetition))) + } + runes := []rune(r) if len(runes)%3 != 0 { panic("ranges should be in the form a-z0-9") } - match := "" - + var ranges [][]rune for i := 0; i < len(runes); i += 3 { start := runes[i] end := runes[i+2] - if start > end { - tmp := start - start = end - end = tmp - } - for c := start; c <= end; c++ { - match += string(c) + if start <= end { + ranges = append(ranges, []rune{start, end}) + } else { + ranges = append(ranges, []rune{end, start}) } } - return match + return func(p Pointer) (Node, Pointer) { + matched := 0 + for p.pos+matched < len(p.input) { + if max != -1 && matched >= max { + break + } + + r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:]) + + anyMatched := false + for _, rng := range ranges { + if r >= rng[0] && r <= rng[1] { + anyMatched = true + } + } + if !anyMatched { + break + } + + matched += w + } + + if matched < min { + return NewError(p.pos+matched, fmt.Sprintf("Expected at least %d more of %s", min-matched, r)), p + } + + return NewToken(p.pos, p.input[p.pos:p.pos+matched]), p.Advance(matched) + } } func WS(p Pointer) (Node, Pointer) { - _, p2 := p.AcceptRun("\t\n\v\f\r \x85\xA0") - + _, p2 := CharRun("\t\n\v\f\r \x85\xA0")(p) return nil, p2 } diff --git a/parser_test.go b/parser_test.go index 7c22a76..83a95f6 100644 --- a/parser_test.go +++ b/parser_test.go @@ -19,6 +19,13 @@ func TestParsify(t *testing.T) { require.Equal(t, NewToken(0, "ff"), node) }) + t.Run("parser funcs", func(t *testing.T) { + node, _ := Parsify(func(p Pointer) (Node, Pointer) { + return NewToken(0, "hello"), p + })(p) + require.Equal(t, NewToken(0, "hello"), node) + }) + t.Run("*parsers", func(t *testing.T) { var parser Parser parserfied := Parsify(&parser) @@ -27,6 +34,20 @@ func TestParsify(t *testing.T) { node, _ := parserfied(p) require.Equal(t, NewToken(0, "ff"), node) }) + + require.Panics(t, func() { + Parsify(1) + }) +} + +func TestParsifyAll(t *testing.T) { + parsers := ParsifyAll("ff", "gg") + + result, _ := parsers[0](Pointer{"ffooo", 0}) + require.Equal(t, NewToken(0, "ff"), result) + + result, _ = parsers[1](Pointer{"ffooo", 0}) + require.Equal(t, NewError(0, "Expected gg"), result) } func TestExact(t *testing.T) { @@ -102,6 +123,58 @@ func TestWS(t *testing.T) { } func TestRange(t *testing.T) { - require.Equal(t, "abcdefg", Range("a-g")) - require.Equal(t, "01234abcd", Range("0-4a-d")) + t.Run("full match", func(t *testing.T) { + node, p := Range("a-z")(Pointer{"foobar", 0}) + require.Equal(t, NewToken(0, "foobar"), node) + require.Equal(t, "", p.Get()) + }) + + t.Run("partial match", func(t *testing.T) { + node, p := Range("1-4d-a")(Pointer{"a1b2c3d4efg", 0}) + require.Equal(t, NewToken(0, "a1b2c3d4"), node) + require.Equal(t, "efg", p.Get()) + }) + + t.Run("limited match", func(t *testing.T) { + node, p := Range("1-4d-a", 1, 2)(Pointer{"a1b2c3d4efg", 0}) + require.Equal(t, NewToken(0, "a1"), node) + require.Equal(t, "b2c3d4efg", p.Get()) + }) + + t.Run("no match", func(t *testing.T) { + node, p := Range("0-9")(Pointer{"ffffff", 0}) + require.Equal(t, NewError(0, "Expected at least 1 more of 0-9"), node) + require.Equal(t, 0, p.pos) + }) + + t.Run("no match with min", func(t *testing.T) { + node, p := Range("0-9", 4)(Pointer{"ffffff", 0}) + require.Equal(t, NewError(0, "Expected at least 4 more of 0-9"), node) + require.Equal(t, 0, p.pos) + }) + + require.Panics(t, func() { + Range("abcd") + }) + + require.Panics(t, func() { + Range("a-b", 1, 2, 3) + }) +} + +func TestParseString(t *testing.T) { + t.Run("partial match", func(t *testing.T) { + result, remaining, err := ParseString("hello", "hello world") + require.Equal(t, NewToken(0, "hello"), result) + require.Equal(t, " world", remaining) + require.NoError(t, err) + }) + + t.Run("error", func(t *testing.T) { + result, remaining, err := ParseString("world", "hello world") + require.Equal(t, nil, result) + require.Equal(t, "hello world", remaining) + require.Error(t, err) + require.Equal(t, "offset 0: Expected world", err.Error()) + }) } diff --git a/pointer.go b/pointer.go index 92b2bcb..7727833 100644 --- a/pointer.go +++ b/pointer.go @@ -1,18 +1,5 @@ package parsec -import ( - "strings" - "unicode/utf8" -) - -const ( - EOF rune = -1 -) - -func Input(s string) Pointer { - return Pointer{s, 0} -} - type Pointer struct { input string pos int @@ -23,59 +10,8 @@ func (p Pointer) Advance(i int) Pointer { } func (p Pointer) Get() string { + if p.pos > len(p.input) { + return "" + } return p.input[p.pos:] } - -func (p Pointer) Remaining() int { - remaining := len(p.input) - p.pos - if remaining < 0 { - return 0 - } - return remaining -} - -func (p Pointer) Next() (rune, Pointer) { - if int(p.pos) >= len(p.input) { - return EOF, p - } - r, w := utf8.DecodeRuneInString(p.input[p.pos:]) - return r, p.Advance(w) -} - -func (p Pointer) HasPrefix(s string) bool { - return strings.HasPrefix(p.input[p.pos:], s) -} - -func (p Pointer) Accept(valid string) (string, Pointer) { - c, newP := p.Next() - if strings.ContainsRune(valid, c) { - return string(c), newP - } - return "", p -} - -func (p Pointer) AcceptRun(valid string) (string, Pointer) { - matched := 0 - for p.pos+matched < len(p.input) { - r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:]) - if !strings.ContainsRune(valid, r) { - break - } - matched += w - } - - return p.input[p.pos : p.pos+matched], p.Advance(matched) -} - -func (p Pointer) AcceptUntil(invalid string) (string, Pointer) { - matched := 0 - for p.pos+matched < len(p.input) { - r, w := utf8.DecodeRuneInString(p.input[p.pos+matched:]) - if strings.ContainsRune(invalid, r) { - break - } - matched += w - } - - return p.input[p.pos : p.pos+matched], p.Advance(matched) -} diff --git a/pointer_test.go b/pointer_test.go index e8076d7..6b432f0 100644 --- a/pointer_test.go +++ b/pointer_test.go @@ -19,68 +19,7 @@ func TestPointer(t *testing.T) { t.Run("Get", func(t *testing.T) { require.Equal(t, "fooo", p.Get()) require.Equal(t, "ooo", p.Advance(1).Get()) - }) - - t.Run("Remaining", func(t *testing.T) { - require.Equal(t, 4, p.Remaining()) - require.Equal(t, 0, p.Advance(4).Remaining()) - require.Equal(t, 0, p.Advance(10).Remaining()) - }) - - t.Run("Next takes one character", func(t *testing.T) { - s, p2 := p.Next() - require.Equal(t, p.Advance(1), p2) - require.Equal(t, 'f', s) - }) - - t.Run("Next handles EOF", func(t *testing.T) { - s, p2 := p.Advance(5).Next() - require.Equal(t, p.Advance(5), p2) - require.Equal(t, EOF, s) - }) - - t.Run("HasPrefix", func(t *testing.T) { - require.True(t, p.HasPrefix("fo")) - require.False(t, p.HasPrefix("ooo")) - require.True(t, p.Advance(1).HasPrefix("ooo")) - require.False(t, p.Advance(1).HasPrefix("oooo")) - }) - - t.Run("Accept", func(t *testing.T) { - s, p2 := p.Accept("abcdef") - require.Equal(t, "f", s) - require.Equal(t, p.Advance(1), p2) - - s, p2 = p.Accept("ooooo") - require.Equal(t, "", s) - require.Equal(t, p.Advance(0), p2) - - s, p2 = p.Advance(4).Accept("ooooo") - require.Equal(t, "", s) - require.Equal(t, p.Advance(4), p2) - }) - - t.Run("AcceptRun", func(t *testing.T) { - s, p2 := p.AcceptRun("f") - require.Equal(t, "f", s) - require.Equal(t, p.Advance(1), p2) - - s, p3 := p.AcceptRun("fo") - require.Equal(t, "fooo", s) - require.Equal(t, p.Advance(4), p3) - - s, p4 := p3.AcceptRun("fo") - require.Equal(t, "", s) - require.Equal(t, p.Advance(4), p4) - }) - - t.Run("AcceptUntil", func(t *testing.T) { - s, p2 := p.AcceptUntil("o") - require.Equal(t, "f", s) - require.Equal(t, p.Advance(1), p2) - - s, p3 := p2.AcceptRun("o") - require.Equal(t, "ooo", s) - require.Equal(t, p.Advance(4), p3) + require.Equal(t, "", p.Advance(4).Get()) + require.Equal(t, "", p.Advance(10).Get()) }) }