diff --git a/combinator.go b/combinator.go
index 7dc4147..12991a3 100644
--- a/combinator.go
+++ b/combinator.go
@@ -37,6 +37,24 @@ func And(parsers ...Parserish) Parser {
 	})
 }
 
+// NoAutoWS wraps parser so that State.AutoWS is a no-op while it runs, letting
+// the wrapped parser see whitespace exactly as written. The previous value of
+// State.NoAutoWS is put back afterwards, so nesting NoAutoWS inside another
+// NoAutoWS does not switch automatic whitespace skipping back on early.
+func NoAutoWS(parser Parserish) Parser {
+	parserfied := Parsify(parser)
+	return func(ps *State) *Node {
+		prev := ps.NoAutoWS
+		ps.NoAutoWS = true
+
+		ret := parserfied(ps)
+
+		// Restore instead of forcing false: an enclosing NoAutoWS may still be active.
+		ps.NoAutoWS = prev
+		return ret
+	}
+}
+
 func Any(parsers ...Parserish) Parser {
 	if len(parsers) == 0 {
 		return Nil
diff --git a/combinator_test.go b/combinator_test.go
index 0f10b01..dbf2b50 100644
--- a/combinator_test.go
+++ b/combinator_test.go
@@ -23,7 +23,7 @@ func TestNever(t *testing.T) {
 }
 
 func TestAnd(t *testing.T) {
-	parser := And("hello", WS, "world")
+	parser := And("hello", "world")
 
 	t.Run("matches sequence", func(t *testing.T) {
 		node, p2 := runParser("hello world", parser)
@@ -68,8 +68,8 @@ func TestAny(t *testing.T) {
 	t.Run("Returns longest error", func(t *testing.T) {
 		_, p2 := runParser("hello world!", Any(
 			"nope",
-			And("hello", WS, "world", "."),
-			And("hello", WS, "brother"),
+			And("hello", "world", "."),
+			And("hello", "brother"),
 		))
 		require.Equal(t, "offset 11: Expected .", p2.Error.Error())
 		require.Equal(t, 11, p2.Error.Pos())
@@ -77,7 +77,7 @@ func TestAny(t *testing.T) {
 	})
 
 	t.Run("Accepts nil matches", func(t *testing.T) {
-		node, p2 := runParser("hello world!", Any(Exact("ffffff"), WS))
+		node, p2 := runParser("hello world!", Any(Exact("ffffff")))
 		require.Nil(t, node)
 		require.Equal(t, 0, p2.Pos)
 	})
@@ -101,6 +101,12 @@ func TestKleene(t *testing.T) {
 		require.Equal(t, 10, p2.Pos)
 	})
 
+	t.Run("splits words automatically on space", func(t *testing.T) {
+		node, p2 := runParser("hello world", Kleene(Chars("a-z")))
+		assertSequence(t, node, "hello", "world")
+		require.Equal(t, "", p2.Get())
+	})
+
 	t.Run("Stops on error", func(t *testing.T) {
 		node, p2 := runParser("a,b,c,d,e,", Kleene(Chars("a-c"), ","))
 		assertSequence(t, node, "a", "b", "c")
@@ -210,6 +216,7 @@ func assertNilParser(t *testing.T, parser Parser) {
 }
 
 func assertSequence(t *testing.T, node *Node, expected ...string) {
+	require.NotNil(t, node)
 	actual := []string{}
 
 	for _, child := range node.Children {
diff --git a/html/html.go b/html/html.go
index 8b19c23..4d02cb0 100644
--- a/html/html.go
+++ b/html/html.go
@@ -1,6 +1,8 @@
 package html
 
-import . "github.com/vektah/goparsify"
+import (
+	. "github.com/vektah/goparsify"
+)
 
 func Parse(input string) (result interface{}, remaining string, err error) {
 	return ParseString(tag, input)
@@ -15,7 +17,7 @@ type Tag struct {
 var (
 	tag Parser
 
-	identifier = Merge(And(Chars("a-z", 1, 1), Chars("a-zA-Z0-9", 0)))
+	identifier = NoAutoWS(Merge(And(WS(), Chars("a-zA-Z", 1), Chars("a-zA-Z0-9", 0))))
 	text       = Map(NotChars("<>"), func(n *Node) *Node {
 		return &Node{Result: n.Token}
 	})
@@ -29,8 +31,8 @@ var (
 		return &Node{Result: ret}
 	})
 
-	attr  = And(WS, identifier, WS, "=", WS, Any(String('"'), String('\'')))
-	attrs = Map(Kleene(attr, WS), func(node *Node) *Node {
+	attr  = And(identifier, "=", Any(String('"'), String('\'')))
+	attrs = Map(Kleene(attr), func(node *Node) *Node {
 		attr := map[string]string{}
 
 		for _, attrNode := range node.Children {
diff --git a/json/json.go b/json/json.go
index fbba21c..01d209f 100644
--- a/json/json.go
+++ b/json/json.go
@@ -9,15 +9,15 @@ import (
 var (
 	value Parser
 
-	_array = Map(And(WS, "[", Kleene(&value, And(WS, ",")), "]"), func(n *Node) *Node {
+	_array = Map(And("[", Kleene(&value, ","), "]"), func(n *Node) *Node {
 		ret := []interface{}{}
 		for _, child := range n.Children[1].Children {
 			ret = append(ret, child.Result)
 		}
 		return &Node{Result: ret}
 	})
-	properties = Kleene(And(WS, String('"'), WS, ":", WS, &value), ",")
-	_object    = Map(And(WS, "{", WS, properties, WS, "}"), func(n *Node) *Node {
+	properties = Kleene(And(String('"'), ":", &value), ",")
+	_object    = Map(And("{", properties, "}"), func(n *Node) *Node {
 		ret := map[string]interface{}{}
 
 		for _, prop := range n.Children[1].Children {
@@ -27,15 +27,15 @@ var (
 		return &Node{Result: ret}
 	})
 
-	_null = Map(And(WS, "null"), func(n *Node) *Node {
+	_null = Map("null", func(n *Node) *Node {
 		return &Node{Result: nil}
 	})
 
-	_true = Map(And(WS, "true"), func(n *Node) *Node {
+	_true = Map("true", func(n *Node) *Node {
 		return &Node{Result: true}
 	})
 
-	_false = Map(And(WS, "false"), func(n *Node) *Node {
+	_false = Map("false", func(n *Node) *Node {
 		return &Node{Result: false}
 	})
 
@@ -43,8 +43,8 @@ var (
 		return &Node{Result: n.Token}
 	})
 
-	Y = Map(And(&value, WS), func(n *Node) *Node {
-		return &Node{Result: n.Children[0].Result}
+	Y = Map(&value, func(n *Node) *Node {
+		return &Node{Result: n.Result}
 	})
 )
 
diff --git a/parser.go b/parser.go
index 4db525b..25a907e 100644
--- a/parser.go
+++ b/parser.go
@@ -33,8 +33,8 @@ type Parserish interface{}
 
 func Parsify(p Parserish) Parser {
 	switch p := p.(type) {
-	//case func(*State) *Node:
-	//	return NewParser("anonymous func", p)
+	case func(*State) *Node:
+		return NewParser("anonymous func", p)
 	case Parser:
 		return p
 	case *Parser:
@@ -57,10 +57,21 @@ func ParsifyAll(parsers ...Parserish) []Parser {
 	return ret
 }
 
+// WS returns a parser that explicitly consumes whitespace via State.WS, which
+// does not consult State.NoAutoWS. The wrapped function always returns nil.
+func WS() Parser {
+	return NewParser("AutoWS", func(ps *State) *Node {
+		ps.WS()
+		return nil
+	})
+}
+
 func ParseString(parser Parserish, input string) (result interface{}, remaining string, err error) {
 	p := Parsify(parser)
-	ps := &State{input, 0, Error{}}
+	ps := InputString(input)
+
 	ret := p(ps)
+	ps.AutoWS()
 
 	if ps.Error.Expected != "" {
 		return nil, ps.Get(), ps.Error
@@ -71,6 +82,7 @@ func ParseString(parser Parserish, input string) (result interface{}, remaining
 
 func Exact(match string) Parser {
 	return NewParser(match, func(ps *State) *Node {
+		ps.AutoWS()
 		if !strings.HasPrefix(ps.Get(), match) {
 			ps.ErrorHere(match)
 			return nil
@@ -138,6 +150,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
 	matches, ranges := parseMatcher(matcher)
 
 	return func(ps *State) *Node {
+		ps.AutoWS()
 		matched := 0
 		for ps.Pos+matched < len(ps.Input) {
 			if max != -1 && matched >= max {
@@ -173,15 +186,9 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
 	}
 }
 
-var ws = NewParser("WS", Chars("\t\n\v\f\r \x85\xA0", 0))
-
-func WS(ps *State) *Node {
-	ws(ps)
-	return nil
-}
-
 func String(quote rune) Parser {
 	return NewParser("string", func(ps *State) *Node {
+		ps.AutoWS()
 		var r rune
 		var w int
 		var matched int
diff --git a/parser_test.go b/parser_test.go
index d7ab0d4..cd2d24a 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -120,7 +120,7 @@ func TestParseString(t *testing.T) {
 	t.Run("partial match", func(t *testing.T) {
 		result, remaining, err := ParseString(Y, "hello world")
 		require.Equal(t, "hello", result)
-		require.Equal(t, " world", remaining)
+		require.Equal(t, "world", remaining)
 		require.NoError(t, err)
 	})
 
@@ -159,22 +159,6 @@ func TestString(t *testing.T) {
 	})
 }
 
-func TestWS(t *testing.T) {
-	t.Run("consumes all whitespace", func(t *testing.T) {
-		result, p := runParser(" asdf", WS)
-		require.Nil(t, result)
-		require.Equal(t, "asdf", p.Get())
-		require.False(t, p.Errored())
-	})
-
-	t.Run("never errors", func(t *testing.T) {
-		result, p := runParser("asdf", WS)
-		require.Nil(t, result)
-		require.Equal(t, "asdf", p.Get())
-		require.False(t, p.Errored())
-	})
-}
-
 func runParser(input string, parser Parser) (*Node, *State) {
 	ps := InputString(input)
 	result := parser(ps)
diff --git a/state.go b/state.go
index e7a5fe4..99e6021 100644
--- a/state.go
+++ b/state.go
@@ -1,6 +1,10 @@
 package goparsify
 
-import "fmt"
+import (
+	"fmt"
+	"strings"
+	"unicode/utf8"
+)
 
 type Error struct {
 	pos int
@@ -11,15 +15,38 @@ func (e Error) Pos() int { return e.pos }
 func (e Error) Error() string { return fmt.Sprintf("offset %d: Expected %s", e.pos, e.Expected) }
 
 type State struct {
-	Input string
-	Pos   int
-	Error Error
+	Input    string
+	Pos      int
+	Error    Error
+	WSChars  string // runes that WS (and therefore AutoWS) skips over
+	NoAutoWS bool   // when true, AutoWS does nothing
 }
 
 func (s *State) Advance(i int) {
 	s.Pos += i
 }
 
+// AutoWS skips whitespace at the current position by calling WS, unless
+// NoAutoWS is set, in which case it does nothing.
+func (s *State) AutoWS() {
+	if s.NoAutoWS {
+		return
+	}
+	s.WS()
+}
+
+// WS advances Pos past every leading rune that appears in WSChars. It stops at
+// the first rune not in WSChars or at the end of Input; it never records an error.
+func (s *State) WS() {
+	for s.Pos < len(s.Input) {
+		r, w := utf8.DecodeRuneInString(s.Input[s.Pos:])
+		if !strings.ContainsRune(s.WSChars, r) {
+			return
+		}
+		s.Pos += w
+	}
+}
+
 func (s *State) Get() string {
 	if s.Pos > len(s.Input) {
 		return ""
@@ -41,5 +68,7 @@ func (s *State) Errored() bool {
 }
 
 func InputString(input string) *State {
-	return &State{Input: input}
+	// \u escapes, not \x: WS compares decoded runes, and "\x85\xA0" would be two
+	// invalid UTF-8 bytes rather than NEL (U+0085) and NBSP (U+00A0).
+	return &State{Input: input, WSChars: "\t\n\v\f\r \u0085\u00A0"}
 }