summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Adam Scarr <adam@vektah.net> 2017-08-07 21:20:30 +1000
committer Adam Scarr <adam@vektah.net> 2017-08-07 21:22:54 +1000
commit a656dc0d78c5f51a16dc4c26936d337cdae5105c (patch)
tree abd40ad9b8c6218d61c5dab230ef3712a0fef0b3
parent cc9d18219af9375ad89eaa8a23f1e0bcffa5734e (diff)
AutoWS
-rw-r--r-- combinator.go 12
-rw-r--r-- combinator_test.go 15
-rw-r--r-- html/html.go 10
-rw-r--r-- json/json.go 16
-rw-r--r-- parser.go 25
-rw-r--r-- parser_test.go 18
-rw-r--r-- state.go 35
7 files changed, 83 insertions, 48 deletions
diff --git a/combinator.go b/combinator.go
index 7dc4147..12991a3 100644
--- a/combinator.go
+++ b/combinator.go
@@ -37,6 +37,18 @@ func And(parsers ...Parserish) Parser {
})
}
+// NoAutoWS wraps parser so that automatic whitespace skipping is switched off
+// (State.NoAutoWS is set) for as long as the wrapped parser runs. The flag's
+// previous value is put back afterwards, so a nested NoAutoWS does not turn
+// automatic whitespace back on for the remainder of an enclosing NoAutoWS.
+func NoAutoWS(parser Parserish) Parser {
+ parserfied := Parsify(parser)
+ return func(ps *State) *Node {
+ // Remember the caller's setting instead of assuming it was false.
+ prev := ps.NoAutoWS
+ ps.NoAutoWS = true
+
+ ret := parserfied(ps)
+
+ ps.NoAutoWS = prev
+ return ret
+ }
+}
+
func Any(parsers ...Parserish) Parser {
if len(parsers) == 0 {
return Nil
diff --git a/combinator_test.go b/combinator_test.go
index 0f10b01..dbf2b50 100644
--- a/combinator_test.go
+++ b/combinator_test.go
@@ -23,7 +23,7 @@ func TestNever(t *testing.T) {
}
func TestAnd(t *testing.T) {
- parser := And("hello", WS, "world")
+ parser := And("hello", "world")
t.Run("matches sequence", func(t *testing.T) {
node, p2 := runParser("hello world", parser)
@@ -68,8 +68,8 @@ func TestAny(t *testing.T) {
t.Run("Returns longest error", func(t *testing.T) {
_, p2 := runParser("hello world!", Any(
"nope",
- And("hello", WS, "world", "."),
- And("hello", WS, "brother"),
+ And("hello", "world", "."),
+ And("hello", "brother"),
))
require.Equal(t, "offset 11: Expected .", p2.Error.Error())
require.Equal(t, 11, p2.Error.Pos())
@@ -77,7 +77,7 @@ func TestAny(t *testing.T) {
})
t.Run("Accepts nil matches", func(t *testing.T) {
- node, p2 := runParser("hello world!", Any(Exact("ffffff"), WS))
+ node, p2 := runParser("hello world!", Any(Exact("ffffff")))
require.Nil(t, node)
require.Equal(t, 0, p2.Pos)
})
@@ -101,6 +101,12 @@ func TestKleene(t *testing.T) {
require.Equal(t, 10, p2.Pos)
})
+ t.Run("splits words automatically on space", func(t *testing.T) {
+ node, p2 := runParser("hello world", Kleene(Chars("a-z")))
+ assertSequence(t, node, "hello", "world")
+ require.Equal(t, "", p2.Get())
+ })
+
t.Run("Stops on error", func(t *testing.T) {
node, p2 := runParser("a,b,c,d,e,", Kleene(Chars("a-c"), ","))
assertSequence(t, node, "a", "b", "c")
@@ -210,6 +216,7 @@ func assertNilParser(t *testing.T, parser Parser) {
}
func assertSequence(t *testing.T, node *Node, expected ...string) {
+ require.NotNil(t, node)
actual := []string{}
for _, child := range node.Children {
diff --git a/html/html.go b/html/html.go
index 8b19c23..4d02cb0 100644
--- a/html/html.go
+++ b/html/html.go
@@ -1,6 +1,8 @@
package html
-import . "github.com/vektah/goparsify"
+import (
+ . "github.com/vektah/goparsify"
+)
func Parse(input string) (result interface{}, remaining string, err error) {
return ParseString(tag, input)
@@ -15,7 +17,7 @@ type Tag struct {
var (
tag Parser
- identifier = Merge(And(Chars("a-z", 1, 1), Chars("a-zA-Z0-9", 0)))
+ identifier = NoAutoWS(Merge(And(WS(), Chars("a-zA-Z", 1), Chars("a-zA-Z0-9", 0))))
text = Map(NotChars("<>"), func(n *Node) *Node {
return &Node{Result: n.Token}
})
@@ -29,8 +31,8 @@ var (
return &Node{Result: ret}
})
- attr = And(WS, identifier, WS, "=", WS, Any(String('"'), String('\'')))
- attrs = Map(Kleene(attr, WS), func(node *Node) *Node {
+ attr = And(identifier, "=", Any(String('"'), String('\'')))
+ attrs = Map(Kleene(attr), func(node *Node) *Node {
attr := map[string]string{}
for _, attrNode := range node.Children {
diff --git a/json/json.go b/json/json.go
index fbba21c..01d209f 100644
--- a/json/json.go
+++ b/json/json.go
@@ -9,15 +9,15 @@ import (
var (
value Parser
- _array = Map(And(WS, "[", Kleene(&value, And(WS, ",")), "]"), func(n *Node) *Node {
+ _array = Map(And("[", Kleene(&value, ","), "]"), func(n *Node) *Node {
ret := []interface{}{}
for _, child := range n.Children[1].Children {
ret = append(ret, child.Result)
}
return &Node{Result: ret}
})
- properties = Kleene(And(WS, String('"'), WS, ":", WS, &value), ",")
- _object = Map(And(WS, "{", WS, properties, WS, "}"), func(n *Node) *Node {
+ properties = Kleene(And(String('"'), ":", &value), ",")
+ _object = Map(And("{", properties, "}"), func(n *Node) *Node {
ret := map[string]interface{}{}
for _, prop := range n.Children[1].Children {
@@ -27,15 +27,15 @@ var (
return &Node{Result: ret}
})
- _null = Map(And(WS, "null"), func(n *Node) *Node {
+ _null = Map("null", func(n *Node) *Node {
return &Node{Result: nil}
})
- _true = Map(And(WS, "true"), func(n *Node) *Node {
+ _true = Map("true", func(n *Node) *Node {
return &Node{Result: true}
})
- _false = Map(And(WS, "false"), func(n *Node) *Node {
+ _false = Map("false", func(n *Node) *Node {
return &Node{Result: false}
})
@@ -43,8 +43,8 @@ var (
return &Node{Result: n.Token}
})
- Y = Map(And(&value, WS), func(n *Node) *Node {
- return &Node{Result: n.Children[0].Result}
+ Y = Map(&value, func(n *Node) *Node {
+ return &Node{Result: n.Result}
})
)
diff --git a/parser.go b/parser.go
index 4db525b..25a907e 100644
--- a/parser.go
+++ b/parser.go
@@ -33,8 +33,8 @@ type Parserish interface{}
func Parsify(p Parserish) Parser {
switch p := p.(type) {
- //case func(*State) *Node:
- // return NewParser("anonymous func", p)
+ case func(*State) *Node:
+ return NewParser("anonymous func", p)
case Parser:
return p
case *Parser:
@@ -57,10 +57,19 @@ func ParsifyAll(parsers ...Parserish) []Parser {
return ret
}
+// WS builds a parser that explicitly consumes whitespace by calling State.WS
+// directly, so it does not consult the NoAutoWS flag. The wrapped function
+// always returns nil.
+func WS() Parser {
+ skip := func(ps *State) *Node {
+ ps.WS()
+ return nil
+ }
+ return NewParser("AutoWS", skip)
+}
+
func ParseString(parser Parserish, input string) (result interface{}, remaining string, err error) {
p := Parsify(parser)
- ps := &State{input, 0, Error{}}
+ ps := InputString(input)
+
ret := p(ps)
+ ps.AutoWS()
if ps.Error.Expected != "" {
return nil, ps.Get(), ps.Error
@@ -71,6 +80,7 @@ func ParseString(parser Parserish, input string) (result interface{}, remaining
func Exact(match string) Parser {
return NewParser(match, func(ps *State) *Node {
+ ps.AutoWS()
if !strings.HasPrefix(ps.Get(), match) {
ps.ErrorHere(match)
return nil
@@ -138,6 +148,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
matches, ranges := parseMatcher(matcher)
return func(ps *State) *Node {
+ ps.AutoWS()
matched := 0
for ps.Pos+matched < len(ps.Input) {
if max != -1 && matched >= max {
@@ -173,15 +184,9 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
}
}
-var ws = NewParser("WS", Chars("\t\n\v\f\r \x85\xA0", 0))
-
-func WS(ps *State) *Node {
- ws(ps)
- return nil
-}
-
func String(quote rune) Parser {
return NewParser("string", func(ps *State) *Node {
+ ps.AutoWS()
var r rune
var w int
var matched int
diff --git a/parser_test.go b/parser_test.go
index d7ab0d4..cd2d24a 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -120,7 +120,7 @@ func TestParseString(t *testing.T) {
t.Run("partial match", func(t *testing.T) {
result, remaining, err := ParseString(Y, "hello world")
require.Equal(t, "hello", result)
- require.Equal(t, " world", remaining)
+ require.Equal(t, "world", remaining)
require.NoError(t, err)
})
@@ -159,22 +159,6 @@ func TestString(t *testing.T) {
})
}
-func TestWS(t *testing.T) {
- t.Run("consumes all whitespace", func(t *testing.T) {
- result, p := runParser(" asdf", WS)
- require.Nil(t, result)
- require.Equal(t, "asdf", p.Get())
- require.False(t, p.Errored())
- })
-
- t.Run("never errors", func(t *testing.T) {
- result, p := runParser("asdf", WS)
- require.Nil(t, result)
- require.Equal(t, "asdf", p.Get())
- require.False(t, p.Errored())
- })
-}
-
func runParser(input string, parser Parser) (*Node, *State) {
ps := InputString(input)
result := parser(ps)
diff --git a/state.go b/state.go
index e7a5fe4..99e6021 100644
--- a/state.go
+++ b/state.go
@@ -1,6 +1,10 @@
package goparsify
-import "fmt"
+import (
+ "fmt"
+ "strings"
+ "unicode/utf8"
+)
type Error struct {
pos int
@@ -11,15 +15,36 @@ func (e Error) Pos() int { return e.pos }
+// Error implements the error interface, formatting the failure as
+// "offset <pos>: Expected <Expected>".
func (e Error) Error() string { return fmt.Sprintf("offset %d: Expected %s", e.pos, e.Expected) }
+// State is the mutable cursor that parsers operate on: the text being parsed,
+// the current position in it, the error recorded so far, and the whitespace
+// handling settings.
type State struct {
- Input string
- Pos int
- Error Error
+ // Input is the full text being parsed.
+ Input string
+ // Pos is the current byte offset into Input.
+ Pos int
+ // Error holds the recorded parse error.
+ Error Error
+ // WSChars lists the runes that WS skips over as whitespace.
+ WSChars string
+ // NoAutoWS, when true, makes AutoWS return without consuming anything.
+ NoAutoWS bool
}
+// Advance adds i to the current position Pos; no bounds check is performed.
func (s *State) Advance(i int) {
 s.Pos += i
}
+// AutoWS skips any whitespace at the current position by delegating to WS,
+// unless NoAutoWS is set, in which case the state is left untouched.
+func (s *State) AutoWS() {
+ if !s.NoAutoWS {
+ s.WS()
+ }
+}
+
+// WS moves Pos past every consecutive rune of the remaining input that occurs
+// in WSChars. It stops at the first rune that does not, or at the end of
+// Input, and does not look at the NoAutoWS flag.
+func (s *State) WS() {
+ end := len(s.Input)
+ for s.Pos < end {
+ char, width := utf8.DecodeRuneInString(s.Input[s.Pos:])
+ if strings.IndexRune(s.WSChars, char) < 0 {
+ break
+ }
+ // Step by the rune's encoded width so multi-byte runes are skipped whole.
+ s.Advance(width)
+ }
+}
+
func (s *State) Get() string {
if s.Pos > len(s.Input) {
return ""
@@ -41,5 +66,5 @@ func (s *State) Errored() bool {
}
+// InputString creates a State for parsing input, starting at offset 0 and
+// with the default whitespace set installed in WSChars.
func InputString(input string) *State {
- return &State{Input: input}
+ // NEL (U+0085) and NBSP (U+00A0) are written as \u escapes so WSChars stays
+ // valid UTF-8. The raw bytes \x85 and \xA0 decode as utf8.RuneError, which
+ // makes strings.ContainsRune report every invalid input byte as whitespace
+ // while never matching the real U+0085 / U+00A0 characters.
+ return &State{Input: input, WSChars: "\t\n\v\f\r \u0085\u00A0"}
}