This commit is contained in:
Adam Scarr 2017-08-07 21:20:30 +10:00
parent cc9d18219a
commit a656dc0d78
7 changed files with 83 additions and 48 deletions

View File

@ -37,6 +37,18 @@ func And(parsers ...Parserish) Parser {
}) })
} }
// NoAutoWS wraps parser so that the state's NoAutoWS flag is set for the whole
// time the wrapped parser runs.
//
// The flag value seen on entry is put back afterwards instead of being forced
// to false. That way a NoAutoWS nested inside another NoAutoWS does not clear
// the flag for the remainder of the outer parser.
func NoAutoWS(parser Parserish) Parser {
	parserfied := Parsify(parser)
	return func(ps *State) *Node {
		// Remember the caller's setting so nesting composes correctly.
		prev := ps.NoAutoWS
		ps.NoAutoWS = true
		ret := parserfied(ps)
		ps.NoAutoWS = prev
		return ret
	}
}
func Any(parsers ...Parserish) Parser { func Any(parsers ...Parserish) Parser {
if len(parsers) == 0 { if len(parsers) == 0 {
return Nil return Nil

View File

@ -23,7 +23,7 @@ func TestNever(t *testing.T) {
} }
func TestAnd(t *testing.T) { func TestAnd(t *testing.T) {
parser := And("hello", WS, "world") parser := And("hello", "world")
t.Run("matches sequence", func(t *testing.T) { t.Run("matches sequence", func(t *testing.T) {
node, p2 := runParser("hello world", parser) node, p2 := runParser("hello world", parser)
@ -68,8 +68,8 @@ func TestAny(t *testing.T) {
t.Run("Returns longest error", func(t *testing.T) { t.Run("Returns longest error", func(t *testing.T) {
_, p2 := runParser("hello world!", Any( _, p2 := runParser("hello world!", Any(
"nope", "nope",
And("hello", WS, "world", "."), And("hello", "world", "."),
And("hello", WS, "brother"), And("hello", "brother"),
)) ))
require.Equal(t, "offset 11: Expected .", p2.Error.Error()) require.Equal(t, "offset 11: Expected .", p2.Error.Error())
require.Equal(t, 11, p2.Error.Pos()) require.Equal(t, 11, p2.Error.Pos())
@ -77,7 +77,7 @@ func TestAny(t *testing.T) {
}) })
t.Run("Accepts nil matches", func(t *testing.T) { t.Run("Accepts nil matches", func(t *testing.T) {
node, p2 := runParser("hello world!", Any(Exact("ffffff"), WS)) node, p2 := runParser("hello world!", Any(Exact("ffffff")))
require.Nil(t, node) require.Nil(t, node)
require.Equal(t, 0, p2.Pos) require.Equal(t, 0, p2.Pos)
}) })
@ -101,6 +101,12 @@ func TestKleene(t *testing.T) {
require.Equal(t, 10, p2.Pos) require.Equal(t, 10, p2.Pos)
}) })
t.Run("splits words automatically on space", func(t *testing.T) {
node, p2 := runParser("hello world", Kleene(Chars("a-z")))
assertSequence(t, node, "hello", "world")
require.Equal(t, "", p2.Get())
})
t.Run("Stops on error", func(t *testing.T) { t.Run("Stops on error", func(t *testing.T) {
node, p2 := runParser("a,b,c,d,e,", Kleene(Chars("a-c"), ",")) node, p2 := runParser("a,b,c,d,e,", Kleene(Chars("a-c"), ","))
assertSequence(t, node, "a", "b", "c") assertSequence(t, node, "a", "b", "c")
@ -210,6 +216,7 @@ func assertNilParser(t *testing.T, parser Parser) {
} }
func assertSequence(t *testing.T, node *Node, expected ...string) { func assertSequence(t *testing.T, node *Node, expected ...string) {
require.NotNil(t, node)
actual := []string{} actual := []string{}
for _, child := range node.Children { for _, child := range node.Children {

View File

@ -1,6 +1,8 @@
package html package html
import . "github.com/vektah/goparsify" import (
. "github.com/vektah/goparsify"
)
func Parse(input string) (result interface{}, remaining string, err error) { func Parse(input string) (result interface{}, remaining string, err error) {
return ParseString(tag, input) return ParseString(tag, input)
@ -15,7 +17,7 @@ type Tag struct {
var ( var (
tag Parser tag Parser
identifier = Merge(And(Chars("a-z", 1, 1), Chars("a-zA-Z0-9", 0))) identifier = NoAutoWS(Merge(And(WS(), Chars("a-zA-Z", 1), Chars("a-zA-Z0-9", 0))))
text = Map(NotChars("<>"), func(n *Node) *Node { text = Map(NotChars("<>"), func(n *Node) *Node {
return &Node{Result: n.Token} return &Node{Result: n.Token}
}) })
@ -29,8 +31,8 @@ var (
return &Node{Result: ret} return &Node{Result: ret}
}) })
attr = And(WS, identifier, WS, "=", WS, Any(String('"'), String('\''))) attr = And(identifier, "=", Any(String('"'), String('\'')))
attrs = Map(Kleene(attr, WS), func(node *Node) *Node { attrs = Map(Kleene(attr), func(node *Node) *Node {
attr := map[string]string{} attr := map[string]string{}
for _, attrNode := range node.Children { for _, attrNode := range node.Children {

View File

@ -9,15 +9,15 @@ import (
var ( var (
value Parser value Parser
_array = Map(And(WS, "[", Kleene(&value, And(WS, ",")), "]"), func(n *Node) *Node { _array = Map(And("[", Kleene(&value, ","), "]"), func(n *Node) *Node {
ret := []interface{}{} ret := []interface{}{}
for _, child := range n.Children[1].Children { for _, child := range n.Children[1].Children {
ret = append(ret, child.Result) ret = append(ret, child.Result)
} }
return &Node{Result: ret} return &Node{Result: ret}
}) })
properties = Kleene(And(WS, String('"'), WS, ":", WS, &value), ",") properties = Kleene(And(String('"'), ":", &value), ",")
_object = Map(And(WS, "{", WS, properties, WS, "}"), func(n *Node) *Node { _object = Map(And("{", properties, "}"), func(n *Node) *Node {
ret := map[string]interface{}{} ret := map[string]interface{}{}
for _, prop := range n.Children[1].Children { for _, prop := range n.Children[1].Children {
@ -27,15 +27,15 @@ var (
return &Node{Result: ret} return &Node{Result: ret}
}) })
_null = Map(And(WS, "null"), func(n *Node) *Node { _null = Map("null", func(n *Node) *Node {
return &Node{Result: nil} return &Node{Result: nil}
}) })
_true = Map(And(WS, "true"), func(n *Node) *Node { _true = Map("true", func(n *Node) *Node {
return &Node{Result: true} return &Node{Result: true}
}) })
_false = Map(And(WS, "false"), func(n *Node) *Node { _false = Map("false", func(n *Node) *Node {
return &Node{Result: false} return &Node{Result: false}
}) })
@ -43,8 +43,8 @@ var (
return &Node{Result: n.Token} return &Node{Result: n.Token}
}) })
Y = Map(And(&value, WS), func(n *Node) *Node { Y = Map(&value, func(n *Node) *Node {
return &Node{Result: n.Children[0].Result} return &Node{Result: n.Result}
}) })
) )

View File

@ -33,8 +33,8 @@ type Parserish interface{}
func Parsify(p Parserish) Parser { func Parsify(p Parserish) Parser {
switch p := p.(type) { switch p := p.(type) {
//case func(*State) *Node: case func(*State) *Node:
// return NewParser("anonymous func", p) return NewParser("anonymous func", p)
case Parser: case Parser:
return p return p
case *Parser: case *Parser:
@ -57,10 +57,19 @@ func ParsifyAll(parsers ...Parserish) []Parser {
return ret return ret
} }
// WS builds a parser, registered under the name "AutoWS", that calls the
// state's WS method and always yields a nil node.
func WS() Parser {
	skip := func(state *State) *Node {
		state.WS()
		return nil
	}
	return NewParser("AutoWS", skip)
}
func ParseString(parser Parserish, input string) (result interface{}, remaining string, err error) { func ParseString(parser Parserish, input string) (result interface{}, remaining string, err error) {
p := Parsify(parser) p := Parsify(parser)
ps := &State{input, 0, Error{}} ps := InputString(input)
ret := p(ps) ret := p(ps)
ps.AutoWS()
if ps.Error.Expected != "" { if ps.Error.Expected != "" {
return nil, ps.Get(), ps.Error return nil, ps.Get(), ps.Error
@ -71,6 +80,7 @@ func ParseString(parser Parserish, input string) (result interface{}, remaining
func Exact(match string) Parser { func Exact(match string) Parser {
return NewParser(match, func(ps *State) *Node { return NewParser(match, func(ps *State) *Node {
ps.AutoWS()
if !strings.HasPrefix(ps.Get(), match) { if !strings.HasPrefix(ps.Get(), match) {
ps.ErrorHere(match) ps.ErrorHere(match)
return nil return nil
@ -138,6 +148,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
matches, ranges := parseMatcher(matcher) matches, ranges := parseMatcher(matcher)
return func(ps *State) *Node { return func(ps *State) *Node {
ps.AutoWS()
matched := 0 matched := 0
for ps.Pos+matched < len(ps.Input) { for ps.Pos+matched < len(ps.Input) {
if max != -1 && matched >= max { if max != -1 && matched >= max {
@ -173,15 +184,9 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
} }
} }
var ws = NewParser("WS", Chars("\t\n\v\f\r \x85\xA0", 0))
func WS(ps *State) *Node {
ws(ps)
return nil
}
func String(quote rune) Parser { func String(quote rune) Parser {
return NewParser("string", func(ps *State) *Node { return NewParser("string", func(ps *State) *Node {
ps.AutoWS()
var r rune var r rune
var w int var w int
var matched int var matched int

View File

@ -120,7 +120,7 @@ func TestParseString(t *testing.T) {
t.Run("partial match", func(t *testing.T) { t.Run("partial match", func(t *testing.T) {
result, remaining, err := ParseString(Y, "hello world") result, remaining, err := ParseString(Y, "hello world")
require.Equal(t, "hello", result) require.Equal(t, "hello", result)
require.Equal(t, " world", remaining) require.Equal(t, "world", remaining)
require.NoError(t, err) require.NoError(t, err)
}) })
@ -159,22 +159,6 @@ func TestString(t *testing.T) {
}) })
} }
func TestWS(t *testing.T) {
t.Run("consumes all whitespace", func(t *testing.T) {
result, p := runParser(" asdf", WS)
require.Nil(t, result)
require.Equal(t, "asdf", p.Get())
require.False(t, p.Errored())
})
t.Run("never errors", func(t *testing.T) {
result, p := runParser("asdf", WS)
require.Nil(t, result)
require.Equal(t, "asdf", p.Get())
require.False(t, p.Errored())
})
}
func runParser(input string, parser Parser) (*Node, *State) { func runParser(input string, parser Parser) (*Node, *State) {
ps := InputString(input) ps := InputString(input)
result := parser(ps) result := parser(ps)

View File

@ -1,6 +1,10 @@
package goparsify package goparsify
import "fmt" import (
"fmt"
"strings"
"unicode/utf8"
)
type Error struct { type Error struct {
pos int pos int
@ -11,15 +15,36 @@ func (e Error) Pos() int { return e.pos }
func (e Error) Error() string { return fmt.Sprintf("offset %d: Expected %s", e.pos, e.Expected) } func (e Error) Error() string { return fmt.Sprintf("offset %d: Expected %s", e.pos, e.Expected) }
type State struct { type State struct {
Input string Input string
Pos int Pos int
Error Error Error Error
WSChars string
NoAutoWS bool
} }
func (s *State) Advance(i int) { func (s *State) Advance(i int) {
s.Pos += i s.Pos += i
} }
// AutoWS calls WS to skip whitespace at the current position, unless the
// NoAutoWS flag is set, in which case it does nothing.
func (s *State) AutoWS() {
	if !s.NoAutoWS {
		s.WS()
	}
}
// WS advances Pos past every leading rune of the remaining input that appears
// in WSChars. It stops at the first rune that is not in the set, at the first
// byte that is not valid UTF-8, or at the end of the input.
func (s *State) WS() {
	for s.Pos < len(s.Input) {
		r, w := utf8.DecodeRuneInString(s.Input[s.Pos:])
		// An undecodable byte is reported as (RuneError, 1). ContainsRune
		// interprets RuneError as "any invalid UTF-8 sequence in the set", so
		// a raw byte such as "\x85" in WSChars would otherwise make every
		// invalid input byte count as whitespace.
		if r == utf8.RuneError && w == 1 {
			return
		}
		if !strings.ContainsRune(s.WSChars, r) {
			return
		}
		s.Pos += w
	}
}
func (s *State) Get() string { func (s *State) Get() string {
if s.Pos > len(s.Input) { if s.Pos > len(s.Input) {
return "" return ""
@ -41,5 +66,5 @@ func (s *State) Errored() bool {
} }
func InputString(input string) *State { func InputString(input string) *State {
return &State{Input: input} return &State{Input: input, WSChars: "\t\n\v\f\r \x85\xA0"}
} }