AutoWS
This commit is contained in:
parent
cc9d18219a
commit
a656dc0d78
@ -37,6 +37,18 @@ func And(parsers ...Parserish) Parser {
|
||||
})
|
||||
}
|
||||
|
||||
func NoAutoWS(parser Parserish) Parser {
|
||||
parserfied := Parsify(parser)
|
||||
return func(ps *State) *Node {
|
||||
ps.NoAutoWS = true
|
||||
|
||||
ret := parserfied(ps)
|
||||
|
||||
ps.NoAutoWS = false
|
||||
return ret
|
||||
}
|
||||
}
|
||||
|
||||
func Any(parsers ...Parserish) Parser {
|
||||
if len(parsers) == 0 {
|
||||
return Nil
|
||||
|
@ -23,7 +23,7 @@ func TestNever(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAnd(t *testing.T) {
|
||||
parser := And("hello", WS, "world")
|
||||
parser := And("hello", "world")
|
||||
|
||||
t.Run("matches sequence", func(t *testing.T) {
|
||||
node, p2 := runParser("hello world", parser)
|
||||
@ -68,8 +68,8 @@ func TestAny(t *testing.T) {
|
||||
t.Run("Returns longest error", func(t *testing.T) {
|
||||
_, p2 := runParser("hello world!", Any(
|
||||
"nope",
|
||||
And("hello", WS, "world", "."),
|
||||
And("hello", WS, "brother"),
|
||||
And("hello", "world", "."),
|
||||
And("hello", "brother"),
|
||||
))
|
||||
require.Equal(t, "offset 11: Expected .", p2.Error.Error())
|
||||
require.Equal(t, 11, p2.Error.Pos())
|
||||
@ -77,7 +77,7 @@ func TestAny(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("Accepts nil matches", func(t *testing.T) {
|
||||
node, p2 := runParser("hello world!", Any(Exact("ffffff"), WS))
|
||||
node, p2 := runParser("hello world!", Any(Exact("ffffff")))
|
||||
require.Nil(t, node)
|
||||
require.Equal(t, 0, p2.Pos)
|
||||
})
|
||||
@ -101,6 +101,12 @@ func TestKleene(t *testing.T) {
|
||||
require.Equal(t, 10, p2.Pos)
|
||||
})
|
||||
|
||||
t.Run("splits words automatically on space", func(t *testing.T) {
|
||||
node, p2 := runParser("hello world", Kleene(Chars("a-z")))
|
||||
assertSequence(t, node, "hello", "world")
|
||||
require.Equal(t, "", p2.Get())
|
||||
})
|
||||
|
||||
t.Run("Stops on error", func(t *testing.T) {
|
||||
node, p2 := runParser("a,b,c,d,e,", Kleene(Chars("a-c"), ","))
|
||||
assertSequence(t, node, "a", "b", "c")
|
||||
@ -210,6 +216,7 @@ func assertNilParser(t *testing.T, parser Parser) {
|
||||
}
|
||||
|
||||
func assertSequence(t *testing.T, node *Node, expected ...string) {
|
||||
require.NotNil(t, node)
|
||||
actual := []string{}
|
||||
|
||||
for _, child := range node.Children {
|
||||
|
10
html/html.go
10
html/html.go
@ -1,6 +1,8 @@
|
||||
package html
|
||||
|
||||
import . "github.com/vektah/goparsify"
|
||||
import (
|
||||
. "github.com/vektah/goparsify"
|
||||
)
|
||||
|
||||
func Parse(input string) (result interface{}, remaining string, err error) {
|
||||
return ParseString(tag, input)
|
||||
@ -15,7 +17,7 @@ type Tag struct {
|
||||
var (
|
||||
tag Parser
|
||||
|
||||
identifier = Merge(And(Chars("a-z", 1, 1), Chars("a-zA-Z0-9", 0)))
|
||||
identifier = NoAutoWS(Merge(And(WS(), Chars("a-zA-Z", 1), Chars("a-zA-Z0-9", 0))))
|
||||
text = Map(NotChars("<>"), func(n *Node) *Node {
|
||||
return &Node{Result: n.Token}
|
||||
})
|
||||
@ -29,8 +31,8 @@ var (
|
||||
return &Node{Result: ret}
|
||||
})
|
||||
|
||||
attr = And(WS, identifier, WS, "=", WS, Any(String('"'), String('\'')))
|
||||
attrs = Map(Kleene(attr, WS), func(node *Node) *Node {
|
||||
attr = And(identifier, "=", Any(String('"'), String('\'')))
|
||||
attrs = Map(Kleene(attr), func(node *Node) *Node {
|
||||
attr := map[string]string{}
|
||||
|
||||
for _, attrNode := range node.Children {
|
||||
|
16
json/json.go
16
json/json.go
@ -9,15 +9,15 @@ import (
|
||||
var (
|
||||
value Parser
|
||||
|
||||
_array = Map(And(WS, "[", Kleene(&value, And(WS, ",")), "]"), func(n *Node) *Node {
|
||||
_array = Map(And("[", Kleene(&value, ","), "]"), func(n *Node) *Node {
|
||||
ret := []interface{}{}
|
||||
for _, child := range n.Children[1].Children {
|
||||
ret = append(ret, child.Result)
|
||||
}
|
||||
return &Node{Result: ret}
|
||||
})
|
||||
properties = Kleene(And(WS, String('"'), WS, ":", WS, &value), ",")
|
||||
_object = Map(And(WS, "{", WS, properties, WS, "}"), func(n *Node) *Node {
|
||||
properties = Kleene(And(String('"'), ":", &value), ",")
|
||||
_object = Map(And("{", properties, "}"), func(n *Node) *Node {
|
||||
ret := map[string]interface{}{}
|
||||
|
||||
for _, prop := range n.Children[1].Children {
|
||||
@ -27,15 +27,15 @@ var (
|
||||
return &Node{Result: ret}
|
||||
})
|
||||
|
||||
_null = Map(And(WS, "null"), func(n *Node) *Node {
|
||||
_null = Map("null", func(n *Node) *Node {
|
||||
return &Node{Result: nil}
|
||||
})
|
||||
|
||||
_true = Map(And(WS, "true"), func(n *Node) *Node {
|
||||
_true = Map("true", func(n *Node) *Node {
|
||||
return &Node{Result: true}
|
||||
})
|
||||
|
||||
_false = Map(And(WS, "false"), func(n *Node) *Node {
|
||||
_false = Map("false", func(n *Node) *Node {
|
||||
return &Node{Result: false}
|
||||
})
|
||||
|
||||
@ -43,8 +43,8 @@ var (
|
||||
return &Node{Result: n.Token}
|
||||
})
|
||||
|
||||
Y = Map(And(&value, WS), func(n *Node) *Node {
|
||||
return &Node{Result: n.Children[0].Result}
|
||||
Y = Map(&value, func(n *Node) *Node {
|
||||
return &Node{Result: n.Result}
|
||||
})
|
||||
)
|
||||
|
||||
|
25
parser.go
25
parser.go
@ -33,8 +33,8 @@ type Parserish interface{}
|
||||
|
||||
func Parsify(p Parserish) Parser {
|
||||
switch p := p.(type) {
|
||||
//case func(*State) *Node:
|
||||
// return NewParser("anonymous func", p)
|
||||
case func(*State) *Node:
|
||||
return NewParser("anonymous func", p)
|
||||
case Parser:
|
||||
return p
|
||||
case *Parser:
|
||||
@ -57,10 +57,19 @@ func ParsifyAll(parsers ...Parserish) []Parser {
|
||||
return ret
|
||||
}
|
||||
|
||||
func WS() Parser {
|
||||
return NewParser("AutoWS", func(ps *State) *Node {
|
||||
ps.WS()
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func ParseString(parser Parserish, input string) (result interface{}, remaining string, err error) {
|
||||
p := Parsify(parser)
|
||||
ps := &State{input, 0, Error{}}
|
||||
ps := InputString(input)
|
||||
|
||||
ret := p(ps)
|
||||
ps.AutoWS()
|
||||
|
||||
if ps.Error.Expected != "" {
|
||||
return nil, ps.Get(), ps.Error
|
||||
@ -71,6 +80,7 @@ func ParseString(parser Parserish, input string) (result interface{}, remaining
|
||||
|
||||
func Exact(match string) Parser {
|
||||
return NewParser(match, func(ps *State) *Node {
|
||||
ps.AutoWS()
|
||||
if !strings.HasPrefix(ps.Get(), match) {
|
||||
ps.ErrorHere(match)
|
||||
return nil
|
||||
@ -138,6 +148,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
|
||||
matches, ranges := parseMatcher(matcher)
|
||||
|
||||
return func(ps *State) *Node {
|
||||
ps.AutoWS()
|
||||
matched := 0
|
||||
for ps.Pos+matched < len(ps.Input) {
|
||||
if max != -1 && matched >= max {
|
||||
@ -173,15 +184,9 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
|
||||
}
|
||||
}
|
||||
|
||||
var ws = NewParser("WS", Chars("\t\n\v\f\r \x85\xA0", 0))
|
||||
|
||||
func WS(ps *State) *Node {
|
||||
ws(ps)
|
||||
return nil
|
||||
}
|
||||
|
||||
func String(quote rune) Parser {
|
||||
return NewParser("string", func(ps *State) *Node {
|
||||
ps.AutoWS()
|
||||
var r rune
|
||||
var w int
|
||||
var matched int
|
||||
|
@ -120,7 +120,7 @@ func TestParseString(t *testing.T) {
|
||||
t.Run("partial match", func(t *testing.T) {
|
||||
result, remaining, err := ParseString(Y, "hello world")
|
||||
require.Equal(t, "hello", result)
|
||||
require.Equal(t, " world", remaining)
|
||||
require.Equal(t, "world", remaining)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
@ -159,22 +159,6 @@ func TestString(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestWS(t *testing.T) {
|
||||
t.Run("consumes all whitespace", func(t *testing.T) {
|
||||
result, p := runParser(" asdf", WS)
|
||||
require.Nil(t, result)
|
||||
require.Equal(t, "asdf", p.Get())
|
||||
require.False(t, p.Errored())
|
||||
})
|
||||
|
||||
t.Run("never errors", func(t *testing.T) {
|
||||
result, p := runParser("asdf", WS)
|
||||
require.Nil(t, result)
|
||||
require.Equal(t, "asdf", p.Get())
|
||||
require.False(t, p.Errored())
|
||||
})
|
||||
}
|
||||
|
||||
func runParser(input string, parser Parser) (*Node, *State) {
|
||||
ps := InputString(input)
|
||||
result := parser(ps)
|
||||
|
29
state.go
29
state.go
@ -1,6 +1,10 @@
|
||||
package goparsify
|
||||
|
||||
import "fmt"
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type Error struct {
|
||||
pos int
|
||||
@ -14,12 +18,33 @@ type State struct {
|
||||
Input string
|
||||
Pos int
|
||||
Error Error
|
||||
WSChars string
|
||||
NoAutoWS bool
|
||||
}
|
||||
|
||||
func (s *State) Advance(i int) {
|
||||
s.Pos += i
|
||||
}
|
||||
|
||||
// AutoWS consumes all whitespace
|
||||
func (s *State) AutoWS() {
|
||||
if s.NoAutoWS {
|
||||
return
|
||||
}
|
||||
s.WS()
|
||||
}
|
||||
|
||||
func (s *State) WS() {
|
||||
for s.Pos < len(s.Input) {
|
||||
r, w := utf8.DecodeRuneInString(s.Input[s.Pos:])
|
||||
if !strings.ContainsRune(s.WSChars, r) {
|
||||
return
|
||||
}
|
||||
s.Pos += w
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func (s *State) Get() string {
|
||||
if s.Pos > len(s.Input) {
|
||||
return ""
|
||||
@ -41,5 +66,5 @@ func (s *State) Errored() bool {
|
||||
}
|
||||
|
||||
func InputString(input string) *State {
|
||||
return &State{Input: input}
|
||||
return &State{Input: input, WSChars: "\t\n\v\f\r \x85\xA0"}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user