Pass result in instead of returning

Adam Scarr 2017-08-13 17:30:10 +10:00
parent 0dc37ae5bc
commit 5716ddb5e7
10 changed files with 129 additions and 146 deletions
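
The core of the change is the Parser signature: instead of each parser allocating and returning a Result, the caller now passes in a *Result for the parser to fill. Below is a minimal sketch of the new contract, written as if inside the goparsify package and using only the State/Result helpers visible in the diff; helloParser itself is hypothetical:

package goparsify

import "strings"

// Old signature: type Parser func(*State) Result
// New signature: type Parser func(*State, *Result)
//
// helloParser follows the new contract: on success it fills node and
// advances the state; on failure it records the error and leaves node alone.
func helloParser(ps *State, node *Result) {
    if !strings.HasPrefix(ps.Get(), "hello") {
        ps.ErrorHere("hello") // set the error without moving ps.Pos
        return
    }
    node.Token = "hello"
    ps.Advance(len("hello"))
}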

View File

@ -12,22 +12,22 @@ var (
sumOp = Chars("+-", 1, 1)
prodOp = Chars("/*", 1, 1)
groupExpr = Seq("(", sum, ")").Map(func(n Result) Result {
return Result{Result: n.Child[1].Result}
groupExpr = Seq("(", sum, ")").Map(func(n *Result) {
n.Result = n.Child[1].Result
})
number = NumberLit().Map(func(n Result) Result {
number = NumberLit().Map(func(n *Result) {
switch i := n.Result.(type) {
case int64:
return Result{Result: float64(i)}
n.Result = float64(i)
case float64:
return Result{Result: i}
n.Result = i
default:
panic(fmt.Errorf("unknown value %#v", i))
}
})
sum = Seq(prod, Some(Seq(sumOp, prod))).Map(func(n Result) Result {
sum = Seq(prod, Some(Seq(sumOp, prod))).Map(func(n *Result) {
i := n.Child[0].Result.(float64)
for _, op := range n.Child[1].Child {
@ -39,10 +39,10 @@ var (
}
}
return Result{Result: i}
n.Result = i
})
prod = Seq(&value, Some(Seq(prodOp, &value))).Map(func(n Result) Result {
prod = Seq(&value, Some(Seq(prodOp, &value))).Map(func(n *Result) {
i := n.Child[0].Result.(float64)
for _, op := range n.Child[1].Child {
@ -54,7 +54,7 @@ var (
}
}
return Result{Result: i}
n.Result = i
})
y = Maybe(sum)
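
To see how the rewritten grammar is exercised, Run still returns the final Result's value, so evaluating an expression looks roughly like the sketch below. eval is a hypothetical wrapper, the package name is assumed, and it presumes this sits in the same package as the grammar above (which dot-imports goparsify); error handling is simplified:

package calc // package name assumed

import . "github.com/vektah/goparsify"

// eval is a hypothetical wrapper around the root parser y defined above.
func eval(input string) (float64, error) {
    result, err := Run(y, input)
    if err != nil {
        return 0, err
    }
    // The Map callbacks above always leave a float64 here, so the
    // assertion only panics if the expression was empty.
    // eval("1+2*3") evaluates prod before sum and returns 7.
    return result.(float64), nil
}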

View File

@ -8,52 +8,48 @@ import (
func Seq(parsers ...Parserish) Parser {
parserfied := ParsifyAll(parsers...)
return NewParser("Seq()", func(ps *State) Result {
result := Result{Child: make([]Result, len(parserfied))}
return NewParser("Seq()", func(ps *State, node *Result) {
node.Child = make([]Result, len(parserfied))
startpos := ps.Pos
for i, parser := range parserfied {
result.Child[i] = parser(ps)
parser(ps, &node.Child[i])
if ps.Errored() {
ps.Pos = startpos
return result
return
}
}
return result
})
}
// NoAutoWS disables automatically ignoring whitespace between tokens for all parsers underneath
func NoAutoWS(parser Parserish) Parser {
parserfied := Parsify(parser)
return func(ps *State) Result {
return func(ps *State, node *Result) {
ps.NoAutoWS = true
ret := parserfied(ps)
parserfied(ps, node)
ps.NoAutoWS = false
return ret
}
}
// Any matches the first successful parser and returns its result
func Any(parsers ...Parserish) Parser {
parserfied := ParsifyAll(parsers...)
// Records which parser was successful for each byte, and will use it first next time.
predictor := [255]int{}
return NewParser("Any()", func(ps *State) Result {
return NewParser("Any()", func(ps *State, node *Result) {
if ps.Pos >= len(ps.Input) {
ps.ErrorHere("!EOF")
return Result{}
return
}
longestError := Error{}
startpos := ps.Pos
predictorChar := ps.Input[startpos]
predicted := predictor[predictorChar]
node := parserfied[predicted](ps)
parserfied[predicted](ps, node)
if !ps.Errored() {
return node
return
}
if ps.Error.pos >= longestError.pos {
@ -62,14 +58,14 @@ func Any(parsers ...Parserish) Parser {
if ps.Cut <= startpos {
ps.Recover()
} else {
return node
return
}
for i, parser := range parserfied {
if i == predicted {
continue
}
node := parser(ps)
parser(ps, node)
if ps.Errored() {
if ps.Error.pos >= longestError.pos {
longestError = ps.Error
@ -81,12 +77,11 @@ func Any(parsers ...Parserish) Parser {
continue
}
predictor[predictorChar] = i
return node
return
}
ps.Error = longestError
ps.Pos = startpos
return Result{}
})
}
@ -111,26 +106,26 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser {
sepParser = Parsify(sep[0])
}
return func(ps *State) Result {
return func(ps *State, node *Result) {
var result Result
startpos := ps.Pos
for {
node := opParser(ps)
opParser(ps, &result)
if ps.Errored() {
if len(result.Child) < min || ps.Cut > ps.Pos {
if len(node.Child) < min || ps.Cut > ps.Pos {
ps.Pos = startpos
return result
return
}
ps.Recover()
return result
return
}
result.Child = append(result.Child, node)
node.Child = append(node.Child, result)
if sepParser != nil {
sepParser(ps)
sepParser(ps, TrashResult)
if ps.Errored() {
ps.Recover()
return result
return
}
}
}
@ -141,14 +136,12 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser {
func Maybe(parser Parserish) Parser {
parserfied := Parsify(parser)
return NewParser("Maybe()", func(ps *State) Result {
return NewParser("Maybe()", func(ps *State, node *Result) {
startpos := ps.Pos
node := parserfied(ps)
parserfied(ps, node)
if ps.Errored() && ps.Cut <= startpos {
ps.Recover()
}
return node
})
}
@ -158,49 +151,42 @@ func Maybe(parser Parserish) Parser {
func Bind(parser Parserish, val interface{}) Parser {
p := Parsify(parser)
return func(ps *State) Result {
node := p(ps)
return func(ps *State, node *Result) {
p(ps, node)
if ps.Errored() {
return node
return
}
node.Result = val
return node
return
}
}
// Map applies the callback if the parser matches. This is used to set the Result
// based on the matched result.
func Map(parser Parserish, f func(n Result) Result) Parser {
func Map(parser Parserish, f func(n *Result)) Parser {
p := Parsify(parser)
return func(ps *State) Result {
node := p(ps)
return func(ps *State, node *Result) {
p(ps, node)
if ps.Errored() {
return node
return
}
return f(node)
f(node)
}
}
func flatten(n Result) string {
if n.Token != "" {
return n.Token
}
func flatten(n *Result) {
if len(n.Child) > 0 {
sbuf := &bytes.Buffer{}
for _, node := range n.Child {
sbuf.WriteString(flatten(node))
for _, child := range n.Child {
flatten(&child)
sbuf.WriteString(child.Token)
}
return sbuf.String()
n.Token = sbuf.String()
}
return ""
}
// Merge all child Tokens together recursively
func Merge(parser Parserish) Parser {
return Map(parser, func(n Result) Result {
return Result{Token: flatten(n)}
})
return Map(parser, flatten)
}
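
Under the new API a Map callback edits the node it receives rather than constructing a replacement, which is what the rewritten flatten above does. A small illustrative sketch, written as if inside the goparsify package; upperWord is hypothetical, while Chars and the Map shorthand are the library's own:

package goparsify

import "strings"

// upperWord matches a lowercase word and rewrites the node in place,
// leaving the upper-cased text in .Result.
var upperWord = Chars("a-z").Map(func(n *Result) {
    n.Result = strings.ToUpper(n.Token)
})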

View File

@ -165,8 +165,8 @@ type htmlTag struct {
}
func TestMap(t *testing.T) {
parser := Map(Seq("<", Chars("a-zA-Z0-9"), ">"), func(n Result) Result {
return Result{Result: htmlTag{n.Child[1].Token}}
parser := Seq("<", Chars("a-zA-Z0-9"), ">").Map(func(n *Result) {
n.Result = htmlTag{n.Child[1].Token}
})
t.Run("success", func(t *testing.T) {
@ -235,8 +235,8 @@ func TestMerge(t *testing.T) {
}
func TestMapShorthand(t *testing.T) {
Chars("a-z").Map(func(n Result) Result {
return Result{Result: n.Token}
Chars("a-z").Map(func(n *Result) {
n.Result = n.Token
})
}

View File

@ -53,7 +53,7 @@ func (dp *debugParser) logf(ps *State, result *Result, format string, args ...in
buf.WriteString(fmt.Sprintf("%-10s | ", output))
buf.WriteString(strings.Repeat(" ", len(activeParsers)-1))
buf.WriteString(fmt.Sprintf(format, args...))
buf.WriteString(fmt.Sprintf(" > %#v", result))
buf.WriteRune('\n')
return buf.String()
}
@ -77,14 +77,14 @@ func (dp *debugParser) logEnd(ps *State, result *Result) {
}
}
func (dp *debugParser) Parse(ps *State) Result {
func (dp *debugParser) Parse(ps *State, node *Result) {
activeParsers = append(activeParsers, dp)
start := time.Now()
dp.SelfStart = start
dp.logStart(ps)
ret := dp.Next(ps)
dp.logEnd(ps, &ret)
dp.Next(ps, node)
dp.logEnd(ps, node)
dp.Cumulative += time.Since(start)
dp.Self += time.Since(dp.SelfStart)
@ -94,7 +94,6 @@ func (dp *debugParser) Parse(ps *State) Result {
}
activeParsers = activeParsers[0 : len(activeParsers)-1]
return ret
}
// NewParser should be called around the creation of every Parser.
@ -109,13 +108,12 @@ func NewParser(name string, p Parser) Parser {
Location: location,
}
dp.Next = func(ps *State) Result {
dp.Next = func(ps *State, ret *Result) {
dp.Self += time.Since(dp.SelfStart)
ret := p(ps)
p(ps, ret)
dp.SelfStart = time.Now()
return ret
}
if len(dp.Location) > longestLocation {

View File

@ -18,28 +18,26 @@ var (
tag Parser
identifier = Regex("[a-zA-Z][a-zA-Z0-9]*")
text = NotChars("<>").Map(func(n Result) Result {
return Result{Result: n.Token}
})
text = NotChars("<>").Map(func(n *Result) { n.Result = n.Token })
element = Any(text, &tag)
elements = Some(element).Map(func(n Result) Result {
elements = Some(element).Map(func(n *Result) {
ret := []interface{}{}
for _, child := range n.Child {
ret = append(ret, child.Result)
}
return Result{Result: ret}
n.Result = ret
})
attr = Seq(identifier, "=", StringLit(`"'`))
attrs = Some(attr).Map(func(node Result) Result {
attrs = Some(attr).Map(func(node *Result) {
attr := map[string]string{}
for _, attrNode := range node.Child {
attr[attrNode.Child[0].Token] = attrNode.Child[2].Result.(string)
}
return Result{Result: attr}
node.Result = attr
})
tstart = Seq("<", identifier, Cut(), attrs, ">")
@ -47,13 +45,12 @@ var (
)
func init() {
tag = Seq(tstart, Cut(), elements, tend).Map(func(node Result) Result {
tag = Seq(tstart, Cut(), elements, tend).Map(func(node *Result) {
openTag := node.Child[0]
return Result{Result: htmlTag{
node.Result = htmlTag{
Name: openTag.Child[1].Token,
Attributes: openTag.Child[3].Result.(map[string]string),
Body: node.Child[2].Result.([]interface{}),
}}
}
})
}

View File

@ -1,12 +1,15 @@
package html
import (
"os"
"testing"
"github.com/stretchr/testify/require"
"github.com/vektah/goparsify"
)
func TestParse(t *testing.T) {
goparsify.EnableLogging(os.Stdout)
result, err := parse(`<body>hello <p color="blue">world</p></body>`)
require.NoError(t, err)
require.Equal(t, htmlTag{Name: "body", Attributes: map[string]string{}, Body: []interface{}{

View File

@ -13,22 +13,22 @@ var (
_number = NumberLit()
_properties = Some(Seq(StringLit(`"`), ":", &_value), ",")
_array = Seq("[", Cut(), Some(&_value, ","), "]").Map(func(n Result) Result {
_array = Seq("[", Cut(), Some(&_value, ","), "]").Map(func(n *Result) {
ret := []interface{}{}
for _, child := range n.Child[2].Child {
ret = append(ret, child.Result)
}
return Result{Result: ret}
n.Result = ret
})
_object = Seq("{", Cut(), _properties, "}").Map(func(n Result) Result {
_object = Seq("{", Cut(), _properties, "}").Map(func(n *Result) {
ret := map[string]interface{}{}
for _, prop := range n.Child[2].Child {
ret[prop.Child[0].Result.(string)] = prop.Child[2].Result
}
return Result{Result: ret}
n.Result = ret
})
)

View File

@ -11,12 +11,12 @@ import (
// - escaped characters, eg \" or \n
// - unicode sequences, eg \uBEEF
func StringLit(allowedQuotes string) Parser {
return NewParser("string literal", func(ps *State) Result {
return NewParser("string literal", func(ps *State, node *Result) {
ps.AutoWS()
if !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) {
ps.ErrorHere(allowedQuotes)
return Result{}
return
}
quote := ps.Input[ps.Pos]
@ -30,7 +30,7 @@ func StringLit(allowedQuotes string) Parser {
case '\\':
if end+1 >= inputLen {
ps.ErrorHere(string(quote))
return Result{}
return
}
if buf == nil {
@ -42,14 +42,14 @@ func StringLit(allowedQuotes string) Parser {
if end+6 >= inputLen {
ps.Error.expected = "[a-f0-9]{4}"
ps.Error.pos = end + 2
return Result{}
return
}
r, ok := unhex(ps.Input[end+2 : end+6])
if !ok {
ps.Error.expected = "[a-f0-9]"
ps.Error.pos = end + 2
return Result{}
return
}
buf.WriteRune(r)
end += 6
@ -59,12 +59,13 @@ func StringLit(allowedQuotes string) Parser {
}
case quote:
if buf == nil {
result := ps.Input[ps.Pos+1 : end]
node.Result = ps.Input[ps.Pos+1 : end]
ps.Pos = end + 1
return Result{Result: result}
return
}
ps.Pos = end + 1
return Result{Result: buf.String()}
node.Result = buf.String()
return
default:
if buf == nil {
if ps.Input[end] < 127 {
@ -82,13 +83,12 @@ func StringLit(allowedQuotes string) Parser {
}
ps.ErrorHere(string(quote))
return Result{}
})
}
// NumberLit matches a floating point or integer number and returns it as an int64 or float64 in .Result
func NumberLit() Parser {
return NewParser("number literal", func(ps *State) Result {
return NewParser("number literal", func(ps *State, node *Result) {
ps.AutoWS()
end := ps.Pos
float := false
@ -126,22 +126,20 @@ func NumberLit() Parser {
if end == ps.Pos {
ps.ErrorHere("number")
return Result{}
return
}
var result interface{}
var err error
if float {
result, err = strconv.ParseFloat(ps.Input[ps.Pos:end], 10)
node.Result, err = strconv.ParseFloat(ps.Input[ps.Pos:end], 10)
} else {
result, err = strconv.ParseInt(ps.Input[ps.Pos:end], 10, 64)
node.Result, err = strconv.ParseInt(ps.Input[ps.Pos:end], 10, 64)
}
if err != nil {
ps.ErrorHere("number")
return Result{}
return
}
ps.Pos = end
return Result{Result: result}
})
}
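
The literal parsers now write straight into the node they are handed, but through Run they behave as before: StringLit leaves the unquoted string in .Result and NumberLit leaves an int64 or float64. A usage sketch, written as if inside the goparsify package; the function name is illustrative and error handling is elided:

package goparsify

import "fmt"

// literalExamples shows the values the literal parsers leave in .Result.
func literalExamples() {
    s, _ := Run(StringLit(`"'`), `"he\"llo"`)
    fmt.Println(s) // he"llo  (escape sequences are decoded)

    n, _ := Run(NumberLit(), "42")
    fmt.Printf("%T %v\n", n, n) // int64 42

    f, _ := Run(NumberLit(), "1.5")
    fmt.Printf("%T %v\n", f, f) // float64 1.5
}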

View File

@ -7,6 +7,8 @@ import (
"unicode/utf8"
)
var TrashResult = &Result{}
// Result is the output of a parser. Usually only one of its fields will be set and should be thought of
// more as a union type. Having it avoids interface{} littered all through the parsing code and makes
// it easy to do the two most common operations: getting a token and finding a child.
@ -22,10 +24,10 @@ type Result struct {
// - A parser that errors must set state.Error
// - A parser that errors must not change state.Pos
// - A parser that consumed some input should advance state.Pos
type Parser func(*State) Result
type Parser func(*State, *Result)
// Map shorthand for Map(p, func())
func (p Parser) Map(f func(n Result) Result) Parser {
func (p Parser) Map(f func(n *Result)) Parser {
return Map(p, f)
}
@ -51,14 +53,14 @@ type Parserish interface{}
// See Parserish for details.
func Parsify(p Parserish) Parser {
switch p := p.(type) {
case func(*State) Result:
case func(*State, *Result):
return p
case Parser:
return p
case *Parser:
// Todo: Maybe capture this stack and on nil show it? Is there a good error library to do this?
return func(ptr *State) Result {
return (*p)(ptr)
return func(ptr *State, node *Result) {
(*p)(ptr, node)
}
case string:
return Exact(p)
@ -85,7 +87,8 @@ func Run(parser Parserish, input string, ws ...VoidParser) (result interface{},
ps.WS = ws[0]
}
ret := p(ps)
ret := Result{}
p(ps, &ret)
ps.AutoWS()
if ps.Error.expected != "" {
@ -101,32 +104,30 @@ func Run(parser Parserish, input string, ws ...VoidParser) (result interface{},
// WS will consume whitespace, it should only be needed when AutoWS is turned off
func WS() Parser {
return NewParser("AutoWS", func(ps *State) Result {
return NewParser("AutoWS", func(ps *State, ret *Result) {
ps.WS(ps)
return Result{}
})
}
// Cut prevents backtracking beyond this point. Usually used after keywords when you
// are sure this is the correct path. Improves performance and error reporting.
func Cut() Parser {
return func(ps *State) Result {
return func(ps *State, node *Result) {
ps.Cut = ps.Pos
return Result{}
}
}
// Regex returns a match if the regex successfully matches
func Regex(pattern string) Parser {
re := regexp.MustCompile("^" + pattern)
return NewParser(pattern, func(ps *State) Result {
return NewParser(pattern, func(ps *State, node *Result) {
ps.AutoWS()
if match := re.FindString(ps.Get()); match != "" {
ps.Advance(len(match))
return Result{Token: match}
node.Token = match
return
}
ps.ErrorHere(pattern)
return Result{}
})
}
@ -134,29 +135,29 @@ func Regex(pattern string) Parser {
func Exact(match string) Parser {
if len(match) == 1 {
matchByte := match[0]
return NewParser(match, func(ps *State) Result {
return NewParser(match, func(ps *State, node *Result) {
ps.AutoWS()
if ps.Pos >= len(ps.Input) || ps.Input[ps.Pos] != matchByte {
ps.ErrorHere(match)
return Result{}
return
}
ps.Advance(1)
return Result{Token: match}
node.Token = match
})
}
return NewParser(match, func(ps *State) Result {
return NewParser(match, func(ps *State, node *Result) {
ps.AutoWS()
if !strings.HasPrefix(ps.Get(), match) {
ps.ErrorHere(match)
return Result{}
return
}
ps.Advance(len(match))
return Result{Token: match}
node.Token = match
})
}
@ -222,7 +223,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
min, max := parseRepetition(1, -1, repetition...)
alphabet, ranges := parseMatcher(matcher)
return func(ps *State) Result {
return func(ps *State, node *Result) {
ps.AutoWS()
matched := 0
for ps.Pos+matched < len(ps.Input) {
@ -250,11 +251,10 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
if matched < min {
ps.ErrorHere(matcher)
return Result{}
return
}
result := ps.Input[ps.Pos : ps.Pos+matched]
node.Token = ps.Input[ps.Pos : ps.Pos+matched]
ps.Advance(matched)
return Result{Token: result}
}
}
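
Because parsers no longer return a value, any direct caller allocates the Result itself and passes a pointer, exactly as Run does above and as the updated tests below do. A minimal sketch, written as if inside the goparsify package; directCall is a hypothetical helper:

package goparsify

import "fmt"

// directCall shows how a Parser is invoked by hand under the new signature.
func directCall() {
    ps := NewState("ffooo")
    res := Result{}
    Parsify("ff")(ps, &res) // fills res.Token on success, sets ps.Error on failure
    if !ps.Errored() {
        fmt.Println(res.Token) // ff
    }
}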

View File

@ -7,21 +7,21 @@ import (
)
func TestParsify(t *testing.T) {
result := Result{}
t.Run("strings", func(t *testing.T) {
require.Equal(t, "ff", Parsify("ff")(NewState("ffooo")).Token)
Parsify("ff")(NewState("ffooo"), &result)
require.Equal(t, "ff", result.Token)
})
t.Run("parsers", func(t *testing.T) {
require.Equal(t, "ff", Parsify(Chars("f"))(NewState("ffooo")).Token)
Parsify(Chars("f"))(NewState("ffooo"), &result)
require.Equal(t, "ff", result.Token)
})
t.Run("parser funcs", func(t *testing.T) {
node := Parsify(func(p *State) Result {
return Result{Token: "hello"}
})(NewState("ffooo"))
Parsify(func(p *State, node *Result) { node.Token = "hello" })(NewState("ffooo"), &result)
require.Equal(t, "hello", node.Token)
require.Equal(t, "hello", result.Token)
})
t.Run("*parsers", func(t *testing.T) {
@ -29,8 +29,8 @@ func TestParsify(t *testing.T) {
parserfied := Parsify(&parser)
parser = Chars("f")
node := parserfied(NewState("ffooo"))
require.Equal(t, "ff", node.Token)
parserfied(NewState("ffooo"), &result)
require.Equal(t, "ff", result.Token)
})
require.Panics(t, func() {
@ -41,10 +41,12 @@ func TestParsify(t *testing.T) {
func TestParsifyAll(t *testing.T) {
parsers := ParsifyAll("ff", "gg")
result := parsers[0](NewState("ffooo"))
result := Result{}
parsers[0](NewState("ffooo"), &result)
require.Equal(t, "ff", result.Token)
result = parsers[1](NewState("ffooo"))
result = Result{}
parsers[1](NewState("ffooo"), &result)
require.Equal(t, "", result.Token)
}
@ -169,7 +171,7 @@ func TestRegex(t *testing.T) {
}
func TestParseString(t *testing.T) {
Y := Map("hello", func(n Result) Result { return Result{Result: n.Token} })
Y := Map("hello", func(n *Result) { n.Result = n.Token })
t.Run("full match", func(t *testing.T) {
result, err := Run(Y, "hello")
@ -205,17 +207,16 @@ func TestAutoWS(t *testing.T) {
})
t.Run("unicode whitespace", func(t *testing.T) {
ps := NewState(" \u202f hello")
ps.WS = UnicodeWhitespace
result := Exact("hello")(ps)
require.Equal(t, "hello", result.Token)
result, ps := runParser(" \u202f hello", NoAutoWS(Seq(WS(), "hello")))
require.Equal(t, "hello", result.Child[1].Token)
require.Equal(t, "", ps.Get())
require.False(t, ps.Errored())
})
}
func runParser(input string, parser Parser) (Result, *State) {
ps := NewState(input)
result := parser(ps)
result := Result{}
parser(ps, &result)
return result, ps
}