-rw-r--r--  calc/calc.go        | 18
-rw-r--r--  combinator.go       | 92
-rw-r--r--  combinator_test.go  |  8
-rw-r--r--  debugon.go          | 14
-rw-r--r--  html/html.go        | 19
-rw-r--r--  html/html_test.go   |  3
-rw-r--r--  json/json.go        |  8
-rw-r--r--  literals.go         | 30
-rw-r--r--  parser.go           | 46
-rw-r--r--  parser_test.go      | 37
10 files changed, 129 insertions, 146 deletions
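
The change below is a single API refactor applied across the library: the core Parser type stops returning a Result and instead fills a *Result supplied by its caller, so results are written in place rather than copied back up the stack. A minimal sketch of a parser written against the new signature (Exclaim is a hypothetical example, not part of goparsify; it assumes a dot-import of github.com/vektah/goparsify, as the calc, html and json packages use):

    package example

    import . "github.com/vektah/goparsify"

    // Old contract: type Parser func(*State) Result   (each parser returns its own Result)
    // New contract: type Parser func(*State, *Result)  (each parser fills the caller's Result)
    func Exclaim() Parser {
        return NewParser("!", func(ps *State, node *Result) {
            ps.AutoWS()
            if ps.Pos >= len(ps.Input) || ps.Input[ps.Pos] != '!' {
                ps.ErrorHere("!") // report the error on the State and leave node untouched
                return
            }
            node.Token = "!"
            ps.Advance(1)
        })
    }
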
diff --git a/calc/calc.go b/calc/calc.go
index 6dcd304..a4d6f8a 100644
--- a/calc/calc.go
+++ b/calc/calc.go
@@ -12,22 +12,22 @@ var (
sumOp = Chars("+-", 1, 1)
prodOp = Chars("/*", 1, 1)
- groupExpr = Seq("(", sum, ")").Map(func(n Result) Result {
- return Result{Result: n.Child[1].Result}
+ groupExpr = Seq("(", sum, ")").Map(func(n *Result) {
+ n.Result = n.Child[1].Result
})
- number = NumberLit().Map(func(n Result) Result {
+ number = NumberLit().Map(func(n *Result) {
switch i := n.Result.(type) {
case int64:
- return Result{Result: float64(i)}
+ n.Result = float64(i)
case float64:
- return Result{Result: i}
+ n.Result = i
default:
panic(fmt.Errorf("unknown value %#v", i))
}
})
- sum = Seq(prod, Some(Seq(sumOp, prod))).Map(func(n Result) Result {
+ sum = Seq(prod, Some(Seq(sumOp, prod))).Map(func(n *Result) {
i := n.Child[0].Result.(float64)
for _, op := range n.Child[1].Child {
@@ -39,10 +39,10 @@ var (
}
}
- return Result{Result: i}
+ n.Result = i
})
- prod = Seq(&value, Some(Seq(prodOp, &value))).Map(func(n Result) Result {
+ prod = Seq(&value, Some(Seq(prodOp, &value))).Map(func(n *Result) {
i := n.Child[0].Result.(float64)
for _, op := range n.Child[1].Child {
@@ -54,7 +54,7 @@ var (
}
}
- return Result{Result: i}
+ n.Result = i
})
y = Maybe(sum)
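
In grammar code such as calc/calc.go the migration is mechanical: Map callbacks receive a *Result and mutate it in place instead of constructing and returning a replacement. The two recurring shapes are picking out a child and re-binding the matched token, sketched below with placeholder parsers (expr, group and word are illustrative only, not part of calc):

    package example

    import . "github.com/vektah/goparsify"

    var expr = NumberLit() // stand-in for a real sub-grammar

    // Selecting a child, as groupExpr does above.
    // Old style: return Result{Result: n.Child[1].Result}
    var group = Seq("(", expr, ")").Map(func(n *Result) {
        n.Result = n.Child[1].Result
    })

    // Re-binding the matched token.
    // Old style: return Result{Result: n.Token}
    var word = Chars("a-z").Map(func(n *Result) {
        n.Result = n.Token
    })
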
diff --git a/combinator.go b/combinator.go
index 01386e6..0c4e0f5 100644
--- a/combinator.go
+++ b/combinator.go
@@ -8,52 +8,48 @@ import (
func Seq(parsers ...Parserish) Parser {
parserfied := ParsifyAll(parsers...)
- return NewParser("Seq()", func(ps *State) Result {
- result := Result{Child: make([]Result, len(parserfied))}
+ return NewParser("Seq()", func(ps *State, node *Result) {
+ node.Child = make([]Result, len(parserfied))
startpos := ps.Pos
for i, parser := range parserfied {
- result.Child[i] = parser(ps)
+ parser(ps, &node.Child[i])
if ps.Errored() {
ps.Pos = startpos
- return result
+ return
}
}
- return result
})
}
// NoAutoWS disables automatically ignoring whitespace between tokens for all parsers underneath
func NoAutoWS(parser Parserish) Parser {
parserfied := Parsify(parser)
- return func(ps *State) Result {
+ return func(ps *State, node *Result) {
ps.NoAutoWS = true
-
- ret := parserfied(ps)
-
+ parserfied(ps, node)
ps.NoAutoWS = false
- return ret
}
}
// Any matches the first successful parser and returns its result
func Any(parsers ...Parserish) Parser {
parserfied := ParsifyAll(parsers...)
- // For
+ // Records which parser was successful for each byte, and will use it first next time.
predictor := [255]int{}
- return NewParser("Any()", func(ps *State) Result {
+ return NewParser("Any()", func(ps *State, node *Result) {
if ps.Pos >= len(ps.Input) {
ps.ErrorHere("!EOF")
- return Result{}
+ return
}
longestError := Error{}
startpos := ps.Pos
predictorChar := ps.Input[startpos]
predicted := predictor[predictorChar]
- node := parserfied[predicted](ps)
+ parserfied[predicted](ps, node)
if !ps.Errored() {
- return node
+ return
}
if ps.Error.pos >= longestError.pos {
@@ -62,14 +58,14 @@ func Any(parsers ...Parserish) Parser {
if ps.Cut <= startpos {
ps.Recover()
} else {
- return node
+ return
}
for i, parser := range parserfied {
if i == predicted {
continue
}
- node := parser(ps)
+ parser(ps, node)
if ps.Errored() {
if ps.Error.pos >= longestError.pos {
longestError = ps.Error
@@ -81,12 +77,11 @@ func Any(parsers ...Parserish) Parser {
continue
}
predictor[predictorChar] = i
- return node
+ return
}
ps.Error = longestError
ps.Pos = startpos
- return Result{}
})
}
@@ -111,26 +106,26 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser {
sepParser = Parsify(sep[0])
}
- return func(ps *State) Result {
+ return func(ps *State, node *Result) {
var result Result
startpos := ps.Pos
for {
- node := opParser(ps)
+ opParser(ps, &result)
if ps.Errored() {
- if len(result.Child) < min || ps.Cut > ps.Pos {
+ if len(node.Child) < min || ps.Cut > ps.Pos {
ps.Pos = startpos
- return result
+ return
}
ps.Recover()
- return result
+ return
}
- result.Child = append(result.Child, node)
+ node.Child = append(node.Child, result)
if sepParser != nil {
- sepParser(ps)
+ sepParser(ps, TrashResult)
if ps.Errored() {
ps.Recover()
- return result
+ return
}
}
}
@@ -141,14 +136,12 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser {
func Maybe(parser Parserish) Parser {
parserfied := Parsify(parser)
- return NewParser("Maybe()", func(ps *State) Result {
+ return NewParser("Maybe()", func(ps *State, node *Result) {
startpos := ps.Pos
- node := parserfied(ps)
+ parserfied(ps, node)
if ps.Errored() && ps.Cut <= startpos {
ps.Recover()
}
-
- return node
})
}
@@ -158,49 +151,42 @@ func Maybe(parser Parserish) Parser {
func Bind(parser Parserish, val interface{}) Parser {
p := Parsify(parser)
- return func(ps *State) Result {
- node := p(ps)
+ return func(ps *State, node *Result) {
+ p(ps, node)
if ps.Errored() {
- return node
+ return
}
node.Result = val
- return node
+ return
}
}
// Map applies the callback if the parser matches. This is used to set the Result
// based on the matched result.
-func Map(parser Parserish, f func(n Result) Result) Parser {
+func Map(parser Parserish, f func(n *Result)) Parser {
p := Parsify(parser)
- return func(ps *State) Result {
- node := p(ps)
+ return func(ps *State, node *Result) {
+ p(ps, node)
if ps.Errored() {
- return node
+ return
}
- return f(node)
+ f(node)
}
}
-func flatten(n Result) string {
- if n.Token != "" {
- return n.Token
- }
-
+func flatten(n *Result) {
if len(n.Child) > 0 {
sbuf := &bytes.Buffer{}
- for _, node := range n.Child {
- sbuf.WriteString(flatten(node))
+ for _, child := range n.Child {
+ flatten(&child)
+ sbuf.WriteString(child.Token)
}
- return sbuf.String()
+ n.Token = sbuf.String()
}
-
- return ""
}
// Merge all child Tokens together recursively
func Merge(parser Parserish) Parser {
- return Map(parser, func(n Result) Result {
- return Result{Token: flatten(n)}
- })
+ return Map(parser, flatten)
}
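
combinator.go now threads the caller's node through every combinator, and the new TrashResult (declared in parser.go below) serves as a scratch node when a sub-parser's output is irrelevant, as manyImpl does for its separator. A minimal sketch of a custom combinator written against the new shape (Discard is hypothetical, not part of the library):

    package example

    import . "github.com/vektah/goparsify"

    // Discard runs parser for its effect on the State (position, cut, errors) but
    // throws the parsed output away by handing it the shared TrashResult.
    func Discard(parser Parserish) Parser {
        p := Parsify(parser)
        return NewParser("Discard()", func(ps *State, node *Result) {
            p(ps, TrashResult) // errors still surface via ps.Errored()
        })
    }
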
diff --git a/combinator_test.go b/combinator_test.go
index 1c95eba..c1eedb3 100644
--- a/combinator_test.go
+++ b/combinator_test.go
@@ -165,8 +165,8 @@ type htmlTag struct {
}
func TestMap(t *testing.T) {
- parser := Map(Seq("<", Chars("a-zA-Z0-9"), ">"), func(n Result) Result {
- return Result{Result: htmlTag{n.Child[1].Token}}
+ parser := Seq("<", Chars("a-zA-Z0-9"), ">").Map(func(n *Result) {
+ n.Result = htmlTag{n.Child[1].Token}
})
t.Run("success", func(t *testing.T) {
@@ -235,8 +235,8 @@ func TestMerge(t *testing.T) {
}
func TestMapShorthand(t *testing.T) {
- Chars("a-z").Map(func(n Result) Result {
- return Result{Result: n.Token}
+ Chars("a-z").Map(func(n *Result) {
+ n.Result = n.Token
})
}
diff --git a/debugon.go b/debugon.go
index b976839..60efe60 100644
--- a/debugon.go
+++ b/debugon.go
@@ -53,7 +53,7 @@ func (dp *debugParser) logf(ps *State, result *Result, format string, args ...in
buf.WriteString(fmt.Sprintf("%-10s | ", output))
buf.WriteString(strings.Repeat(" ", len(activeParsers)-1))
buf.WriteString(fmt.Sprintf(format, args...))
-
+ buf.WriteString(fmt.Sprintf(" > %#v", result))
buf.WriteRune('\n')
return buf.String()
}
@@ -77,14 +77,14 @@ func (dp *debugParser) logEnd(ps *State, result *Result) {
}
}
-func (dp *debugParser) Parse(ps *State) Result {
+func (dp *debugParser) Parse(ps *State, node *Result) {
activeParsers = append(activeParsers, dp)
start := time.Now()
dp.SelfStart = start
dp.logStart(ps)
- ret := dp.Next(ps)
- dp.logEnd(ps, &ret)
+ dp.Next(ps, node)
+ dp.logEnd(ps, node)
dp.Cumulative += time.Since(start)
dp.Self += time.Since(dp.SelfStart)
@@ -94,7 +94,6 @@ func (dp *debugParser) Parse(ps *State) Result {
}
activeParsers = activeParsers[0 : len(activeParsers)-1]
- return ret
}
// NewParser should be called around the creation of every Parser.
@@ -109,13 +108,12 @@ func NewParser(name string, p Parser) Parser {
Location: location,
}
- dp.Next = func(ps *State) Result {
+ dp.Next = func(ps *State, ret *Result) {
dp.Self += time.Since(dp.SelfStart)
- ret := p(ps)
+ p(ps, ret)
dp.SelfStart = time.Now()
- return ret
}
if len(dp.Location) > longestLocation {
diff --git a/html/html.go b/html/html.go
index 33e5cc4..05e091b 100644
--- a/html/html.go
+++ b/html/html.go
@@ -18,28 +18,26 @@ var (
tag Parser
identifier = Regex("[a-zA-Z][a-zA-Z0-9]*")
- text = NotChars("<>").Map(func(n Result) Result {
- return Result{Result: n.Token}
- })
+ text = NotChars("<>").Map(func(n *Result) { n.Result = n.Token })
element = Any(text, &tag)
- elements = Some(element).Map(func(n Result) Result {
+ elements = Some(element).Map(func(n *Result) {
ret := []interface{}{}
for _, child := range n.Child {
ret = append(ret, child.Result)
}
- return Result{Result: ret}
+ n.Result = ret
})
attr = Seq(identifier, "=", StringLit(`"'`))
- attrs = Some(attr).Map(func(node Result) Result {
+ attrs = Some(attr).Map(func(node *Result) {
attr := map[string]string{}
for _, attrNode := range node.Child {
attr[attrNode.Child[0].Token] = attrNode.Child[2].Result.(string)
}
- return Result{Result: attr}
+ node.Result = attr
})
tstart = Seq("<", identifier, Cut(), attrs, ">")
@@ -47,13 +45,12 @@ var (
)
func init() {
- tag = Seq(tstart, Cut(), elements, tend).Map(func(node Result) Result {
+ tag = Seq(tstart, Cut(), elements, tend).Map(func(node *Result) {
openTag := node.Child[0]
- return Result{Result: htmlTag{
+ node.Result = htmlTag{
Name: openTag.Child[1].Token,
Attributes: openTag.Child[3].Result.(map[string]string),
Body: node.Child[2].Result.([]interface{}),
- }}
-
+ }
})
}
diff --git a/html/html_test.go b/html/html_test.go
index 7649fdd..79b0191 100644
--- a/html/html_test.go
+++ b/html/html_test.go
@@ -1,12 +1,15 @@
package html
import (
+ "os"
"testing"
"github.com/stretchr/testify/require"
+ "github.com/vektah/goparsify"
)
func TestParse(t *testing.T) {
+ goparsify.EnableLogging(os.Stdout)
result, err := parse(`<body>hello <p color="blue">world</p></body>`)
require.NoError(t, err)
require.Equal(t, htmlTag{Name: "body", Attributes: map[string]string{}, Body: []interface{}{
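
The updated html test switches on the library's trace logging before parsing, which is what feeds the result dump added to logf in debugon.go above. A sketch of scoping that to a single test (TestParseWithTrace is hypothetical; only the EnableLogging call itself is taken from this change, and whether the trace prints without a debug build of the library is an assumption):

    package html

    import (
        "os"
        "testing"

        "github.com/vektah/goparsify"
    )

    func TestParseWithTrace(t *testing.T) {
        goparsify.EnableLogging(os.Stdout) // trace every parser while this test runs
        if _, err := parse(`<b>hi</b>`); err != nil {
            t.Fatal(err)
        }
    }
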
diff --git a/json/json.go b/json/json.go
index 7cc53a1..9b1e8e9 100644
--- a/json/json.go
+++ b/json/json.go
@@ -13,22 +13,22 @@ var (
_number = NumberLit()
_properties = Some(Seq(StringLit(`"`), ":", &_value), ",")
- _array = Seq("[", Cut(), Some(&_value, ","), "]").Map(func(n Result) Result {
+ _array = Seq("[", Cut(), Some(&_value, ","), "]").Map(func(n *Result) {
ret := []interface{}{}
for _, child := range n.Child[2].Child {
ret = append(ret, child.Result)
}
- return Result{Result: ret}
+ n.Result = ret
})
- _object = Seq("{", Cut(), _properties, "}").Map(func(n Result) Result {
+ _object = Seq("{", Cut(), _properties, "}").Map(func(n *Result) {
ret := map[string]interface{}{}
for _, prop := range n.Child[2].Child {
ret[prop.Child[0].Result.(string)] = prop.Child[2].Result
}
- return Result{Result: ret}
+ n.Result = ret
})
)
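
The json Maps keep producing the same shapes after the rewrite: arrays come back as []interface{} and objects as map[string]interface{}. A hypothetical spot check, assuming the package dot-imports goparsify as its grammar code does (TestArrayShape is not part of this change):

    package json

    import (
        "testing"

        . "github.com/vektah/goparsify"
    )

    func TestArrayShape(t *testing.T) {
        out, err := Run(_array, `[1, 2]`)
        if err != nil {
            t.Fatal(err)
        }
        arr := out.([]interface{})
        if len(arr) != 2 || arr[0] != int64(1) || arr[1] != int64(2) {
            t.Fatalf("unexpected result: %#v", arr)
        }
    }
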
diff --git a/literals.go b/literals.go
index ec508e3..1287001 100644
--- a/literals.go
+++ b/literals.go
@@ -11,12 +11,12 @@ import (
// - escaped characters, eg \" or \n
// - unicode sequences, eg \uBEEF
func StringLit(allowedQuotes string) Parser {
- return NewParser("string literal", func(ps *State) Result {
+ return NewParser("string literal", func(ps *State, node *Result) {
ps.AutoWS()
if !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) {
ps.ErrorHere(allowedQuotes)
- return Result{}
+ return
}
quote := ps.Input[ps.Pos]
@@ -30,7 +30,7 @@ func StringLit(allowedQuotes string) Parser {
case '\\':
if end+1 >= inputLen {
ps.ErrorHere(string(quote))
- return Result{}
+ return
}
if buf == nil {
@@ -42,14 +42,14 @@ func StringLit(allowedQuotes string) Parser {
if end+6 >= inputLen {
ps.Error.expected = "[a-f0-9]{4}"
ps.Error.pos = end + 2
- return Result{}
+ return
}
r, ok := unhex(ps.Input[end+2 : end+6])
if !ok {
ps.Error.expected = "[a-f0-9]"
ps.Error.pos = end + 2
- return Result{}
+ return
}
buf.WriteRune(r)
end += 6
@@ -59,12 +59,13 @@ func StringLit(allowedQuotes string) Parser {
}
case quote:
if buf == nil {
- result := ps.Input[ps.Pos+1 : end]
+ node.Result = ps.Input[ps.Pos+1 : end]
ps.Pos = end + 1
- return Result{Result: result}
+ return
}
ps.Pos = end + 1
- return Result{Result: buf.String()}
+ node.Result = buf.String()
+ return
default:
if buf == nil {
if ps.Input[end] < 127 {
@@ -82,13 +83,12 @@ func StringLit(allowedQuotes string) Parser {
}
ps.ErrorHere(string(quote))
- return Result{}
})
}
// NumberLit matches a floating point or integer number and returns it as an int64 or float64 in .Result
func NumberLit() Parser {
- return NewParser("number literal", func(ps *State) Result {
+ return NewParser("number literal", func(ps *State, node *Result) {
ps.AutoWS()
end := ps.Pos
float := false
@@ -126,22 +126,20 @@ func NumberLit() Parser {
if end == ps.Pos {
ps.ErrorHere("number")
- return Result{}
+ return
}
- var result interface{}
var err error
if float {
- result, err = strconv.ParseFloat(ps.Input[ps.Pos:end], 10)
+ node.Result, err = strconv.ParseFloat(ps.Input[ps.Pos:end], 10)
} else {
- result, err = strconv.ParseInt(ps.Input[ps.Pos:end], 10, 64)
+ node.Result, err = strconv.ParseInt(ps.Input[ps.Pos:end], 10, 64)
}
if err != nil {
ps.ErrorHere("number")
- return Result{}
+ return
}
ps.Pos = end
- return Result{Result: result}
})
}
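
For code that goes through Run nothing changes in literals.go: Run allocates the root Result itself (see parser.go below) and still returns its .Result field, so NumberLit keeps yielding int64 or float64 values. A small sketch, assuming the default whitespace handling:

    package main

    import (
        "fmt"

        "github.com/vektah/goparsify"
    )

    func main() {
        n, err := goparsify.Run(goparsify.NumberLit(), " 42 ")
        f, _ := goparsify.Run(goparsify.NumberLit(), "4.25")
        fmt.Println(n, err) // expected: 42 <nil>  (an int64)
        fmt.Println(f)      // expected: 4.25      (a float64)
    }
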
diff --git a/parser.go b/parser.go
index 86f2e05..a43bc19 100644
--- a/parser.go
+++ b/parser.go
@@ -7,6 +7,8 @@ import (
"unicode/utf8"
)
+var TrashResult = &Result{}
+
// Result is the output of a parser. Usually only one of its fields will be set and should be thought of
// more as a union type. Having it avoids interface{} littered all through the parsing code and makes
// it easy to do the two most common operations: getting a token and finding a child.
@@ -22,10 +24,10 @@ type Result struct {
// - A parser that errors must set state.Error
// - A parser that errors must not change state.Pos
// - A parser that consumed some input should advance state.Pos
-type Parser func(*State) Result
+type Parser func(*State, *Result)
// Map shorthand for Map(p, func())
-func (p Parser) Map(f func(n Result) Result) Parser {
+func (p Parser) Map(f func(n *Result)) Parser {
return Map(p, f)
}
@@ -51,14 +53,14 @@ type Parserish interface{}
// See Parserish for details.
func Parsify(p Parserish) Parser {
switch p := p.(type) {
- case func(*State) Result:
+ case func(*State, *Result):
return p
case Parser:
return p
case *Parser:
// Todo: Maybe capture this stack and on nil show it? Is there a good error library to do this?
- return func(ptr *State) Result {
- return (*p)(ptr)
+ return func(ptr *State, node *Result) {
+ (*p)(ptr, node)
}
case string:
return Exact(p)
@@ -85,7 +87,8 @@ func Run(parser Parserish, input string, ws ...VoidParser) (result interface{},
ps.WS = ws[0]
}
- ret := p(ps)
+ ret := Result{}
+ p(ps, &ret)
ps.AutoWS()
if ps.Error.expected != "" {
@@ -101,32 +104,30 @@ func Run(parser Parserish, input string, ws ...VoidParser) (result interface{},
// WS will consume whitespace, it should only be needed when AutoWS is turned off
func WS() Parser {
- return NewParser("AutoWS", func(ps *State) Result {
+ return NewParser("AutoWS", func(ps *State, ret *Result) {
ps.WS(ps)
- return Result{}
})
}
// Cut prevents backtracking beyond this point. Usually used after keywords when you
// are sure this is the correct path. Improves performance and error reporting.
func Cut() Parser {
- return func(ps *State) Result {
+ return func(ps *State, node *Result) {
ps.Cut = ps.Pos
- return Result{}
}
}
// Regex returns a match if the regex successfully matches
func Regex(pattern string) Parser {
re := regexp.MustCompile("^" + pattern)
- return NewParser(pattern, func(ps *State) Result {
+ return NewParser(pattern, func(ps *State, node *Result) {
ps.AutoWS()
if match := re.FindString(ps.Get()); match != "" {
ps.Advance(len(match))
- return Result{Token: match}
+ node.Token = match
+ return
}
ps.ErrorHere(pattern)
- return Result{}
})
}
@@ -134,29 +135,29 @@ func Regex(pattern string) Parser {
func Exact(match string) Parser {
if len(match) == 1 {
matchByte := match[0]
- return NewParser(match, func(ps *State) Result {
+ return NewParser(match, func(ps *State, node *Result) {
ps.AutoWS()
if ps.Pos >= len(ps.Input) || ps.Input[ps.Pos] != matchByte {
ps.ErrorHere(match)
- return Result{}
+ return
}
ps.Advance(1)
- return Result{Token: match}
+ node.Token = match
})
}
- return NewParser(match, func(ps *State) Result {
+ return NewParser(match, func(ps *State, node *Result) {
ps.AutoWS()
if !strings.HasPrefix(ps.Get(), match) {
ps.ErrorHere(match)
- return Result{}
+ return
}
ps.Advance(len(match))
- return Result{Token: match}
+ node.Token = match
})
}
@@ -222,7 +223,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
min, max := parseRepetition(1, -1, repetition...)
alphabet, ranges := parseMatcher(matcher)
- return func(ps *State) Result {
+ return func(ps *State, node *Result) {
ps.AutoWS()
matched := 0
for ps.Pos+matched < len(ps.Input) {
@@ -250,11 +251,10 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
if matched < min {
ps.ErrorHere(matcher)
- return Result{}
+ return
}
- result := ps.Input[ps.Pos : ps.Pos+matched]
+ node.Token = ps.Input[ps.Pos : ps.Pos+matched]
ps.Advance(matched)
- return Result{Token: result}
}
}
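
Direct callers, such as the tests below, now allocate the Result themselves and pass a pointer into the parser. A minimal sketch of driving a parser by hand under the new contract:

    package example

    import (
        "fmt"

        . "github.com/vektah/goparsify"
    )

    func directCall() {
        ps := NewState("hello world")
        node := Result{}
        Exact("hello")(ps, &node)
        fmt.Println(node.Token, ps.Errored()) // expected output: hello false
    }
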
diff --git a/parser_test.go b/parser_test.go
index bc2867e..c64b70a 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -7,21 +7,21 @@ import (
)
func TestParsify(t *testing.T) {
-
+ result := Result{}
t.Run("strings", func(t *testing.T) {
- require.Equal(t, "ff", Parsify("ff")(NewState("ffooo")).Token)
+ Parsify("ff")(NewState("ffooo"), &result)
+ require.Equal(t, "ff", result.Token)
})
t.Run("parsers", func(t *testing.T) {
- require.Equal(t, "ff", Parsify(Chars("f"))(NewState("ffooo")).Token)
+ Parsify(Chars("f"))(NewState("ffooo"), &result)
+ require.Equal(t, "ff", result.Token)
})
t.Run("parser funcs", func(t *testing.T) {
- node := Parsify(func(p *State) Result {
- return Result{Token: "hello"}
- })(NewState("ffooo"))
+ Parsify(func(p *State, node *Result) { node.Token = "hello" })(NewState("ffooo"), &result)
- require.Equal(t, "hello", node.Token)
+ require.Equal(t, "hello", result.Token)
})
t.Run("*parsers", func(t *testing.T) {
@@ -29,8 +29,8 @@ func TestParsify(t *testing.T) {
parserfied := Parsify(&parser)
parser = Chars("f")
- node := parserfied(NewState("ffooo"))
- require.Equal(t, "ff", node.Token)
+ parserfied(NewState("ffooo"), &result)
+ require.Equal(t, "ff", result.Token)
})
require.Panics(t, func() {
@@ -41,10 +41,12 @@ func TestParsify(t *testing.T) {
func TestParsifyAll(t *testing.T) {
parsers := ParsifyAll("ff", "gg")
- result := parsers[0](NewState("ffooo"))
+ result := Result{}
+ parsers[0](NewState("ffooo"), &result)
require.Equal(t, "ff", result.Token)
- result = parsers[1](NewState("ffooo"))
+ result = Result{}
+ parsers[1](NewState("ffooo"), &result)
require.Equal(t, "", result.Token)
}
@@ -169,7 +171,7 @@ func TestRegex(t *testing.T) {
}
func TestParseString(t *testing.T) {
- Y := Map("hello", func(n Result) Result { return Result{Result: n.Token} })
+ Y := Map("hello", func(n *Result) { n.Result = n.Token })
t.Run("full match", func(t *testing.T) {
result, err := Run(Y, "hello")
@@ -205,17 +207,16 @@ func TestAutoWS(t *testing.T) {
})
t.Run("unicode whitespace", func(t *testing.T) {
- ps := NewState(" \u202f hello")
- ps.WS = UnicodeWhitespace
-
- result := Exact("hello")(ps)
- require.Equal(t, "hello", result.Token)
+ result, ps := runParser(" \u202f hello", NoAutoWS(Seq(WS(), "hello")))
+ require.Equal(t, "hello", result.Child[1].Token)
+ require.Equal(t, "", ps.Get())
require.False(t, ps.Errored())
})
}
func runParser(input string, parser Parser) (Result, *State) {
ps := NewState(input)
- result := parser(ps)
+ result := Result{}
+ parser(ps, &result)
return result, ps
}