diff --git a/calc/calc.go b/calc/calc.go
index 6dcd304..a4d6f8a 100644
--- a/calc/calc.go
+++ b/calc/calc.go
@@ -12,22 +12,22 @@ var (
     sumOp  = Chars("+-", 1, 1)
     prodOp = Chars("/*", 1, 1)

-    groupExpr = Seq("(", sum, ")").Map(func(n Result) Result {
-        return Result{Result: n.Child[1].Result}
+    groupExpr = Seq("(", sum, ")").Map(func(n *Result) {
+        n.Result = n.Child[1].Result
     })

-    number = NumberLit().Map(func(n Result) Result {
+    number = NumberLit().Map(func(n *Result) {
         switch i := n.Result.(type) {
         case int64:
-            return Result{Result: float64(i)}
+            n.Result = float64(i)
         case float64:
-            return Result{Result: i}
+            n.Result = i
         default:
             panic(fmt.Errorf("unknown value %#v", i))
         }
     })

-    sum = Seq(prod, Some(Seq(sumOp, prod))).Map(func(n Result) Result {
+    sum = Seq(prod, Some(Seq(sumOp, prod))).Map(func(n *Result) {
         i := n.Child[0].Result.(float64)

         for _, op := range n.Child[1].Child {
@@ -39,10 +39,10 @@ var (
             }
         }

-        return Result{Result: i}
+        n.Result = i
     })

-    prod = Seq(&value, Some(Seq(prodOp, &value))).Map(func(n Result) Result {
+    prod = Seq(&value, Some(Seq(prodOp, &value))).Map(func(n *Result) {
         i := n.Child[0].Result.(float64)

         for _, op := range n.Child[1].Child {
@@ -54,7 +54,7 @@ var (
             }
         }

-        return Result{Result: i}
+        n.Result = i
     })

     y = Maybe(sum)
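The calc example above is the whole shape of the callback migration: a Map callback no longer builds and returns a fresh Result, it fills in the node it is handed. A minimal before/after sketch (the digits parser is made up for illustration and is not part of this patch):

package main

import (
	"fmt"

	. "github.com/vektah/goparsify"
)

func main() {
	// Before this change a Map callback allocated and returned a new Result:
	//
	//	digits := Chars("0-9").Map(func(n Result) Result {
	//		return Result{Result: n.Token}
	//	})
	//
	// After it, the callback fills in the node it is handed and returns nothing:
	digits := Chars("0-9").Map(func(n *Result) {
		n.Result = n.Token
	})

	result, err := Run(digits, "1234")
	fmt.Println(result, err) // "1234" <nil>
}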
diff --git a/combinator.go b/combinator.go
index 01386e6..0c4e0f5 100644
--- a/combinator.go
+++ b/combinator.go
@@ -8,52 +8,48 @@ import (
 func Seq(parsers ...Parserish) Parser {
     parserfied := ParsifyAll(parsers...)

-    return NewParser("Seq()", func(ps *State) Result {
-        result := Result{Child: make([]Result, len(parserfied))}
+    return NewParser("Seq()", func(ps *State, node *Result) {
+        node.Child = make([]Result, len(parserfied))

         startpos := ps.Pos
         for i, parser := range parserfied {
-            result.Child[i] = parser(ps)
+            parser(ps, &node.Child[i])
             if ps.Errored() {
                 ps.Pos = startpos
-                return result
+                return
             }
         }
-        return result
     })
 }

 // NoAutoWS disables automatically ignoring whitespace between tokens for all parsers underneath
 func NoAutoWS(parser Parserish) Parser {
     parserfied := Parsify(parser)
-    return func(ps *State) Result {
+    return func(ps *State, node *Result) {
         ps.NoAutoWS = true
-
-        ret := parserfied(ps)
-
+        parserfied(ps, node)
         ps.NoAutoWS = false
-        return ret
     }
 }

 // Any matches the first successful parser and returns its result
 func Any(parsers ...Parserish) Parser {
     parserfied := ParsifyAll(parsers...)
-    // For
+    // Records which parser was successful for each byte, and will use it first next time.
     predictor := [255]int{}

-    return NewParser("Any()", func(ps *State) Result {
+    return NewParser("Any()", func(ps *State, node *Result) {
         if ps.Pos >= len(ps.Input) {
             ps.ErrorHere("!EOF")
-            return Result{}
+            return
         }
         longestError := Error{}
         startpos := ps.Pos

         predictorChar := ps.Input[startpos]
         predicted := predictor[predictorChar]

-        node := parserfied[predicted](ps)
+        parserfied[predicted](ps, node)
         if !ps.Errored() {
-            return node
+            return
         }

         if ps.Error.pos >= longestError.pos {
@@ -62,14 +58,14 @@ func Any(parsers ...Parserish) Parser {
         if ps.Cut <= startpos {
             ps.Recover()
         } else {
-            return node
+            return
         }

         for i, parser := range parserfied {
             if i == predicted {
                 continue
             }
-            node := parser(ps)
+            parser(ps, node)
             if ps.Errored() {
                 if ps.Error.pos >= longestError.pos {
                     longestError = ps.Error
@@ -81,12 +77,11 @@ func Any(parsers ...Parserish) Parser {
                 continue
             }
             predictor[predictorChar] = i
-            return node
+            return
         }

         ps.Error = longestError
         ps.Pos = startpos
-        return Result{}
     })
 }

@@ -111,26 +106,26 @@ func manyImpl(min int, op Parserish, sep ...Parserish) Parser {
         sepParser = Parsify(sep[0])
     }

-    return func(ps *State) Result {
+    return func(ps *State, node *Result) {
         var result Result
         startpos := ps.Pos
         for {
-            node := opParser(ps)
+            opParser(ps, &result)
             if ps.Errored() {
-                if len(result.Child) < min || ps.Cut > ps.Pos {
+                if len(node.Child) < min || ps.Cut > ps.Pos {
                     ps.Pos = startpos
-                    return result
+                    return
                 }
                 ps.Recover()
-                return result
+                return
             }
-            result.Child = append(result.Child, node)
+            node.Child = append(node.Child, result)

             if sepParser != nil {
-                sepParser(ps)
+                sepParser(ps, TrashResult)
                 if ps.Errored() {
                     ps.Recover()
-                    return result
+                    return
                 }
             }
         }
@@ -141,14 +136,12 @@ func Maybe(parser Parserish) Parser {
     parserfied := Parsify(parser)

-    return NewParser("Maybe()", func(ps *State) Result {
+    return NewParser("Maybe()", func(ps *State, node *Result) {
         startpos := ps.Pos
-        node := parserfied(ps)
+        parserfied(ps, node)
         if ps.Errored() && ps.Cut <= startpos {
             ps.Recover()
         }
-
-        return node
     })
 }

@@ -158,49 +151,42 @@ func Bind(parser Parserish, val interface{}) Parser {
     p := Parsify(parser)

-    return func(ps *State) Result {
-        node := p(ps)
+    return func(ps *State, node *Result) {
+        p(ps, node)
         if ps.Errored() {
-            return node
+            return
         }
         node.Result = val
-        return node
+        return
     }
 }

 // Map applies the callback if the parser matches. This is used to set the Result
 // based on the matched result.
-func Map(parser Parserish, f func(n Result) Result) Parser {
+func Map(parser Parserish, f func(n *Result)) Parser {
     p := Parsify(parser)

-    return func(ps *State) Result {
-        node := p(ps)
+    return func(ps *State, node *Result) {
+        p(ps, node)
         if ps.Errored() {
-            return node
+            return
         }

-        return f(node)
+        f(node)
     }
 }

-func flatten(n Result) string {
-    if n.Token != "" {
-        return n.Token
-    }
-
+func flatten(n *Result) {
     if len(n.Child) > 0 {
         sbuf := &bytes.Buffer{}
-        for _, node := range n.Child {
-            sbuf.WriteString(flatten(node))
+        for _, child := range n.Child {
+            flatten(&child)
+            sbuf.WriteString(child.Token)
         }
-        return sbuf.String()
+        n.Token = sbuf.String()
     }
-
-    return ""
 }

 // Merge all child Tokens together recursively
 func Merge(parser Parserish) Parser {
-    return Map(parser, func(n Result) Result {
-        return Result{Token: flatten(n)}
-    })
+    return Map(parser, flatten)
 }
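Merge is now just Map(parser, flatten), with flatten writing the concatenated child Tokens back into the node. A small self-contained sketch of that behaviour, using a made-up grammar rather than anything from this patch:

package main

import (
	"fmt"

	. "github.com/vektah/goparsify"
)

func main() {
	// Merge concatenates the Tokens of every child produced by Seq into one Token.
	// The trailing Map copies that Token into .Result so Run can return it.
	version := Merge(Seq(Chars("0-9"), ".", Chars("0-9"), ".", Chars("0-9"))).Map(func(n *Result) {
		n.Result = n.Token
	})

	result, err := Run(version, "1.2.3")
	fmt.Println(result, err) // "1.2.3" <nil>
}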
diff --git a/combinator_test.go b/combinator_test.go
index 1c95eba..c1eedb3 100644
--- a/combinator_test.go
+++ b/combinator_test.go
@@ -165,8 +165,8 @@ type htmlTag struct {
 }

 func TestMap(t *testing.T) {
-    parser := Map(Seq("<", Chars("a-zA-Z0-9"), ">"), func(n Result) Result {
-        return Result{Result: htmlTag{n.Child[1].Token}}
+    parser := Seq("<", Chars("a-zA-Z0-9"), ">").Map(func(n *Result) {
+        n.Result = htmlTag{n.Child[1].Token}
     })

     t.Run("success", func(t *testing.T) {
@@ -235,8 +235,8 @@ func TestMerge(t *testing.T) {
 }

 func TestMapShorthand(t *testing.T) {
-    Chars("a-z").Map(func(n Result) Result {
-        return Result{Result: n.Token}
+    Chars("a-z").Map(func(n *Result) {
+        n.Result = n.Token
     })
 }
diff --git a/debugon.go b/debugon.go
index b976839..60efe60 100644
--- a/debugon.go
+++ b/debugon.go
@@ -53,7 +53,7 @@ func (dp *debugParser) logf(ps *State, result *Result, format string, args ...in
     buf.WriteString(fmt.Sprintf("%-10s | ", output))
     buf.WriteString(strings.Repeat(" ", len(activeParsers)-1))
     buf.WriteString(fmt.Sprintf(format, args...))
-
+    buf.WriteString(fmt.Sprintf(" > %#v", result))
     buf.WriteRune('\n')
     return buf.String()
 }
@@ -77,14 +77,14 @@ func (dp *debugParser) logEnd(ps *State, result *Result) {
     }
 }

-func (dp *debugParser) Parse(ps *State) Result {
+func (dp *debugParser) Parse(ps *State, node *Result) {
     activeParsers = append(activeParsers, dp)
     start := time.Now()
     dp.SelfStart = start
     dp.logStart(ps)
-    ret := dp.Next(ps)
-    dp.logEnd(ps, &ret)
+    dp.Next(ps, node)
+    dp.logEnd(ps, node)

     dp.Cumulative += time.Since(start)
     dp.Self += time.Since(dp.SelfStart)
@@ -94,7 +94,6 @@ func (dp *debugParser) Parse(ps *State) Result {
     }

     activeParsers = activeParsers[0 : len(activeParsers)-1]
-    return ret
 }

 // NewParser should be called around the creation of every Parser.
@@ -109,13 +108,12 @@ func NewParser(name string, p Parser) Parser {
         Location: location,
     }

-    dp.Next = func(ps *State) Result {
+    dp.Next = func(ps *State, ret *Result) {
         dp.Self += time.Since(dp.SelfStart)
-        ret := p(ps)
+        p(ps, ret)
         dp.SelfStart = time.Now()
-        return ret
     }

     if len(dp.Location) > longestLocation {
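The extra " > %#v" in logf means the debug log now shows the Result each parser produced. The html test below turns this on with EnableLogging; a standalone sketch looks like this (output only appears when the debug parser in debugon.go is compiled in, e.g. via its build tag):

package main

import (
	"os"

	"github.com/vektah/goparsify"
)

func main() {
	// EnableLogging is the same call the html test below adds; with the
	// debug parser compiled in (debugon.go), every parser logs its timing
	// and now its *Result as well.
	goparsify.EnableLogging(os.Stdout)

	y := goparsify.Seq("hello", goparsify.Chars("a-z"))
	_, _ = goparsify.Run(y, "hello world")
}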
diff --git a/html/html.go b/html/html.go
index 33e5cc4..05e091b 100644
--- a/html/html.go
+++ b/html/html.go
@@ -18,28 +18,26 @@ var (
     tag Parser

     identifier = Regex("[a-zA-Z][a-zA-Z0-9]*")

-    text = NotChars("<>").Map(func(n Result) Result {
-        return Result{Result: n.Token}
-    })
+    text = NotChars("<>").Map(func(n *Result) { n.Result = n.Token })

     element = Any(text, &tag)

-    elements = Some(element).Map(func(n Result) Result {
+    elements = Some(element).Map(func(n *Result) {
         ret := []interface{}{}
         for _, child := range n.Child {
             ret = append(ret, child.Result)
         }
-        return Result{Result: ret}
+        n.Result = ret
     })

     attr = Seq(identifier, "=", StringLit(`"'`))

-    attrs = Some(attr).Map(func(node Result) Result {
+    attrs = Some(attr).Map(func(node *Result) {
         attr := map[string]string{}
         for _, attrNode := range node.Child {
             attr[attrNode.Child[0].Token] = attrNode.Child[2].Result.(string)
         }
-        return Result{Result: attr}
+        node.Result = attr
     })

     tstart = Seq("<", identifier, Cut(), attrs, ">")
@@ -47,13 +45,12 @@ var (
 )

 func init() {
-    tag = Seq(tstart, Cut(), elements, tend).Map(func(node Result) Result {
+    tag = Seq(tstart, Cut(), elements, tend).Map(func(node *Result) {
         openTag := node.Child[0]
-        return Result{Result: htmlTag{
+        node.Result = htmlTag{
             Name:       openTag.Child[1].Token,
             Attributes: openTag.Child[3].Result.(map[string]string),
             Body:       node.Child[2].Result.([]interface{}),
-        }}
-
+        }
     })
 }
diff --git a/html/html_test.go b/html/html_test.go
index 7649fdd..79b0191 100644
--- a/html/html_test.go
+++ b/html/html_test.go
@@ -1,12 +1,15 @@
 package html

 import (
+    "os"
     "testing"

     "github.com/stretchr/testify/require"
+    "github.com/vektah/goparsify"
 )

 func TestParse(t *testing.T) {
+    goparsify.EnableLogging(os.Stdout)
     result, err := parse(`<body>hello
 <b>
   world
 </b>
 </body>`)
     require.NoError(t, err)
     require.Equal(t, htmlTag{Name: "body", Attributes: map[string]string{}, Body: []interface{}{
diff --git a/json/json.go b/json/json.go
index 7cc53a1..9b1e8e9 100644
--- a/json/json.go
+++ b/json/json.go
@@ -13,22 +13,22 @@ var (
     _number     = NumberLit()
     _properties = Some(Seq(StringLit(`"`), ":", &_value), ",")

-    _array = Seq("[", Cut(), Some(&_value, ","), "]").Map(func(n Result) Result {
+    _array = Seq("[", Cut(), Some(&_value, ","), "]").Map(func(n *Result) {
         ret := []interface{}{}
         for _, child := range n.Child[2].Child {
             ret = append(ret, child.Result)
         }
-        return Result{Result: ret}
+        n.Result = ret
     })

-    _object = Seq("{", Cut(), _properties, "}").Map(func(n Result) Result {
+    _object = Seq("{", Cut(), _properties, "}").Map(func(n *Result) {
         ret := map[string]interface{}{}
         for _, prop := range n.Child[2].Child {
             ret[prop.Child[0].Result.(string)] = prop.Child[2].Result
         }
-        return Result{Result: ret}
+        n.Result = ret
     })
 )
diff --git a/literals.go b/literals.go
index ec508e3..1287001 100644
--- a/literals.go
+++ b/literals.go
@@ -11,12 +11,12 @@ import (
 // - escaped characters, eg \" or \n
 // - unicode sequences, eg \uBEEF
 func StringLit(allowedQuotes string) Parser {
-    return NewParser("string literal", func(ps *State) Result {
+    return NewParser("string literal", func(ps *State, node *Result) {
         ps.AutoWS()

         if !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) {
             ps.ErrorHere(allowedQuotes)
-            return Result{}
+            return
         }
         quote := ps.Input[ps.Pos]
@@ -30,7 +30,7 @@
             case '\\':
                 if end+1 >= inputLen {
                     ps.ErrorHere(string(quote))
-                    return Result{}
+                    return
                 }

                 if buf == nil {
@@ -42,14 +42,14 @@
                     if end+6 >= inputLen {
                         ps.Error.expected = "[a-f0-9]{4}"
                         ps.Error.pos = end + 2
-                        return Result{}
+                        return
                     }

                     r, ok := unhex(ps.Input[end+2 : end+6])
                     if !ok {
                         ps.Error.expected = "[a-f0-9]"
                         ps.Error.pos = end + 2
-                        return Result{}
+                        return
                     }
                     buf.WriteRune(r)
                     end += 6
@@ -59,12 +59,13 @@
             case quote:
                 if buf == nil {
-                    result := ps.Input[ps.Pos+1 : end]
+                    node.Result = ps.Input[ps.Pos+1 : end]
                     ps.Pos = end + 1
-                    return Result{Result: result}
+                    return
                 }
                 ps.Pos = end + 1
-                return Result{Result: buf.String()}
+                node.Result = buf.String()
+                return
             default:
                 if buf == nil {
                     if ps.Input[end] < 127 {
@@ -82,13 +83,12 @@
         }

         ps.ErrorHere(string(quote))
-        return Result{}
     })
 }

 // NumberLit matches a floating point or integer number and returns it as a int64 or float64 in .Result
 func NumberLit() Parser {
-    return NewParser("number literal", func(ps *State) Result {
+    return NewParser("number literal", func(ps *State, node *Result) {
         ps.AutoWS()
         end := ps.Pos
         float := false
@@ -126,22 +126,20 @@
         if end == ps.Pos {
             ps.ErrorHere("number")
-            return Result{}
+            return
         }

-        var result interface{}
         var err error
         if float {
-            result, err = strconv.ParseFloat(ps.Input[ps.Pos:end], 10)
+            node.Result, err = strconv.ParseFloat(ps.Input[ps.Pos:end], 10)
         } else {
-            result, err = strconv.ParseInt(ps.Input[ps.Pos:end], 10, 64)
+            node.Result, err = strconv.ParseInt(ps.Input[ps.Pos:end], 10, 64)
         }
         if err != nil {
             ps.ErrorHere("number")
-            return Result{}
+            return
         }
         ps.Pos = end
-        return Result{Result: result}
     })
 }
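For reference, the observable results of the literal parsers are unchanged by this patch; only where they are stored moved from a returned Result to the supplied node. A quick sketch of what Run hands back:

package main

import (
	"fmt"

	. "github.com/vektah/goparsify"
)

func main() {
	// NumberLit stores int64 or float64 in .Result; StringLit stores the
	// unquoted (and unescaped) string. Run returns whatever ends up in .Result.
	n, _ := Run(NumberLit(), "42")       // int64(42)
	f, _ := Run(NumberLit(), "4.2")      // float64(4.2)
	s, _ := Run(StringLit(`"'`), `"hi"`) // "hi"
	fmt.Println(n, f, s)
}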
diff --git a/parser.go b/parser.go
index 86f2e05..a43bc19 100644
--- a/parser.go
+++ b/parser.go
@@ -7,6 +7,8 @@ import (
     "unicode/utf8"
 )

+var TrashResult = &Result{}
+
 // Result is the output of a parser. Usually only one of its fields will be set and should be though of
 // more as a union type. having it avoids interface{} littered all through the parsing code and makes
 // the it easy to do the two most common operations, getting a token and finding a child.
@@ -22,10 +24,10 @@ type Result struct {
 // - A parser that errors must set state.Error
 // - A parser that errors must not change state.Pos
 // - A parser that consumed some input should advance state.Pos
-type Parser func(*State) Result
+type Parser func(*State, *Result)

 // Map shorthand for Map(p, func())
-func (p Parser) Map(f func(n Result) Result) Parser {
+func (p Parser) Map(f func(n *Result)) Parser {
     return Map(p, f)
 }

@@ -51,14 +53,14 @@ type Parserish interface{}
 // See Parserish for details.
 func Parsify(p Parserish) Parser {
     switch p := p.(type) {
-    case func(*State) Result:
+    case func(*State, *Result):
         return p
     case Parser:
         return p
     case *Parser:
         // Todo: Maybe capture this stack and on nil show it? Is there a good error library to do this?
-        return func(ptr *State) Result {
-            return (*p)(ptr)
+        return func(ptr *State, node *Result) {
+            (*p)(ptr, node)
         }
     case string:
         return Exact(p)
@@ -85,7 +87,8 @@ func Run(parser Parserish, input string, ws ...VoidParser) (result interface{},
         ps.WS = ws[0]
     }

-    ret := p(ps)
+    ret := Result{}
+    p(ps, &ret)
     ps.AutoWS()

     if ps.Error.expected != "" {
@@ -101,32 +104,30 @@ func Run(parser Parserish, input string, ws ...VoidParser) (result interface{},

 // WS will consume whitespace, it should only be needed when AutoWS is turned off
 func WS() Parser {
-    return NewParser("AutoWS", func(ps *State) Result {
+    return NewParser("AutoWS", func(ps *State, ret *Result) {
         ps.WS(ps)
-        return Result{}
     })
 }

 // Cut prevents backtracking beyond this point. Usually used after keywords when you
 // are sure this is the correct path. Improves performance and error reporting.
 func Cut() Parser {
-    return func(ps *State) Result {
+    return func(ps *State, node *Result) {
         ps.Cut = ps.Pos
-        return Result{}
     }
 }

 // Regex returns a match if the regex successfully matches
 func Regex(pattern string) Parser {
     re := regexp.MustCompile("^" + pattern)
-    return NewParser(pattern, func(ps *State) Result {
+    return NewParser(pattern, func(ps *State, node *Result) {
         ps.AutoWS()
         if match := re.FindString(ps.Get()); match != "" {
             ps.Advance(len(match))
-            return Result{Token: match}
+            node.Token = match
+            return
         }
         ps.ErrorHere(pattern)
-        return Result{}
     })
 }

@@ -134,29 +135,29 @@ func Regex(pattern string) Parser {
 func Exact(match string) Parser {
     if len(match) == 1 {
         matchByte := match[0]
-        return NewParser(match, func(ps *State) Result {
+        return NewParser(match, func(ps *State, node *Result) {
             ps.AutoWS()

             if ps.Pos >= len(ps.Input) || ps.Input[ps.Pos] != matchByte {
                 ps.ErrorHere(match)
-                return Result{}
+                return
             }

             ps.Advance(1)

-            return Result{Token: match}
+            node.Token = match
         })
     }

-    return NewParser(match, func(ps *State) Result {
+    return NewParser(match, func(ps *State, node *Result) {
         ps.AutoWS()

         if !strings.HasPrefix(ps.Get(), match) {
             ps.ErrorHere(match)
-            return Result{}
+            return
         }

         ps.Advance(len(match))

-        return Result{Token: match}
+        node.Token = match
     })
 }

@@ -222,7 +223,7 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
     min, max := parseRepetition(1, -1, repetition...)
     alphabet, ranges := parseMatcher(matcher)

-    return func(ps *State) Result {
+    return func(ps *State, node *Result) {
         ps.AutoWS()
         matched := 0
         for ps.Pos+matched < len(ps.Input) {
@@ -250,11 +251,10 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
         if matched < min {
             ps.ErrorHere(matcher)
-            return Result{}
+            return
         }

-        result := ps.Input[ps.Pos : ps.Pos+matched]
+        node.Token = ps.Input[ps.Pos : ps.Pos+matched]
         ps.Advance(matched)
-        return Result{Token: result}
     }
 }
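The contract in the comments above is unchanged; only the mechanics differ: a parser now fills in the *Result it is given instead of returning one. A sketch of a hand-written parser against the new signature (boolLit is hypothetical, not part of this patch):

package main

import (
	"fmt"
	"strings"

	. "github.com/vektah/goparsify"
)

// boolLit writes into the *Result it is handed rather than returning one.
func boolLit(ps *State, node *Result) {
	ps.AutoWS()
	for _, lit := range []string{"true", "false"} {
		if strings.HasPrefix(ps.Get(), lit) {
			ps.Advance(len(lit))
			node.Token = lit
			node.Result = lit == "true"
			return
		}
	}
	// On failure: set the error and leave ps.Pos untouched, per the Parser contract above.
	ps.ErrorHere("boolean")
}

func main() {
	result, err := Run(boolLit, "  true")
	fmt.Println(result, err) // true <nil>
}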
diff --git a/parser_test.go b/parser_test.go
index bc2867e..c64b70a 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -7,21 +7,21 @@ import (
 )

 func TestParsify(t *testing.T) {
-
+    result := Result{}
     t.Run("strings", func(t *testing.T) {
-        require.Equal(t, "ff", Parsify("ff")(NewState("ffooo")).Token)
+        Parsify("ff")(NewState("ffooo"), &result)
+        require.Equal(t, "ff", result.Token)
     })

     t.Run("parsers", func(t *testing.T) {
-        require.Equal(t, "ff", Parsify(Chars("f"))(NewState("ffooo")).Token)
+        Parsify(Chars("f"))(NewState("ffooo"), &result)
+        require.Equal(t, "ff", result.Token)
     })

     t.Run("parser funcs", func(t *testing.T) {
-        node := Parsify(func(p *State) Result {
-            return Result{Token: "hello"}
-        })(NewState("ffooo"))
+        Parsify(func(p *State, node *Result) { node.Token = "hello" })(NewState("ffooo"), &result)

-        require.Equal(t, "hello", node.Token)
+        require.Equal(t, "hello", result.Token)
     })

     t.Run("*parsers", func(t *testing.T) {
@@ -29,8 +29,8 @@ func TestParsify(t *testing.T) {
         parserfied := Parsify(&parser)
         parser = Chars("f")

-        node := parserfied(NewState("ffooo"))
-        require.Equal(t, "ff", node.Token)
+        parserfied(NewState("ffooo"), &result)
+        require.Equal(t, "ff", result.Token)
     })

     require.Panics(t, func() {
@@ -41,10 +41,12 @@ func TestParsify(t *testing.T) {
 func TestParsifyAll(t *testing.T) {
     parsers := ParsifyAll("ff", "gg")

-    result := parsers[0](NewState("ffooo"))
+    result := Result{}
+    parsers[0](NewState("ffooo"), &result)
     require.Equal(t, "ff", result.Token)

-    result = parsers[1](NewState("ffooo"))
+    result = Result{}
+    parsers[1](NewState("ffooo"), &result)
     require.Equal(t, "", result.Token)
 }

@@ -169,7 +171,7 @@ func TestRegex(t *testing.T) {
 }

 func TestParseString(t *testing.T) {
-    Y := Map("hello", func(n Result) Result { return Result{Result: n.Token} })
+    Y := Map("hello", func(n *Result) { n.Result = n.Token })

     t.Run("full match", func(t *testing.T) {
         result, err := Run(Y, "hello")
@@ -205,17 +207,16 @@ func TestAutoWS(t *testing.T) {
     })

     t.Run("unicode whitespace", func(t *testing.T) {
-        ps := NewState(" \u202f hello")
-        ps.WS = UnicodeWhitespace
-
-        result := Exact("hello")(ps)
-        require.Equal(t, "hello", result.Token)
+        result, ps := runParser(" \u202f hello", NoAutoWS(Seq(WS(), "hello")))
+        require.Equal(t, "hello", result.Child[1].Token)
+        require.Equal(t, "", ps.Get())

         require.False(t, ps.Errored())
     })
 }

 func runParser(input string, parser Parser) (Result, *State) {
     ps := NewState(input)
-    result := parser(ps)
+    result := Result{}
+    parser(ps, &result)
     return result, ps
 }
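For downstream code that invokes a Parser value directly, the call-site change mirrors what these tests now do: the caller owns the Result and passes a pointer in. A short sketch:

package main

import (
	"fmt"

	. "github.com/vektah/goparsify"
)

func main() {
	parser := Parsify(Chars("a-z"))
	ps := NewState("hello world")

	// Old style: result := parser(ps)
	// New style: the caller allocates the Result and the parser fills it in.
	result := Result{}
	parser(ps, &result)

	fmt.Println(result.Token, ps.Errored()) // hello false
}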