diff --git a/combinator.go b/combinator.go index 9cb661f..f55977c 100644 --- a/combinator.go +++ b/combinator.go @@ -4,7 +4,7 @@ import ( "bytes" ) -// Seq matches all of the given parsers in order and returns their nodes as .Child[n] +// Seq matches all of the given parsers in order and returns their result as .Child[n] func Seq(parsers ...Parserish) Parser { parserfied := ParsifyAll(parsers...) @@ -35,7 +35,7 @@ func NoAutoWS(parser Parserish) Parser { } } -// Any matches the first successful parser and returns its node +// Any matches the first successful parser and returns its result func Any(parsers ...Parserish) Parser { parserfied := ParsifyAll(parsers...) diff --git a/html/html.go b/html/html.go index 01ae9c4..2bbe921 100644 --- a/html/html.go +++ b/html/html.go @@ -17,7 +17,7 @@ type Tag struct { var ( tag Parser - identifier = NoAutoWS(Merge(Seq(WS(), Chars("a-zA-Z", 1), Chars("a-zA-Z0-9", 0)))) + identifier = Regex("[a-zA-Z][a-zA-Z0-9]*") text = Map(NotChars("<>"), func(n Result) Result { return Result{Result: n.Token} }) diff --git a/parser.go b/parser.go index 7b45004..a23cef5 100644 --- a/parser.go +++ b/parser.go @@ -3,6 +3,7 @@ package goparsify import ( "errors" "fmt" + "regexp" "strings" "unicode/utf8" ) @@ -102,6 +103,20 @@ func Run(parser Parserish, input string) (result interface{}, err error) { return ret.Result, nil } +// Regex returns a match if the regex successfully matches +func Regex(pattern string) Parser { + re := regexp.MustCompile("^" + pattern) + return NewParser(pattern, func(ps *State) Result { + ps.AutoWS() + if match := re.FindString(ps.Get()); match != "" { + ps.Advance(len(match)) + return Result{Token: match} + } + ps.ErrorHere(pattern) + return Result{} + }) +} + // Exact will fully match the exact string supplied, or error. The match will be stored in .Token func Exact(match string) Parser { if len(match) == 1 { diff --git a/parser_test.go b/parser_test.go index 8868593..e9da404 100644 --- a/parser_test.go +++ b/parser_test.go @@ -133,6 +133,34 @@ func TestChars(t *testing.T) { }) } +func TestRegex(t *testing.T) { + t.Run("full match", func(t *testing.T) { + node, ps := runParser("hello", Regex("[a-z]*")) + require.Equal(t, "hello", node.Token) + require.Equal(t, "", ps.Get()) + require.False(t, ps.Errored()) + }) + + t.Run("limited match", func(t *testing.T) { + node, ps := runParser("hello world", Regex("[a-z]*")) + require.Equal(t, "hello", node.Token) + require.Equal(t, " world", ps.Get()) + require.False(t, ps.Errored()) + }) + + t.Run("no match", func(t *testing.T) { + _, ps := runParser("1234", Regex("[a-z]*")) + require.Equal(t, "offset 0: expected [a-z]*", ps.Error.Error()) + require.Equal(t, 0, ps.Pos) + }) + + t.Run("eof", func(t *testing.T) { + _, ps := runParser("", Regex("[a-z]*")) + require.Equal(t, "offset 0: expected [a-z]*", ps.Error.Error()) + require.Equal(t, 0, ps.Pos) + }) +} + func TestParseString(t *testing.T) { Y := Map("hello", func(n Result) Result { return Result{Result: n.Token} }) diff --git a/readme.md b/readme.md index c880b77..967e5c5 100644 --- a/readme.md +++ b/readme.md @@ -63,12 +63,12 @@ func TestNumbers(t *testing.T) { Then define a parser for numbers ```go -var number = Map(NumberLit(), func(n Node) Node { +var number = Map(NumberLit(), func(n Result) Result { switch i := n.Result.(type) { case int64: - return Node{Result: float64(i)} + return Result{Result: float64(i)} case float64: - return Node{Result: i} + return Result{Result: i} default: panic(fmt.Errorf("unknown value %#v", i)) } @@ -101,7 +101,7 @@ func TestAddition(t *testing.T) { var sumOp = Chars("+-", 1, 1) -sum = Map(Seq(number, Some(And(sumOp, number))), func(n Node) Node { +sum = Map(Seq(number, Some(And(sumOp, number))), func(n Result) Result { i := n.Child[0].Result.(float64) for _, op := range n.Child[1].Child { @@ -113,7 +113,7 @@ sum = Map(Seq(number, Some(And(sumOp, number))), func(n Node) Node { } } - return Node{Result: i} + return Result{Result: i} }) // and update Calc to point to the new root parser -> `result, err := ParseString(sum, input)`