diff --git a/html/html.go b/html/html.go index dab239e..3ce87d6 100644 --- a/html/html.go +++ b/html/html.go @@ -31,7 +31,7 @@ var ( return Node{Result: ret} }) - attr = And(identifier, "=", Any(String('"'), String('\''))) + attr = And(identifier, "=", String(`"'`)) attrs = Map(Kleene(attr), func(node Node) Node { attr := map[string]string{} diff --git a/json/json.go b/json/json.go index 13db8a5..7a1bfbb 100644 --- a/json/json.go +++ b/json/json.go @@ -16,7 +16,7 @@ var ( } return Node{Result: ret} }) - properties = Kleene(And(String('"'), ":", &value), ",") + properties = Kleene(And(String(`"`), ":", &value), ",") _object = Map(And("{", properties, "}"), func(n Node) Node { ret := map[string]interface{}{} @@ -31,7 +31,7 @@ var ( _true = Bind("true", true) _false = Bind("false", false) - _string = Map(String('"'), func(n Node) Node { + _string = Map(String(`"`), func(n Node) Node { return Node{Result: n.Token} }) ) diff --git a/parser.go b/parser.go index 62383b9..741df56 100644 --- a/parser.go +++ b/parser.go @@ -201,39 +201,92 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser { } } -func String(quote rune) Parser { +func String(allowedQuotes string) Parser { return NewParser("string", func(ps *State) Node { ps.AutoWS() - var r rune - var w int - var matched int - r, matched = utf8.DecodeRuneInString(ps.Input[ps.Pos:]) - if r != quote { - ps.ErrorHere("\"") + + for i := 0; i < len(allowedQuotes); i++ { + if ps.Input[ps.Pos] == allowedQuotes[i] { + + } + } + if !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) { + ps.ErrorHere(allowedQuotes) return Node{} } + quote := ps.Input[ps.Pos] + var end int = ps.Pos + 1 + + inputLen := len(ps.Input) result := &bytes.Buffer{} - for ps.Pos+matched < len(ps.Input) { - r, w = utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:]) - matched += w + for end < inputLen { + switch ps.Input[end] { + case '\\': + if end+1 >= inputLen { + ps.ErrorHere(string(quote)) + return Node{} + } - if r == '\\' { - r, w = utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:]) - result.WriteRune(r) - matched += w - continue - } + c := ps.Input[end+1] + if c == 'u' { + if end+6 >= inputLen { + ps.Error.Expected = "[a-f0-9]{4}" + ps.Error.pos = end + 2 + return Node{} + } - if r == quote { - ps.Advance(matched) + r, ok := unhex(ps.Input[end+2 : end+6]) + if !ok { + ps.Error.Expected = "[a-f0-9]" + ps.Error.pos = end + 2 + return Node{} + } + result.WriteRune(r) + end += 6 + } else { + result.WriteByte(c) + end += 2 + } + case quote: + ps.Pos = end + 1 return Node{Token: result.String()} + default: + r, w := utf8.DecodeRuneInString(ps.Input[end:]) + result.WriteRune(r) + end += w } - result.WriteRune(r) } - ps.ErrorHere("\"") + ps.ErrorHere(string(quote)) return Node{} }) } + +func stringContainsByte(s string, b byte) bool { + for i := 0; i < len(s); i++ { + if b == s[i] { + return true + } + } + return false +} + +func unhex(b string) (v rune, ok bool) { + for _, c := range b { + v <<= 4 + switch { + case '0' <= c && c <= '9': + v |= c - '0' + case 'a' <= c && c <= 'f': + v |= c - 'a' + 10 + case 'A' <= c && c <= 'F': + v |= c - 'A' + 10 + default: + return 0, false + } + } + + return v, true +} diff --git a/parser_test.go b/parser_test.go index caa2f8b..a9d4d51 100644 --- a/parser_test.go +++ b/parser_test.go @@ -152,29 +152,97 @@ func TestParseString(t *testing.T) { } func TestString(t *testing.T) { - t.Run("test basic match", func(t *testing.T) { - result, p := runParser(`"hello"`, String('"')) + parser := String(`"'`) + t.Run("test double match", func(t *testing.T) { + result, p := runParser(`"hello"`, parser) require.Equal(t, `hello`, result.Token) require.Equal(t, "", p.Get()) }) + t.Run("test single match", func(t *testing.T) { + result, p := runParser(`"hello"`, parser) + require.Equal(t, `hello`, result.Token) + require.Equal(t, "", p.Get()) + }) + + t.Run("test nested quotes", func(t *testing.T) { + result, p := runParser(`"hello 'world'"`, parser) + require.Equal(t, `hello 'world'`, result.Token) + require.Equal(t, "", p.Get()) + }) + t.Run("test non match", func(t *testing.T) { - _, p := runParser(`1`, String('"')) - require.Equal(t, `"`, p.Error.Expected) + _, p := runParser(`1`, parser) + require.Equal(t, `"'`, p.Error.Expected) require.Equal(t, `1`, p.Get()) }) t.Run("test unterminated string", func(t *testing.T) { - _, p := runParser(`"hello `, String('"')) + _, p := runParser(`"hello `, parser) require.Equal(t, `"`, p.Error.Expected) require.Equal(t, `"hello `, p.Get()) }) + t.Run("test unmatched quotes", func(t *testing.T) { + _, p := runParser(`"hello '`, parser) + require.Equal(t, `"`, p.Error.Expected) + require.Equal(t, 0, p.Pos) + }) + + t.Run("test unterminated escape", func(t *testing.T) { + _, p := runParser(`"hello \`, parser) + require.Equal(t, `"`, p.Error.Expected) + require.Equal(t, 0, p.Pos) + }) + t.Run("test escaping", func(t *testing.T) { - result, p := runParser(`"hello \"world\""`, String('"')) + result, p := runParser(`"hello \"world\""`, parser) require.Equal(t, `hello "world"`, result.Token) require.Equal(t, ``, p.Get()) }) + + t.Run("test escaped unicode", func(t *testing.T) { + result, p := runParser(`"hello \ubeef cake"`, parser) + require.Equal(t, "", p.Error.Expected) + require.Equal(t, "hello \uBEEF cake", result.Token) + require.Equal(t, ``, p.Get()) + }) + + t.Run("test invalid escaped unicode", func(t *testing.T) { + _, p := runParser(`"hello \ucake"`, parser) + require.Equal(t, "offset 9: Expected [a-f0-9]", p.Error.Error()) + require.Equal(t, 0, p.Pos) + }) + + t.Run("test incomplete escaped unicode", func(t *testing.T) { + _, p := runParser(`"hello \uca"`, parser) + require.Equal(t, "offset 9: Expected [a-f0-9]{4}", p.Error.Error()) + require.Equal(t, 0, p.Pos) + }) +} + +func TestUnhex(t *testing.T) { + tests := map[int64]string{ + 0xF: "F", + 0x5: "5", + 0xFF: "FF", + 0xFFF: "FFF", + 0xA4B: "a4b", + 0xFFFF: "FFFF", + 0xBEEFCAFE: "beeFCAfe", + } + for expected, input := range tests { + t.Run(input, func(t *testing.T) { + r, ok := unhex(input) + require.True(t, ok) + require.EqualValues(t, expected, r) + }) + } + + t.Run("Fails on non hex chars", func(t *testing.T) { + _, ok := unhex("hello") + require.False(t, ok) + }) } func runParser(input string, parser Parser) (Node, *State) {