diff --git a/html/html.go b/html/html.go
index dab239e..3ce87d6 100644
--- a/html/html.go
+++ b/html/html.go
@@ -31,7 +31,7 @@ var (
return Node{Result: ret}
})
- attr = And(identifier, "=", Any(String('"'), String('\'')))
+ attr = And(identifier, "=", String(`"'`))
attrs = Map(Kleene(attr), func(node Node) Node {
attr := map[string]string{}
diff --git a/json/json.go b/json/json.go
index 13db8a5..7a1bfbb 100644
--- a/json/json.go
+++ b/json/json.go
@@ -16,7 +16,7 @@ var (
}
return Node{Result: ret}
})
- properties = Kleene(And(String('"'), ":", &value), ",")
+ properties = Kleene(And(String(`"`), ":", &value), ",")
_object = Map(And("{", properties, "}"), func(n Node) Node {
ret := map[string]interface{}{}
@@ -31,7 +31,7 @@ var (
_true = Bind("true", true)
_false = Bind("false", false)
- _string = Map(String('"'), func(n Node) Node {
+ _string = Map(String(`"`), func(n Node) Node {
return Node{Result: n.Token}
})
)
diff --git a/parser.go b/parser.go
index 62383b9..741df56 100644
--- a/parser.go
+++ b/parser.go
@@ -201,39 +201,101 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
 	}
 }
-func String(quote rune) Parser {
+// String returns a parser for a quoted string literal. The literal must open
+// with one of the bytes in allowedQuotes and ends at the next unescaped
+// occurrence of that same byte.
+//
+// A backslash escapes the character that follows it, which is copied through
+// unchanged, except that \uXXXX is replaced by the code point given by its
+// four hex digits. On success the Node's Token holds the unescaped contents
+// without the surrounding quotes and ps.Pos is set just past the closing
+// quote; on failure an error is recorded on ps and an empty Node is returned.
+func String(allowedQuotes string) Parser {
 	return NewParser("string", func(ps *State) Node {
 		ps.AutoWS()
-		var r rune
-		var w int
-		var matched int
-		r, matched = utf8.DecodeRuneInString(ps.Input[ps.Pos:])
-		if r != quote {
-			ps.ErrorHere("\"")
+
+		// Check the bounds first: at end of input there is no byte to index.
+		if ps.Pos >= len(ps.Input) || !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) {
+			ps.ErrorHere(allowedQuotes)
 			return Node{}
 		}
+		quote := ps.Input[ps.Pos]
+		end := ps.Pos + 1
+
+		inputLen := len(ps.Input)
 		result := &bytes.Buffer{}
-		for ps.Pos+matched < len(ps.Input) {
-			r, w = utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:])
-			matched += w
+		for end < inputLen {
+			switch ps.Input[end] {
+			case '\\':
+				if end+1 >= inputLen {
+					ps.ErrorHere(string(quote))
+					return Node{}
+				}
-			if r == '\\' {
-				r, w = utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:])
-				result.WriteRune(r)
-				matched += w
-				continue
-			}
+				if ps.Input[end+1] != 'u' {
+					// Copy the escaped character through as a whole rune so a
+					// multi-byte character is not cut after its first byte.
+					r, w := utf8.DecodeRuneInString(ps.Input[end+1:])
+					result.WriteRune(r)
+					end += 1 + w
+					continue
+				}
+				// The four hex digits occupy [end+2, end+6).
+				if end+6 > inputLen {
+					ps.Error.Expected = "[a-f0-9]{4}"
+					ps.Error.pos = end + 2
+					return Node{}
+				}
-			if r == quote {
-				ps.Advance(matched)
+				r, ok := unhex(ps.Input[end+2 : end+6])
+				if !ok {
+					ps.Error.Expected = "[a-f0-9]"
+					ps.Error.pos = end + 2
+					return Node{}
+				}
+				result.WriteRune(r)
+				end += 6
+			case quote:
+				ps.Pos = end + 1
 				return Node{Token: result.String()}
+			default:
+				r, w := utf8.DecodeRuneInString(ps.Input[end:])
+				result.WriteRune(r)
+				end += w
 			}
-			result.WriteRune(r)
 		}
+		// Ran out of input before the closing quote.
-		ps.ErrorHere("\"")
+		ps.ErrorHere(string(quote))
 		return Node{}
 	})
 }
+
+// stringContainsByte reports whether the byte b occurs anywhere in s.
+func stringContainsByte(s string, b byte) bool {
+	found := false
+	for i := 0; i < len(s) && !found; i++ {
+		found = s[i] == b
+	}
+	return found
+}
+
+// unhex parses b as hexadecimal (either case); ok is false on any non-hex character.
+func unhex(b string) (v rune, ok bool) {
+	for _, c := range b {
+		switch {
+		case c >= '0' && c <= '9':
+			c -= '0'
+		case c >= 'a' && c <= 'f':
+			c -= 'a' - 10
+		case c >= 'A' && c <= 'F':
+			c -= 'A' - 10
+		default:
+			return 0, false
+		}
+		v = v<<4 | c
+	}
+	return v, true
+}
diff --git a/parser_test.go b/parser_test.go
index caa2f8b..a9d4d51 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -152,29 +152,99 @@ func TestParseString(t *testing.T) {
 }
+// TestString exercises String with both quote styles allowed: matching,
+// nesting, escapes and the error reported for each kind of malformed input.
 func TestString(t *testing.T) {
-	t.Run("test basic match", func(t *testing.T) {
-		result, p := runParser(`"hello"`, String('"'))
+	parser := String(`"'`)
+	t.Run("test double match", func(t *testing.T) {
+		result, p := runParser(`"hello"`, parser)
 		require.Equal(t, `hello`, result.Token)
 		require.Equal(t, "", p.Get())
 	})
+	t.Run("test single match", func(t *testing.T) {
+		result, p := runParser(`'hello'`, parser)
+		require.Equal(t, `hello`, result.Token)
+		require.Equal(t, "", p.Get())
+	})
+
+	t.Run("test nested quotes", func(t *testing.T) {
+		result, p := runParser(`"hello 'world'"`, parser)
+		require.Equal(t, `hello 'world'`, result.Token)
+		require.Equal(t, "", p.Get())
+	})
+
 	t.Run("test non match", func(t *testing.T) {
-		_, p := runParser(`1`, String('"'))
-		require.Equal(t, `"`, p.Error.Expected)
+		_, p := runParser(`1`, parser)
+		require.Equal(t, `"'`, p.Error.Expected)
 		require.Equal(t, `1`, p.Get())
 	})
 	t.Run("test unterminated string", func(t *testing.T) {
-		_, p := runParser(`"hello `, String('"'))
+		_, p := runParser(`"hello `, parser)
 		require.Equal(t, `"`, p.Error.Expected)
 		require.Equal(t, `"hello `, p.Get())
 	})
+	t.Run("test unmatched quotes", func(t *testing.T) {
+		_, p := runParser(`"hello '`, parser)
+		require.Equal(t, `"`, p.Error.Expected)
+		require.Equal(t, 0, p.Pos)
+	})
+
+	t.Run("test unterminated escape", func(t *testing.T) {
+		_, p := runParser(`"hello \`, parser)
+		require.Equal(t, `"`, p.Error.Expected)
+		require.Equal(t, 0, p.Pos)
+	})
+
 	t.Run("test escaping", func(t *testing.T) {
-		result, p := runParser(`"hello \"world\""`, String('"'))
+		result, p := runParser(`"hello \"world\""`, parser)
 		require.Equal(t, `hello "world"`, result.Token)
 		require.Equal(t, ``, p.Get())
 	})
+
+	t.Run("test escaped unicode", func(t *testing.T) {
+		result, p := runParser(`"hello \ubeef cake"`, parser)
+		require.Equal(t, "", p.Error.Expected)
+		require.Equal(t, "hello \uBEEF cake", result.Token)
+		require.Equal(t, ``, p.Get())
+	})
+
+	t.Run("test invalid escaped unicode", func(t *testing.T) {
+		_, p := runParser(`"hello \ucake"`, parser)
+		require.Equal(t, "offset 9: Expected [a-f0-9]", p.Error.Error())
+		require.Equal(t, 0, p.Pos)
+	})
+
+	t.Run("test incomplete escaped unicode", func(t *testing.T) {
+		_, p := runParser(`"hello \uca"`, parser)
+		require.Equal(t, "offset 9: Expected [a-f0-9]{4}", p.Error.Error())
+		require.Equal(t, 0, p.Pos)
+	})
+}
+
+// TestUnhex checks unhex against known values and a non-hex input.
+func TestUnhex(t *testing.T) {
+	tests := map[int64]string{
+		0xF:        "F",
+		0x5:        "5",
+		0xFF:       "FF",
+		0xFFF:      "FFF",
+		0xA4B:      "a4b",
+		0xFFFF:     "FFFF",
+		0x7EEFCAFE: "7eeFCAfe", // eight mixed-case digits that still fit in a rune (int32)
+	}
+	for expected, input := range tests {
+		t.Run(input, func(t *testing.T) {
+			r, ok := unhex(input)
+			require.True(t, ok)
+			require.EqualValues(t, expected, r)
+		})
+	}
+	t.Run("Fails on non hex chars", func(t *testing.T) {
+		_, ok := unhex("hello")
+		require.False(t, ok)
+	})
 }
func runParser(input string, parser Parser) (Node, *State) {