complete string implementation

This commit is contained in:
Adam Scarr 2017-08-08 19:24:49 +10:00
parent 6e1c004fea
commit b835581a73
4 changed files with 150 additions and 29 deletions

View File

@ -31,7 +31,7 @@ var (
return Node{Result: ret} return Node{Result: ret}
}) })
attr = And(identifier, "=", Any(String('"'), String('\''))) attr = And(identifier, "=", String(`"'`))
attrs = Map(Kleene(attr), func(node Node) Node { attrs = Map(Kleene(attr), func(node Node) Node {
attr := map[string]string{} attr := map[string]string{}

View File

@ -16,7 +16,7 @@ var (
} }
return Node{Result: ret} return Node{Result: ret}
}) })
properties = Kleene(And(String('"'), ":", &value), ",") properties = Kleene(And(String(`"`), ":", &value), ",")
_object = Map(And("{", properties, "}"), func(n Node) Node { _object = Map(And("{", properties, "}"), func(n Node) Node {
ret := map[string]interface{}{} ret := map[string]interface{}{}
@ -31,7 +31,7 @@ var (
_true = Bind("true", true) _true = Bind("true", true)
_false = Bind("false", false) _false = Bind("false", false)
_string = Map(String('"'), func(n Node) Node { _string = Map(String(`"`), func(n Node) Node {
return Node{Result: n.Token} return Node{Result: n.Token}
}) })
) )

View File

@ -201,39 +201,92 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
} }
} }
func String(quote rune) Parser { func String(allowedQuotes string) Parser {
return NewParser("string", func(ps *State) Node { return NewParser("string", func(ps *State) Node {
ps.AutoWS() ps.AutoWS()
var r rune
var w int for i := 0; i < len(allowedQuotes); i++ {
var matched int if ps.Input[ps.Pos] == allowedQuotes[i] {
r, matched = utf8.DecodeRuneInString(ps.Input[ps.Pos:])
if r != quote { }
ps.ErrorHere("\"") }
if !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) {
ps.ErrorHere(allowedQuotes)
return Node{} return Node{}
} }
quote := ps.Input[ps.Pos]
var end int = ps.Pos + 1
inputLen := len(ps.Input)
result := &bytes.Buffer{} result := &bytes.Buffer{}
for ps.Pos+matched < len(ps.Input) { for end < inputLen {
r, w = utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:]) switch ps.Input[end] {
matched += w case '\\':
if end+1 >= inputLen {
ps.ErrorHere(string(quote))
return Node{}
}
if r == '\\' { c := ps.Input[end+1]
r, w = utf8.DecodeRuneInString(ps.Input[ps.Pos+matched:]) if c == 'u' {
result.WriteRune(r) if end+6 >= inputLen {
matched += w ps.Error.Expected = "[a-f0-9]{4}"
continue ps.Error.pos = end + 2
} return Node{}
}
if r == quote { r, ok := unhex(ps.Input[end+2 : end+6])
ps.Advance(matched) if !ok {
ps.Error.Expected = "[a-f0-9]"
ps.Error.pos = end + 2
return Node{}
}
result.WriteRune(r)
end += 6
} else {
result.WriteByte(c)
end += 2
}
case quote:
ps.Pos = end + 1
return Node{Token: result.String()} return Node{Token: result.String()}
default:
r, w := utf8.DecodeRuneInString(ps.Input[end:])
result.WriteRune(r)
end += w
} }
result.WriteRune(r)
} }
ps.ErrorHere("\"") ps.ErrorHere(string(quote))
return Node{} return Node{}
}) })
} }
// stringContainsByte reports whether the byte b occurs anywhere in s.
//
// The scan is byte-wise rather than rune-wise, so a multi-byte UTF-8
// sequence in s is compared one byte at a time. An empty s never matches.
func stringContainsByte(s string, b byte) bool {
	for _, candidate := range []byte(s) {
		if candidate == b {
			return true
		}
	}
	return false
}
// unhex decodes b as a run of hexadecimal digits, most significant digit
// first, and returns the accumulated value as a rune.
//
// Both lowercase (a-f) and uppercase (A-F) digits are accepted. If any
// character of b is not a hex digit the result is (0, false). An empty b
// yields (0, true). No overflow check is performed: each digit shifts the
// accumulator left by four bits, so input longer than a rune can hold
// silently wraps.
func unhex(b string) (v rune, ok bool) {
	for _, ch := range b {
		// Work out the numeric value of this digit before folding it in.
		var digit rune
		if ch >= '0' && ch <= '9' {
			digit = ch - '0'
		} else if ch >= 'a' && ch <= 'f' {
			digit = ch - 'a' + 10
		} else if ch >= 'A' && ch <= 'F' {
			digit = ch - 'A' + 10
		} else {
			return 0, false
		}
		v = v<<4 | digit
	}
	return v, true
}

View File

@ -152,29 +152,97 @@ func TestParseString(t *testing.T) {
} }
func TestString(t *testing.T) { func TestString(t *testing.T) {
t.Run("test basic match", func(t *testing.T) { parser := String(`"'`)
result, p := runParser(`"hello"`, String('"')) t.Run("test double match", func(t *testing.T) {
result, p := runParser(`"hello"`, parser)
require.Equal(t, `hello`, result.Token) require.Equal(t, `hello`, result.Token)
require.Equal(t, "", p.Get()) require.Equal(t, "", p.Get())
}) })
t.Run("test single match", func(t *testing.T) {
result, p := runParser(`"hello"`, parser)
require.Equal(t, `hello`, result.Token)
require.Equal(t, "", p.Get())
})
t.Run("test nested quotes", func(t *testing.T) {
result, p := runParser(`"hello 'world'"`, parser)
require.Equal(t, `hello 'world'`, result.Token)
require.Equal(t, "", p.Get())
})
t.Run("test non match", func(t *testing.T) { t.Run("test non match", func(t *testing.T) {
_, p := runParser(`1`, String('"')) _, p := runParser(`1`, parser)
require.Equal(t, `"`, p.Error.Expected) require.Equal(t, `"'`, p.Error.Expected)
require.Equal(t, `1`, p.Get()) require.Equal(t, `1`, p.Get())
}) })
t.Run("test unterminated string", func(t *testing.T) { t.Run("test unterminated string", func(t *testing.T) {
_, p := runParser(`"hello `, String('"')) _, p := runParser(`"hello `, parser)
require.Equal(t, `"`, p.Error.Expected) require.Equal(t, `"`, p.Error.Expected)
require.Equal(t, `"hello `, p.Get()) require.Equal(t, `"hello `, p.Get())
}) })
t.Run("test unmatched quotes", func(t *testing.T) {
_, p := runParser(`"hello '`, parser)
require.Equal(t, `"`, p.Error.Expected)
require.Equal(t, 0, p.Pos)
})
t.Run("test unterminated escape", func(t *testing.T) {
_, p := runParser(`"hello \`, parser)
require.Equal(t, `"`, p.Error.Expected)
require.Equal(t, 0, p.Pos)
})
t.Run("test escaping", func(t *testing.T) { t.Run("test escaping", func(t *testing.T) {
result, p := runParser(`"hello \"world\""`, String('"')) result, p := runParser(`"hello \"world\""`, parser)
require.Equal(t, `hello "world"`, result.Token) require.Equal(t, `hello "world"`, result.Token)
require.Equal(t, ``, p.Get()) require.Equal(t, ``, p.Get())
}) })
t.Run("test escaped unicode", func(t *testing.T) {
result, p := runParser(`"hello \ubeef cake"`, parser)
require.Equal(t, "", p.Error.Expected)
require.Equal(t, "hello \uBEEF cake", result.Token)
require.Equal(t, ``, p.Get())
})
t.Run("test invalid escaped unicode", func(t *testing.T) {
_, p := runParser(`"hello \ucake"`, parser)
require.Equal(t, "offset 9: Expected [a-f0-9]", p.Error.Error())
require.Equal(t, 0, p.Pos)
})
t.Run("test incomplete escaped unicode", func(t *testing.T) {
_, p := runParser(`"hello \uca"`, parser)
require.Equal(t, "offset 9: Expected [a-f0-9]{4}", p.Error.Error())
require.Equal(t, 0, p.Pos)
})
}
func TestUnhex(t *testing.T) {
tests := map[int64]string{
0xF: "F",
0x5: "5",
0xFF: "FF",
0xFFF: "FFF",
0xA4B: "a4b",
0xFFFF: "FFFF",
0xBEEFCAFE: "beeFCAfe",
}
for expected, input := range tests {
t.Run(input, func(t *testing.T) {
r, ok := unhex(input)
require.True(t, ok)
require.EqualValues(t, expected, r)
})
}
t.Run("Fails on non hex chars", func(t *testing.T) {
_, ok := unhex("hello")
require.False(t, ok)
})
} }
func runParser(input string, parser Parser) (Node, *State) { func runParser(input string, parser Parser) (Node, *State) {