diff --git a/html/html.go b/html/html.go
index 3ce87d6..90192a1 100644
--- a/html/html.go
+++ b/html/html.go
@@ -31,7 +31,7 @@ var (
 		return Node{Result: ret}
 	})
 
-	attr = And(identifier, "=", String(`"'`))
+	attr = And(identifier, "=", StringLit(`"'`))
 	attrs = Map(Kleene(attr), func(node Node) Node {
 		attr := map[string]string{}
 
diff --git a/json/json.go b/json/json.go
index 7a1bfbb..4155bd2 100644
--- a/json/json.go
+++ b/json/json.go
@@ -16,7 +16,7 @@ var (
 		}
 		return Node{Result: ret}
 	})
-	properties = Kleene(And(String(`"`), ":", &value), ",")
+	properties = Kleene(And(StringLit(`"`), ":", &value), ",")
 	_object = Map(And("{", properties, "}"), func(n Node) Node {
 		ret := map[string]interface{}{}
 
@@ -31,7 +31,7 @@ var (
 	_true = Bind("true", true)
 	_false = Bind("false", false)
 
-	_string = Map(String(`"`), func(n Node) Node {
+	_string = Map(StringLit(`"`), func(n Node) Node {
 		return Node{Result: n.Token}
 	})
 )
diff --git a/literals.go b/literals.go
new file mode 100644
index 0000000..241dd93
--- /dev/null
+++ b/literals.go
@@ -0,0 +1,119 @@
+package goparsify
+
+import (
+	"bytes"
+	"unicode/utf8"
+)
+
+// StringLit returns a parser for a quoted string literal. The literal must
+// open with one of the bytes in allowedQuotes and is closed by the next
+// unescaped occurrence of that same byte. A backslash followed by 'u' and four
+// hex digits is decoded to the corresponding rune; a backslash followed by any
+// other byte yields that byte verbatim. On success the unquoted, unescaped
+// contents are returned in Node.Token and ps.Pos is moved past the closing
+// quote. On failure ps.Error is populated and an empty Node is returned.
+func StringLit(allowedQuotes string) Parser {
+	return NewParser("string", func(ps *State) Node {
+		ps.AutoWS()
+
+		// The length check keeps empty or exhausted input from indexing past
+		// the end of ps.Input; it is reported like any other missing quote.
+		if ps.Pos >= len(ps.Input) || !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) {
+			ps.ErrorHere(allowedQuotes)
+			return Node{}
+		}
+		quote := ps.Input[ps.Pos]
+
+		end := ps.Pos + 1
+
+		inputLen := len(ps.Input)
+		// buf stays nil until the first escape is seen, so literals without
+		// escapes are returned as a plain slice of the input.
+		var buf *bytes.Buffer
+
+		for end < inputLen {
+			switch ps.Input[end] {
+			case '\\':
+				if end+1 >= inputLen {
+					ps.ErrorHere(string(quote))
+					return Node{}
+				}
+
+				if buf == nil {
+					buf = bytes.NewBufferString(ps.Input[ps.Pos+1 : end])
+				}
+
+				c := ps.Input[end+1]
+				if c == 'u' {
+					// The digits occupy [end+2, end+6), which only needs
+					// end+6 <= inputLen; ">=" rejected four valid digits
+					// sitting at the very end of the input.
+					if end+6 > inputLen {
+						ps.Error.Expected = "[a-f0-9]{4}"
+						ps.Error.pos = end + 2
+						return Node{}
+					}
+
+					r, ok := unhex(ps.Input[end+2 : end+6])
+					if !ok {
+						ps.Error.Expected = "[a-f0-9]"
+						ps.Error.pos = end + 2
+						return Node{}
+					}
+					buf.WriteRune(r)
+					end += 6
+				} else {
+					buf.WriteByte(c)
+					end += 2
+				}
+			case quote:
+				if buf == nil {
+					result := ps.Input[ps.Pos+1 : end]
+					ps.Pos = end + 1
+					return Node{Token: result}
+				}
+				ps.Pos = end + 1
+				return Node{Token: buf.String()}
+			default:
+				r, w := utf8.DecodeRuneInString(ps.Input[end:])
+				end += w
+				if buf != nil {
+					buf.WriteRune(r)
+				}
+			}
+		}
+
+		ps.ErrorHere(string(quote))
+		return Node{}
+	})
+}
+
+// stringContainsByte reports whether b occurs anywhere in s.
+func stringContainsByte(s string, b byte) bool {
+	for i := 0; i < len(s); i++ {
+		if b == s[i] {
+			return true
+		}
+	}
+	return false
+}
+
+// unhex decodes b as a run of hexadecimal digits (either case) into a rune,
+// most significant digit first. ok is false if b contains a non-hex character.
+func unhex(b string) (v rune, ok bool) {
+	for _, c := range b {
+		v <<= 4
+		switch {
+		case '0' <= c && c <= '9':
+			v |= c - '0'
+		case 'a' <= c && c <= 'f':
+			v |= c - 'a' + 10
+		case 'A' <= c && c <= 'F':
+			v |= c - 'A' + 10
+		default:
+			return 0, false
+		}
+	}
+
+	return v, true
+}
diff --git a/literals_test.go b/literals_test.go
new file mode 100644
index 0000000..15b971d
--- /dev/null
+++ b/literals_test.go
@@ -0,0 +1,89 @@
+package goparsify
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestString(t *testing.T) {
+	parser := StringLit(`"'`)
+	t.Run("test double match", func(t *testing.T) {
+		result, p := runParser(`"hello"`, parser)
+		require.Equal(t, `hello`, result.Token)
+		require.Equal(t, "", p.Get())
+	})
+
+	t.Run("test single match", func(t *testing.T) {
+		result, p := runParser(`'hello'`, parser)
+		require.Equal(t, `hello`, result.Token)
+		require.Equal(t, "", p.Get())
+	})
+
+	t.Run("test nested quotes", func(t *testing.T) {
+		result, p := runParser(`"hello 'world'"`, parser)
+		require.Equal(t, `hello 'world'`, result.Token)
+		require.Equal(t, "", p.Get())
+	})
+
+	t.Run("test non match", func(t *testing.T) {
+		_, p := runParser(`1`, parser)
+		require.Equal(t, `"'`, p.Error.Expected)
+		require.Equal(t, `1`, p.Get())
+	})
+
+	t.Run("test empty input", func(t *testing.T) {
+		_, p := runParser(``, parser)
+		require.Equal(t, `"'`, p.Error.Expected)
+		require.Equal(t, 0, p.Pos)
+	})
+
+	t.Run("test unterminated string", func(t *testing.T) {
+		_, p := runParser(`"hello `, parser)
+		require.Equal(t, `"`, p.Error.Expected)
+		require.Equal(t, `"hello `, p.Get())
+	})
+
+	t.Run("test unmatched quotes", func(t *testing.T) {
+		_, p := runParser(`"hello '`, parser)
+		require.Equal(t, `"`, p.Error.Expected)
+		require.Equal(t, 0, p.Pos)
+	})
+
+	t.Run("test unterminated escape", func(t *testing.T) {
+		_, p := runParser(`"hello \`, parser)
+		require.Equal(t, `"`, p.Error.Expected)
+		require.Equal(t, 0, p.Pos)
+	})
+
+	t.Run("test escaping", func(t *testing.T) {
+		result, p := runParser(`"hello \"world\""`, parser)
+		require.Equal(t, `hello "world"`, result.Token)
+		require.Equal(t, ``, p.Get())
+	})
+
+	t.Run("test escaped unicode", func(t *testing.T) {
+		result, p := runParser(`"hello \ubeef cake"`, parser)
+		require.Equal(t, "", p.Error.Expected)
+		require.Equal(t, "hello \uBEEF cake", result.Token)
+		require.Equal(t, ``, p.Get())
+	})
+
+	t.Run("test invalid escaped unicode", func(t *testing.T) {
+		_, p := runParser(`"hello \ucake"`, parser)
+		require.Equal(t, "offset 9: Expected [a-f0-9]", p.Error.Error())
+		require.Equal(t, 0, p.Pos)
+	})
+
+	t.Run("test incomplete escaped unicode", func(t *testing.T) {
+		_, p := runParser(`"hello \uca"`, parser)
+		require.Equal(t, "offset 9: Expected [a-f0-9]{4}", p.Error.Error())
+		require.Equal(t, 0, p.Pos)
+	})
+
+	t.Run("test unterminated string after escaped unicode", func(t *testing.T) {
+		_, p := runParser(`"hello \ubeef`, parser)
+		require.Equal(t, `"`, p.Error.Expected)
+		require.Equal(t, 0, p.Pos)
+	})
+}
diff --git a/parser.go b/parser.go
index f92266e..d7daaf6 100644
--- a/parser.go
+++ b/parser.go
@@ -1,7 +1,6 @@
 package goparsify
 
 import (
-	"bytes"
 	"fmt"
 	"strings"
 	"unicode/utf8"
@@ -200,104 +199,3 @@ func charsImpl(matcher string, stopOn bool, repetition ...int) Parser {
 		return Node{Token: result}
 	}
 }
-
-func String(allowedQuotes string) Parser {
-	return NewParser("string", func(ps *State) Node {
-		ps.AutoWS()
-
-		for i := 0; i < len(allowedQuotes); i++ {
-			if ps.Input[ps.Pos] == allowedQuotes[i] {
-
-			}
-		}
-		if !stringContainsByte(allowedQuotes, ps.Input[ps.Pos]) {
-			ps.ErrorHere(allowedQuotes)
-			return Node{}
-		}
-		quote := ps.Input[ps.Pos]
-
-		var end int = ps.Pos + 1
-
-		inputLen := len(ps.Input)
-		var buf *bytes.Buffer
-
-		for end < inputLen {
-			switch ps.Input[end] {
-			case '\\':
-				if end+1 >= inputLen {
-					ps.ErrorHere(string(quote))
-					return Node{}
-				}
-
-				if buf == nil {
-					buf = bytes.NewBufferString(ps.Input[ps.Pos+1 : end])
-				}
-
-				c := ps.Input[end+1]
-				if c == 'u' {
-					if end+6 >= inputLen {
-						ps.Error.Expected = "[a-f0-9]{4}"
-						ps.Error.pos = end + 2
-						return Node{}
-					}
-
-					r, ok := unhex(ps.Input[end+2 : end+6])
-					if !ok {
-						ps.Error.Expected = "[a-f0-9]"
-						ps.Error.pos = end + 2
-						return Node{}
-					}
-					buf.WriteRune(r)
-					end += 6
-				} else {
-					buf.WriteByte(c)
-					end += 2
-				}
-			case quote:
-				if buf == nil {
-					result := ps.Input[ps.Pos+1 : end]
-					ps.Pos = end + 1
-					return Node{Token: result}
-				}
-				ps.Pos = end + 1
-				return Node{Token: buf.String()}
-			default:
-				r, w := utf8.DecodeRuneInString(ps.Input[end:])
-				end += w
-				if buf != nil {
-					buf.WriteRune(r)
-				}
-			}
-		}
-
-		ps.ErrorHere(string(quote))
-		return Node{}
-	})
-}
-
-func stringContainsByte(s string, b byte) bool {
-	for i := 0; i < len(s); i++ {
-		if b == s[i] {
-			return true
-		}
-	}
-	return false
-}
-
-func unhex(b string) (v rune, ok bool) {
-	for _, c := range b {
-		v <<= 4
-		switch {
-		case '0' <= c && c <= '9':
-			v |= c - '0'
-		case 'a' <= c && c <= 'f':
-			v |= c - 'a' + 10
-		case 'A' <= c && c <= 'F':
-			v |= c - 'A' + 10
-		default:
-			return 0, false
-		}
-	}
-
-	return v, true
-}
diff --git a/parser_test.go b/parser_test.go
index a9d4d51..3a3b778 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -151,76 +151,6 @@ func TestParseString(t *testing.T) {
 	})
 }
 
-func TestString(t *testing.T) {
-	parser := String(`"'`)
-	t.Run("test double match", func(t *testing.T) {
-		result, p := runParser(`"hello"`, parser)
-		require.Equal(t, `hello`, result.Token)
-		require.Equal(t, "", p.Get())
-	})
-
-	t.Run("test single match", func(t *testing.T) {
-		result, p := runParser(`"hello"`, parser)
-		require.Equal(t, `hello`, result.Token)
-		require.Equal(t, "", p.Get())
-	})
-
-	t.Run("test nested quotes", func(t *testing.T) {
-		result, p := runParser(`"hello 'world'"`, parser)
-		require.Equal(t, `hello 'world'`, result.Token)
-		require.Equal(t, "", p.Get())
-	})
-
-	t.Run("test non match", func(t *testing.T) {
-		_, p := runParser(`1`, parser)
-		require.Equal(t, `"'`, p.Error.Expected)
-		require.Equal(t, `1`, p.Get())
-	})
-
-	t.Run("test unterminated string", func(t *testing.T) {
-		_, p := runParser(`"hello `, parser)
-		require.Equal(t, `"`, p.Error.Expected)
-		require.Equal(t, `"hello `, p.Get())
-	})
-
-	t.Run("test unmatched quotes", func(t *testing.T) {
-		_, p := runParser(`"hello '`, parser)
-		require.Equal(t, `"`, p.Error.Expected)
-		require.Equal(t, 0, p.Pos)
-	})
-
-	t.Run("test unterminated escape", func(t *testing.T) {
-		_, p := runParser(`"hello \`, parser)
-		require.Equal(t, `"`, p.Error.Expected)
-		require.Equal(t, 0, p.Pos)
-	})
-
-	t.Run("test escaping", func(t *testing.T) {
-		result, p := runParser(`"hello \"world\""`, parser)
-		require.Equal(t, `hello "world"`, result.Token)
-		require.Equal(t, ``, p.Get())
-	})
-
-	t.Run("test escaped unicode", func(t *testing.T) {
-		result, p := runParser(`"hello \ubeef cake"`, parser)
-		require.Equal(t, "", p.Error.Expected)
-		require.Equal(t, "hello \uBEEF cake", result.Token)
-		require.Equal(t, ``, p.Get())
-	})
-
-	t.Run("test invalid escaped unicode", func(t *testing.T) {
-		_, p := runParser(`"hello \ucake"`, parser)
-		require.Equal(t, "offset 9: Expected [a-f0-9]", p.Error.Error())
-		require.Equal(t, 0, p.Pos)
-	})
-
-	t.Run("test incomplete escaped unicode", func(t *testing.T) {
-		_, p := runParser(`"hello \uca"`, parser)
-		require.Equal(t, "offset 9: Expected [a-f0-9]{4}", p.Error.Error())
-		require.Equal(t, 0, p.Pos)
-	})
-}
-
 func TestUnhex(t *testing.T) {
 	tests := map[int64]string{
 		0xF: "F",