goparsify/state.go

106 lines
2.4 KiB
Go
Raw Permalink Normal View History

2017-08-07 12:07:29 +02:00
package goparsify
2017-08-06 15:32:10 +02:00
2017-08-07 13:20:30 +02:00
import (
2017-08-13 13:20:41 +02:00
"strconv"
2017-08-09 14:14:27 +02:00
"unicode"
"unicode/utf8"
2017-08-07 13:20:30 +02:00
)
2017-08-06 15:32:10 +02:00
2017-08-09 13:18:14 +02:00
// State is the current parse state. It is entirely public because parsers are expected to mutate it during the parse.
2017-08-06 15:32:10 +02:00
type State struct {
2017-08-09 13:18:14 +02:00
// The full input string
Input string
// An offset into the string, pointing to the current tip
Pos int
2017-08-10 13:04:14 +02:00
// Do not backtrack past this point
Cut int
2017-08-09 13:18:14 +02:00
// Error is a secondary return channel from parsers, but used so heavily
// in backtracking that it has been inlined to avoid allocations.
Error Error
2017-08-13 11:27:41 +02:00
// Called to determine what to ignore when WS is called, or when WS fires
WS VoidParser
2017-08-06 15:32:10 +02:00
}
2017-08-09 14:14:27 +02:00
// ASCIIWhitespace advances Pos past any run of the standard ASCII whitespace
// bytes (tab, newline, vertical tab, form feed, carriage return, space). It is
// faster than UnicodeWhitespace because it inspects raw bytes and never has to
// decode a rune.
func ASCIIWhitespace(s *State) {
	for ; s.Pos < len(s.Input); s.Pos++ {
		switch s.Input[s.Pos] {
		case ' ', '\t', '\n', '\v', '\f', '\r':
			// Whitespace byte: let the loop step over it.
		default:
			// First non-whitespace byte: leave Pos pointing at it.
			return
		}
	}
}
// UnicodeWhitespace matches any unicode space character. Its a little slower
// than the ascii parser because it matches a rune at a time.
func UnicodeWhitespace(s *State) {
for s.Pos < len(s.Input) {
r, w := utf8.DecodeRuneInString(s.Get())
if !unicode.IsSpace(r) {
return
}
s.Pos += w
}
2017-08-13 14:11:27 +02:00
}
// NoWhitespace disables automatic whitespace matching
func NoWhitespace(s *State) {
2017-08-09 14:14:27 +02:00
}
2017-08-09 13:18:14 +02:00
// NewState creates a new State from a string
func NewState(input string) *State {
return &State{
Input: input,
2017-08-10 14:06:08 +02:00
WS: UnicodeWhitespace,
2017-08-09 13:18:14 +02:00
}
}
// Advance the Pos along by i bytes
2017-08-06 15:32:10 +02:00
func (s *State) Advance(i int) {
s.Pos += i
}
2017-08-09 13:18:14 +02:00
// Get the remaining input.
2017-08-06 15:32:10 +02:00
func (s *State) Get() string {
if s.Pos > len(s.Input) {
return ""
}
return s.Input[s.Pos:]
}
2017-08-10 13:04:14 +02:00
// Preview of the the next x characters
func (s *State) Preview(x int) string {
2017-08-10 13:58:14 +02:00
if s.Pos >= len(s.Input) {
2017-08-10 13:04:14 +02:00
return ""
}
2017-08-13 13:20:41 +02:00
quoted := strconv.Quote(s.Get())
quoted = quoted[1 : len(quoted)-1]
if len(quoted) >= x {
return quoted[0:x]
2017-08-10 13:04:14 +02:00
}
2017-08-13 13:20:41 +02:00
return quoted
2017-08-10 13:04:14 +02:00
}
2017-08-09 13:18:14 +02:00
// ErrorHere raises an error at the current position.
2017-08-06 15:32:10 +02:00
func (s *State) ErrorHere(expected string) {
s.Error.pos = s.Pos
2017-08-09 13:18:14 +02:00
s.Error.expected = expected
2017-08-06 15:32:10 +02:00
}
2017-08-09 13:18:14 +02:00
// Recover from the current error. Often called by combinators that can match
// when one of their children succeed, but others have failed.
func (s *State) Recover() {
s.Error.expected = ""
2017-08-06 15:32:10 +02:00
}
2017-08-09 13:18:14 +02:00
// Errored returns true if the current parser has failed.
2017-08-06 15:32:10 +02:00
func (s *State) Errored() bool {
2017-08-09 13:18:14 +02:00
return s.Error.expected != ""
2017-08-06 15:32:10 +02:00
}