// goparsify/state.go

package goparsify

import (
	"strconv"
	"unicode"
	"unicode/utf8"
)

// State is the current parse state. It is entirely public because parsers are
// expected to mutate it during the parse.
type State struct {
	// Input is the full input string. Nothing in this file modifies it after
	// the State has been constructed.
	Input string
	// Pos is a byte offset into Input, pointing to the current tip. Get
	// returns Input[Pos:] and Advance moves it along.
	Pos int
	// Cut marks the point that parsers must not backtrack past. It is not
	// read or written in this file; presumably combinators elsewhere in the
	// package maintain it.
	Cut int
	// Error is a secondary return channel from parsers, but used so heavily
	// in backtracking that it has been inlined to avoid allocations. It is
	// set by ErrorHere, cleared by Recover and inspected by Errored.
	Error Error
	// WS is called to determine what to ignore when WS is called, or when WS
	// fires. NewState defaults it to UnicodeWhitespace.
	WS VoidParser
}

// ASCIIWhitespace skips over any run of ASCII whitespace bytes (tab, newline,
// vertical tab, form feed, carriage return and space) starting at s.Pos. It
// works a byte at a time and never decodes runes, which makes it cheaper than
// UnicodeWhitespace.
func ASCIIWhitespace(s *State) {
	end := len(s.Input)
	for ; s.Pos < end; s.Pos++ {
		c := s.Input[s.Pos]
		// '\t', '\n', '\v', '\f' and '\r' are the contiguous bytes 9 through 13.
		isSpace := c == ' ' || ('\t' <= c && c <= '\r')
		if !isSpace {
			return
		}
	}
}

// UnicodeWhitespace skips over any run of Unicode space characters (as
// reported by unicode.IsSpace) starting at s.Pos. It's a little slower than
// ASCIIWhitespace because it has to decode one rune at a time.
func UnicodeWhitespace(s *State) {
	for s.Pos < len(s.Input) {
		char, size := utf8.DecodeRuneInString(s.Input[s.Pos:])
		if unicode.IsSpace(char) {
			// Step over the whole rune, however many bytes it occupies.
			s.Pos += size
			continue
		}
		return
	}
}

// NoWhitespace is a whitespace handler that consumes nothing. Installing it
// as State.WS disables automatic whitespace matching.
func NoWhitespace(s *State) {
	// Intentionally empty: the position is left exactly where it was.
}

// NewState builds a State positioned at the start of input, with
// UnicodeWhitespace installed as the whitespace handler. Every other field is
// left at its zero value.
func NewState(input string) *State {
	state := new(State)
	state.Input = input
	state.WS = UnicodeWhitespace
	return state
}

// Advance moves Pos forward by i bytes. No bounds checking is performed.
func (s *State) Advance(i int) {
	s.Pos = s.Pos + i
}

// Get returns the input that has not been consumed yet, i.e. everything from
// Pos to the end of Input. If Pos has moved beyond the end of Input the empty
// string is returned.
func (s *State) Get() string {
	if s.Pos <= len(s.Input) {
		return s.Input[s.Pos:]
	}
	return ""
}

// Preview returns a quoted, printable rendering of the upcoming input,
// truncated to at most x bytes. It is meant for human-readable output such as
// log lines rather than for parsing.
//
// The text is escaped with strconv.Quote (with the surrounding quotes
// removed), so control characters and invalid UTF-8 appear as Go escape
// sequences. The result is never cut in the middle of a multi-byte rune, so it
// is always valid UTF-8; an escape sequence such as \n may still be cut short
// at the limit. The empty string is returned when x is not positive or when no
// input remains.
func (s *State) Preview(x int) string {
	if x <= 0 || s.Pos >= len(s.Input) {
		return ""
	}

	// Quoting never emits fewer bytes than it consumes, so the first x bytes
	// of output are fully determined by the first x bytes of input. Quote
	// only that window (widened to the next rune start so no rune is split)
	// instead of everything that is left.
	rest := s.Get()
	if len(rest) > x {
		end := x
		for end < len(rest) && !utf8.RuneStart(rest[end]) {
			end++
		}
		rest = rest[:end]
	}

	quoted := strconv.Quote(rest)
	quoted = quoted[1 : len(quoted)-1] // strip the surrounding double quotes
	if len(quoted) <= x {
		return quoted
	}

	// Back up to a rune boundary so that a multi-byte character is dropped
	// whole instead of being cut in half.
	cut := x
	for cut > 0 && !utf8.RuneStart(quoted[cut]) {
		cut--
	}
	return quoted[:cut]
}

// ErrorHere records a failure at the current position: it stores expected as
// the description of what the parser wanted, together with the current Pos.
func (s *State) ErrorHere(expected string) {
	s.Error.expected, s.Error.pos = expected, s.Pos
}

// Recover clears the current error so that Errored reports false again. Often
// called by combinators that can still match when one of their children
// succeeds even though others have failed. The recorded error position is left
// untouched.
func (s *State) Recover() {
	const noError = ""
	s.Error.expected = noError
}

// Errored reports whether the current parser has failed, which is the case
// whenever an expectation has been recorded and not yet cleared by Recover.
func (s *State) Errored() bool {
	failed := s.Error.expected != ""
	return failed
}
add debugging 2017-08-07 12:07:29 +02:00			`package goparsify`
Eliminate a bunch of allocations 2017-08-06 15:32:10 +02:00
AutoWS 2017-08-07 13:20:30 +02:00			`import (`
Update benchmarks 2017-08-13 13:20:41 +02:00			`"strconv"`
Add a unicode whitespace parser 2017-08-09 14:14:27 +02:00			`"unicode"`
			`"unicode/utf8"`
AutoWS 2017-08-07 13:20:30 +02:00			`)`
Eliminate a bunch of allocations 2017-08-06 15:32:10 +02:00
Add godoc 2017-08-09 13:18:14 +02:00			`// State is the current parse state. It is entirely public because parsers are expected to mutate it during the parse.`
Eliminate a bunch of allocations 2017-08-06 15:32:10 +02:00			`type State struct {`
Add godoc 2017-08-09 13:18:14 +02:00			`// The full input string`
			`Input string`
			`// An offset into the string, pointing to the current tip`
			`Pos int`
Add parse logging 2017-08-10 13:04:14 +02:00			`// Do not backtrack past this point`
			`Cut int`
Add godoc 2017-08-09 13:18:14 +02:00			`// Error is a secondary return channel from parsers, but used so heavily`
			`// in backtracking that it has been inlined to avoid allocations.`
			`Error Error`
Clean up autows 2017-08-13 11:27:41 +02:00			`// Called to determine what to ignore when WS is called, or when WS fires`
			`WS VoidParser`
Eliminate a bunch of allocations 2017-08-06 15:32:10 +02:00			`}`

Add a unicode whitespace parser 2017-08-09 14:14:27 +02:00			`// ASCIIWhitespace matches any of the standard whitespace characters. It is faster`
			`// than the UnicodeWhitespace parser as it does not need to decode unicode runes.`
			`func ASCIIWhitespace(s *State) {`
			`for s.Pos < len(s.Input) {`
			`switch s.Input[s.Pos] {`
			`case '\t', '\n', '\v', '\f', '\r', ' ':`
			`s.Pos++`
			`default:`
			`return`
			`}`
			`}`
			`}`

			`// UnicodeWhitespace matches any unicode space character. Its a little slower`
			`// than the ascii parser because it matches a rune at a time.`
			`func UnicodeWhitespace(s *State) {`
			`for s.Pos < len(s.Input) {`
			`r, w := utf8.DecodeRuneInString(s.Get())`
			`if !unicode.IsSpace(r) {`
			`return`
			`}`
			`s.Pos += w`
			`}`
Add an Until function 2017-08-13 14:11:27 +02:00			`}`

			`// NoWhitespace disables automatic whitespace matching`
			`func NoWhitespace(s *State) {`
Add a unicode whitespace parser 2017-08-09 14:14:27 +02:00
			`}`

Add godoc 2017-08-09 13:18:14 +02:00			`// NewState creates a new State from a string`
			`func NewState(input string) *State {`
			`return &State{`
			`Input: input,`
Unicode safe by default 2017-08-10 14:06:08 +02:00			`WS: UnicodeWhitespace,`
Add godoc 2017-08-09 13:18:14 +02:00			`}`
			`}`

			`// Advance the Pos along by i bytes`
Eliminate a bunch of allocations 2017-08-06 15:32:10 +02:00			`func (s *State) Advance(i int) {`
			`s.Pos += i`
			`}`

Add godoc 2017-08-09 13:18:14 +02:00			`// Get the remaining input.`
Eliminate a bunch of allocations 2017-08-06 15:32:10 +02:00			`func (s *State) Get() string {`
			`if s.Pos > len(s.Input) {`
			`return ""`
			`}`
			`return s.Input[s.Pos:]`
			`}`

Add parse logging 2017-08-10 13:04:14 +02:00			`// Preview of the the next x characters`
			`func (s *State) Preview(x int) string {`
Document cuts 2017-08-10 13:58:14 +02:00			`if s.Pos >= len(s.Input) {`
Add parse logging 2017-08-10 13:04:14 +02:00			`return ""`
			`}`
Update benchmarks 2017-08-13 13:20:41 +02:00
			`quoted := strconv.Quote(s.Get())`
			`quoted = quoted[1 : len(quoted)-1]`
			`if len(quoted) >= x {`
			`return quoted[0:x]`
Add parse logging 2017-08-10 13:04:14 +02:00			`}`

Update benchmarks 2017-08-13 13:20:41 +02:00			`return quoted`
Add parse logging 2017-08-10 13:04:14 +02:00			`}`

Add godoc 2017-08-09 13:18:14 +02:00			`// ErrorHere raises an error at the current position.`
Eliminate a bunch of allocations 2017-08-06 15:32:10 +02:00			`func (s *State) ErrorHere(expected string) {`
			`s.Error.pos = s.Pos`
Add godoc 2017-08-09 13:18:14 +02:00			`s.Error.expected = expected`
Eliminate a bunch of allocations 2017-08-06 15:32:10 +02:00			`}`

Add godoc 2017-08-09 13:18:14 +02:00			`// Recover from the current error. Often called by combinators that can match`
			`// when one of their children succeed, but others have failed.`
			`func (s *State) Recover() {`
			`s.Error.expected = ""`
Eliminate a bunch of allocations 2017-08-06 15:32:10 +02:00			`}`

Add godoc 2017-08-09 13:18:14 +02:00			`// Errored returns true if the current parser has failed.`
Eliminate a bunch of allocations 2017-08-06 15:32:10 +02:00			`func (s *State) Errored() bool {`
Add godoc 2017-08-09 13:18:14 +02:00			`return s.Error.expected != ""`
Eliminate a bunch of allocations 2017-08-06 15:32:10 +02:00			`}`