2017-08-07 12:07:29 +02:00
|
|
|
package goparsify
|
2017-08-06 15:32:10 +02:00
|
|
|
|
2017-08-07 13:20:30 +02:00
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
)
|
2017-08-06 15:32:10 +02:00
|
|
|
|
2017-08-09 13:18:14 +02:00
|
|
|
// Error represents a parse error. Parsers set these frequently: when one is
// set, a combinator may back up a little and try another viable path. In
// general, when combining errors, the longest error should be returned.
//
// An Error whose expected text is empty means "no error".
type Error struct {
	// pos is the byte offset into the input at which the error was raised.
	pos int
	// expected describes what the parser wanted to find at pos.
	expected string
}

// Pos is the offset into the document the error was found.
func (e Error) Pos() int { return e.pos }

// Error satisfies the golang error interface. The message has the form
// "offset <pos>: expected <expected>".
func (e Error) Error() string { return fmt.Sprintf("offset %d: expected %s", e.pos, e.expected) }
|
|
|
|
|
|
|
|
// WSFunc reports whether the byte c should be treated as whitespace. It is
// called one byte at a time, so it classifies single bytes rather than
// multi-byte runes.
type WSFunc func(c byte) bool
|
|
|
|
|
2017-08-09 13:18:14 +02:00
|
|
|
// State is the current parse state. It is entirely public because parsers are expected to mutate it during the parse.
|
2017-08-06 15:32:10 +02:00
|
|
|
type State struct {
|
2017-08-09 13:18:14 +02:00
|
|
|
// The full input string
|
|
|
|
Input string
|
|
|
|
// An offset into the string, pointing to the current tip
|
|
|
|
Pos int
|
|
|
|
// Error is a secondary return channel from parsers, but used so heavily
|
|
|
|
// in backtracking that it has been inlined to avoid allocations.
|
|
|
|
Error Error
|
|
|
|
// Called to determine what to ignore when WS is called, or when AutoWS fires
|
2017-08-08 12:59:48 +02:00
|
|
|
WSFunc WSFunc
|
2017-08-07 13:20:30 +02:00
|
|
|
NoAutoWS bool
|
2017-08-06 15:32:10 +02:00
|
|
|
}
|
|
|
|
|
2017-08-09 13:18:14 +02:00
|
|
|
// NewState creates a new State from a string
|
|
|
|
func NewState(input string) *State {
|
|
|
|
return &State{
|
|
|
|
Input: input,
|
|
|
|
WSFunc: func(b byte) bool {
|
|
|
|
switch b {
|
|
|
|
case '\t', '\n', '\v', '\f', '\r', ' ':
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Advance the Pos along by i bytes
|
2017-08-06 15:32:10 +02:00
|
|
|
func (s *State) Advance(i int) {
|
|
|
|
s.Pos += i
|
|
|
|
}
|
|
|
|
|
2017-08-09 13:18:14 +02:00
|
|
|
// AutoWS consumes all whitespace and advances Pos but can be disabled by the NoAutWS() parser.
|
2017-08-07 13:20:30 +02:00
|
|
|
func (s *State) AutoWS() {
|
|
|
|
if s.NoAutoWS {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
s.WS()
|
|
|
|
}
|
|
|
|
|
2017-08-09 13:18:14 +02:00
|
|
|
// WS consumes all whitespace and advances Pos.
|
2017-08-07 13:20:30 +02:00
|
|
|
func (s *State) WS() {
|
2017-08-08 12:59:48 +02:00
|
|
|
for s.Pos < len(s.Input) && s.WSFunc(s.Input[s.Pos]) {
|
|
|
|
s.Pos++
|
2017-08-07 13:20:30 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-08-09 13:18:14 +02:00
|
|
|
// Get the remaining input.
|
2017-08-06 15:32:10 +02:00
|
|
|
func (s *State) Get() string {
|
|
|
|
if s.Pos > len(s.Input) {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
return s.Input[s.Pos:]
|
|
|
|
}
|
|
|
|
|
2017-08-09 13:18:14 +02:00
|
|
|
// ErrorHere raises an error at the current position.
|
2017-08-06 15:32:10 +02:00
|
|
|
func (s *State) ErrorHere(expected string) {
|
|
|
|
s.Error.pos = s.Pos
|
2017-08-09 13:18:14 +02:00
|
|
|
s.Error.expected = expected
|
2017-08-06 15:32:10 +02:00
|
|
|
}
|
|
|
|
|
2017-08-09 13:18:14 +02:00
|
|
|
// Recover from the current error. Often called by combinators that can match
|
|
|
|
// when one of their children succeed, but others have failed.
|
|
|
|
func (s *State) Recover() {
|
|
|
|
s.Error.expected = ""
|
2017-08-06 15:32:10 +02:00
|
|
|
}
|
|
|
|
|
2017-08-09 13:18:14 +02:00
|
|
|
// Errored returns true if the current parser has failed.
|
2017-08-06 15:32:10 +02:00
|
|
|
func (s *State) Errored() bool {
|
2017-08-09 13:18:14 +02:00
|
|
|
return s.Error.expected != ""
|
2017-08-06 15:32:10 +02:00
|
|
|
}
|