kilo/vendor/github.com/BurntSushi/toml/lex.go

1226 lines
29 KiB
Go

package toml
import (
"fmt"
"reflect"
"runtime"
"strings"
"unicode"
"unicode/utf8"
)
type itemType int
const (
itemError itemType = iota
itemNIL // used in the parser to indicate no type
itemEOF
itemText
itemString
itemRawString
itemMultilineString
itemRawMultilineString
itemBool
itemInteger
itemFloat
itemDatetime
itemArray // the start of an array
itemArrayEnd
itemTableStart
itemTableEnd
itemArrayTableStart
itemArrayTableEnd
itemKeyStart
itemKeyEnd
itemCommentStart
itemInlineTableStart
itemInlineTableEnd
)
const (
eof = 0
comma = ','
tableStart = '['
tableEnd = ']'
arrayTableStart = '['
arrayTableEnd = ']'
tableSep = '.'
keySep = '='
arrayStart = '['
arrayEnd = ']'
commentStart = '#'
stringStart = '"'
stringEnd = '"'
rawStringStart = '\''
rawStringEnd = '\''
inlineTableStart = '{'
inlineTableEnd = '}'
)
type stateFn func(lx *lexer) stateFn
type lexer struct {
input string
start int
pos int
line int
state stateFn
items chan item
// Allow for backing up up to four runes.
// This is necessary because TOML contains 3-rune tokens (""" and ''').
prevWidths [4]int
nprev int // how many of prevWidths are in use
// If we emit an eof, we can still back up, but it is not OK to call
// next again.
atEOF bool
// A stack of state functions used to maintain context.
// The idea is to reuse parts of the state machine in various places.
// For example, values can appear at the top level or within arbitrarily
// nested arrays. The last state on the stack is used after a value has
// been lexed. Similarly for comments.
stack []stateFn
}
type item struct {
typ itemType
val string
line int
}
func (lx *lexer) nextItem() item {
for {
select {
case item := <-lx.items:
return item
default:
lx.state = lx.state(lx)
//fmt.Printf(" STATE %-24s current: %-10q stack: %s\n", lx.state, lx.current(), lx.stack)
}
}
}
func lex(input string) *lexer {
lx := &lexer{
input: input,
state: lexTop,
line: 1,
items: make(chan item, 10),
stack: make([]stateFn, 0, 10),
}
return lx
}
func (lx *lexer) push(state stateFn) {
lx.stack = append(lx.stack, state)
}
func (lx *lexer) pop() stateFn {
if len(lx.stack) == 0 {
return lx.errorf("BUG in lexer: no states to pop")
}
last := lx.stack[len(lx.stack)-1]
lx.stack = lx.stack[0 : len(lx.stack)-1]
return last
}
func (lx *lexer) current() string {
return lx.input[lx.start:lx.pos]
}
func (lx *lexer) emit(typ itemType) {
lx.items <- item{typ, lx.current(), lx.line}
lx.start = lx.pos
}
func (lx *lexer) emitTrim(typ itemType) {
lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
lx.start = lx.pos
}
func (lx *lexer) next() (r rune) {
if lx.atEOF {
panic("BUG in lexer: next called after EOF")
}
if lx.pos >= len(lx.input) {
lx.atEOF = true
return eof
}
if lx.input[lx.pos] == '\n' {
lx.line++
}
lx.prevWidths[3] = lx.prevWidths[2]
lx.prevWidths[2] = lx.prevWidths[1]
lx.prevWidths[1] = lx.prevWidths[0]
if lx.nprev < 4 {
lx.nprev++
}
r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
if r == utf8.RuneError {
lx.errorf("invalid UTF-8 byte at position %d (line %d): 0x%02x", lx.pos, lx.line, lx.input[lx.pos])
return utf8.RuneError
}
lx.prevWidths[0] = w
lx.pos += w
return r
}
// ignore skips over the pending input before this point.
func (lx *lexer) ignore() {
lx.start = lx.pos
}
// backup steps back one rune. Can be called 4 times between calls to next.
func (lx *lexer) backup() {
if lx.atEOF {
lx.atEOF = false
return
}
if lx.nprev < 1 {
panic("BUG in lexer: backed up too far")
}
w := lx.prevWidths[0]
lx.prevWidths[0] = lx.prevWidths[1]
lx.prevWidths[1] = lx.prevWidths[2]
lx.prevWidths[2] = lx.prevWidths[3]
lx.nprev--
lx.pos -= w
if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
lx.line--
}
}
// accept consumes the next rune if it's equal to `valid`.
func (lx *lexer) accept(valid rune) bool {
if lx.next() == valid {
return true
}
lx.backup()
return false
}
// peek returns but does not consume the next rune in the input.
func (lx *lexer) peek() rune {
r := lx.next()
lx.backup()
return r
}
// skip ignores all input that matches the given predicate.
func (lx *lexer) skip(pred func(rune) bool) {
for {
r := lx.next()
if pred(r) {
continue
}
lx.backup()
lx.ignore()
return
}
}
// errorf stops all lexing by emitting an error and returning `nil`.
// Note that any value that is a character is escaped if it's a special
// character (newlines, tabs, etc.).
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
lx.items <- item{
itemError,
fmt.Sprintf(format, values...),
lx.line,
}
return nil
}
// lexTop consumes elements at the top level of TOML data.
func lexTop(lx *lexer) stateFn {
r := lx.next()
if isWhitespace(r) || isNL(r) {
return lexSkip(lx, lexTop)
}
switch r {
case commentStart:
lx.push(lexTop)
return lexCommentStart
case tableStart:
return lexTableStart
case eof:
if lx.pos > lx.start {
return lx.errorf("unexpected EOF")
}
lx.emit(itemEOF)
return nil
}
// At this point, the only valid item can be a key, so we back up
// and let the key lexer do the rest.
lx.backup()
lx.push(lexTopEnd)
return lexKeyStart
}
// lexTopEnd is entered whenever a top-level item has been consumed. (A value
// or a table.) It must see only whitespace, and will turn back to lexTop
// upon a newline. If it sees EOF, it will quit the lexer successfully.
func lexTopEnd(lx *lexer) stateFn {
r := lx.next()
switch {
case r == commentStart:
// a comment will read to a newline for us.
lx.push(lexTop)
return lexCommentStart
case isWhitespace(r):
return lexTopEnd
case isNL(r):
lx.ignore()
return lexTop
case r == eof:
lx.emit(itemEOF)
return nil
}
return lx.errorf(
"expected a top-level item to end with a newline, comment, or EOF, but got %q instead",
r)
}
// lexTable lexes the beginning of a table. Namely, it makes sure that
// it starts with a character other than '.' and ']'.
// It assumes that '[' has already been consumed.
// It also handles the case that this is an item in an array of tables.
// e.g., '[[name]]'.
func lexTableStart(lx *lexer) stateFn {
if lx.peek() == arrayTableStart {
lx.next()
lx.emit(itemArrayTableStart)
lx.push(lexArrayTableEnd)
} else {
lx.emit(itemTableStart)
lx.push(lexTableEnd)
}
return lexTableNameStart
}
func lexTableEnd(lx *lexer) stateFn {
lx.emit(itemTableEnd)
return lexTopEnd
}
func lexArrayTableEnd(lx *lexer) stateFn {
if r := lx.next(); r != arrayTableEnd {
return lx.errorf(
"expected end of table array name delimiter %q, but got %q instead",
arrayTableEnd, r)
}
lx.emit(itemArrayTableEnd)
return lexTopEnd
}
func lexTableNameStart(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.peek(); {
case r == tableEnd || r == eof:
return lx.errorf("unexpected end of table name (table names cannot be empty)")
case r == tableSep:
return lx.errorf("unexpected table separator (table names cannot be empty)")
case r == stringStart || r == rawStringStart:
lx.ignore()
lx.push(lexTableNameEnd)
return lexQuotedName
default:
lx.push(lexTableNameEnd)
return lexBareName
}
}
// lexTableNameEnd reads the end of a piece of a table name, optionally
// consuming whitespace.
func lexTableNameEnd(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.next(); {
case isWhitespace(r):
return lexTableNameEnd
case r == tableSep:
lx.ignore()
return lexTableNameStart
case r == tableEnd:
return lx.pop()
default:
return lx.errorf("expected '.' or ']' to end table name, but got %q instead", r)
}
}
// lexBareName lexes one part of a key or table.
//
// It assumes that at least one valid character for the table has already been
// read.
//
// Lexes only one part, e.g. only 'a' inside 'a.b'.
func lexBareName(lx *lexer) stateFn {
r := lx.next()
if isBareKeyChar(r) {
return lexBareName
}
lx.backup()
lx.emit(itemText)
return lx.pop()
}
// lexBareName lexes one part of a key or table.
//
// It assumes that at least one valid character for the table has already been
// read.
//
// Lexes only one part, e.g. only '"a"' inside '"a".b'.
func lexQuotedName(lx *lexer) stateFn {
r := lx.next()
switch {
case isWhitespace(r):
return lexSkip(lx, lexValue)
case r == stringStart:
lx.ignore() // ignore the '"'
return lexString
case r == rawStringStart:
lx.ignore() // ignore the "'"
return lexRawString
case r == eof:
return lx.errorf("unexpected EOF; expected value")
default:
return lx.errorf("expected value but found %q instead", r)
}
}
// lexKeyStart consumes all key parts until a '='.
func lexKeyStart(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.peek(); {
case r == '=' || r == eof:
return lx.errorf("unexpected '=': key name appears blank")
case r == '.':
return lx.errorf("unexpected '.': keys cannot start with a '.'")
case r == stringStart || r == rawStringStart:
lx.ignore()
fallthrough
default: // Bare key
lx.emit(itemKeyStart)
return lexKeyNameStart
}
}
func lexKeyNameStart(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.peek(); {
case r == '=' || r == eof:
return lx.errorf("unexpected '='")
case r == '.':
return lx.errorf("unexpected '.'")
case r == stringStart || r == rawStringStart:
lx.ignore()
lx.push(lexKeyEnd)
return lexQuotedName
default:
lx.push(lexKeyEnd)
return lexBareName
}
}
// lexKeyEnd consumes the end of a key and trims whitespace (up to the key
// separator).
func lexKeyEnd(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.next(); {
case isWhitespace(r):
return lexSkip(lx, lexKeyEnd)
case r == eof:
return lx.errorf("unexpected EOF; expected key separator %q", keySep)
case r == '.':
lx.ignore()
return lexKeyNameStart
case r == '=':
lx.emit(itemKeyEnd)
return lexSkip(lx, lexValue)
default:
return lx.errorf("expected '.' or '=', but got %q instead", r)
}
}
// lexValue starts the consumption of a value anywhere a value is expected.
// lexValue will ignore whitespace.
// After a value is lexed, the last state on the next is popped and returned.
func lexValue(lx *lexer) stateFn {
// We allow whitespace to precede a value, but NOT newlines.
// In array syntax, the array states are responsible for ignoring newlines.
r := lx.next()
switch {
case isWhitespace(r):
return lexSkip(lx, lexValue)
case isDigit(r):
lx.backup() // avoid an extra state and use the same as above
return lexNumberOrDateStart
}
switch r {
case arrayStart:
lx.ignore()
lx.emit(itemArray)
return lexArrayValue
case inlineTableStart:
lx.ignore()
lx.emit(itemInlineTableStart)
return lexInlineTableValue
case stringStart:
if lx.accept(stringStart) {
if lx.accept(stringStart) {
lx.ignore() // Ignore """
return lexMultilineString
}
lx.backup()
}
lx.ignore() // ignore the '"'
return lexString
case rawStringStart:
if lx.accept(rawStringStart) {
if lx.accept(rawStringStart) {
lx.ignore() // Ignore """
return lexMultilineRawString
}
lx.backup()
}
lx.ignore() // ignore the "'"
return lexRawString
case '.': // special error case, be kind to users
return lx.errorf("floats must start with a digit, not '.'")
case 'i', 'n':
if (lx.accept('n') && lx.accept('f')) || (lx.accept('a') && lx.accept('n')) {
lx.emit(itemFloat)
return lx.pop()
}
case '-', '+':
return lexDecimalNumberStart
}
if unicode.IsLetter(r) {
// Be permissive here; lexBool will give a nice error if the
// user wrote something like
// x = foo
// (i.e. not 'true' or 'false' but is something else word-like.)
lx.backup()
return lexBool
}
if r == eof {
return lx.errorf("unexpected EOF; expected value")
}
return lx.errorf("expected value but found %q instead", r)
}
// lexArrayValue consumes one value in an array. It assumes that '[' or ','
// have already been consumed. All whitespace and newlines are ignored.
func lexArrayValue(lx *lexer) stateFn {
r := lx.next()
switch {
case isWhitespace(r) || isNL(r):
return lexSkip(lx, lexArrayValue)
case r == commentStart:
lx.push(lexArrayValue)
return lexCommentStart
case r == comma:
return lx.errorf("unexpected comma")
case r == arrayEnd:
// NOTE(caleb): The spec isn't clear about whether you can have
// a trailing comma or not, so we'll allow it.
return lexArrayEnd
}
lx.backup()
lx.push(lexArrayValueEnd)
return lexValue
}
// lexArrayValueEnd consumes everything between the end of an array value and
// the next value (or the end of the array): it ignores whitespace and newlines
// and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
r := lx.next()
switch {
case isWhitespace(r) || isNL(r):
return lexSkip(lx, lexArrayValueEnd)
case r == commentStart:
lx.push(lexArrayValueEnd)
return lexCommentStart
case r == comma:
lx.ignore()
return lexArrayValue // move on to the next value
case r == arrayEnd:
return lexArrayEnd
}
return lx.errorf(
"expected a comma or array terminator %q, but got %s instead",
arrayEnd, runeOrEOF(r))
}
// lexArrayEnd finishes the lexing of an array.
// It assumes that a ']' has just been consumed.
func lexArrayEnd(lx *lexer) stateFn {
lx.ignore()
lx.emit(itemArrayEnd)
return lx.pop()
}
// lexInlineTableValue consumes one key/value pair in an inline table.
// It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
func lexInlineTableValue(lx *lexer) stateFn {
r := lx.next()
switch {
case isWhitespace(r):
return lexSkip(lx, lexInlineTableValue)
case isNL(r):
return lx.errorf("newlines not allowed within inline tables")
case r == commentStart:
lx.push(lexInlineTableValue)
return lexCommentStart
case r == comma:
return lx.errorf("unexpected comma")
case r == inlineTableEnd:
return lexInlineTableEnd
}
lx.backup()
lx.push(lexInlineTableValueEnd)
return lexKeyStart
}
// lexInlineTableValueEnd consumes everything between the end of an inline table
// key/value pair and the next pair (or the end of the table):
// it ignores whitespace and expects either a ',' or a '}'.
func lexInlineTableValueEnd(lx *lexer) stateFn {
switch r := lx.next(); {
case isWhitespace(r):
return lexSkip(lx, lexInlineTableValueEnd)
case isNL(r):
return lx.errorf("newlines not allowed within inline tables")
case r == commentStart:
lx.push(lexInlineTableValueEnd)
return lexCommentStart
case r == comma:
lx.ignore()
lx.skip(isWhitespace)
if lx.peek() == '}' {
return lx.errorf("trailing comma not allowed in inline tables")
}
return lexInlineTableValue
case r == inlineTableEnd:
return lexInlineTableEnd
default:
return lx.errorf(
"expected a comma or an inline table terminator %q, but got %s instead",
inlineTableEnd, runeOrEOF(r))
}
}
func runeOrEOF(r rune) string {
if r == eof {
return "end of file"
}
return "'" + string(r) + "'"
}
// lexInlineTableEnd finishes the lexing of an inline table.
// It assumes that a '}' has just been consumed.
func lexInlineTableEnd(lx *lexer) stateFn {
lx.ignore()
lx.emit(itemInlineTableEnd)
return lx.pop()
}
// lexString consumes the inner contents of a string. It assumes that the
// beginning '"' has already been consumed and ignored.
func lexString(lx *lexer) stateFn {
r := lx.next()
switch {
case r == eof:
return lx.errorf(`unexpected EOF; expected '"'`)
case isControl(r) || r == '\r':
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
case isNL(r):
return lx.errorf("strings cannot contain newlines")
case r == '\\':
lx.push(lexString)
return lexStringEscape
case r == stringEnd:
lx.backup()
lx.emit(itemString)
lx.next()
lx.ignore()
return lx.pop()
}
return lexString
}
// lexMultilineString consumes the inner contents of a string. It assumes that
// the beginning '"""' has already been consumed and ignored.
func lexMultilineString(lx *lexer) stateFn {
r := lx.next()
switch r {
case eof:
return lx.errorf(`unexpected EOF; expected '"""'`)
case '\r':
if lx.peek() != '\n' {
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
}
return lexMultilineString
case '\\':
return lexMultilineStringEscape
case stringEnd:
/// Found " → try to read two more "".
if lx.accept(stringEnd) {
if lx.accept(stringEnd) {
/// Peek ahead: the string can contain " and "", including at the
/// end: """str"""""
/// 6 or more at the end, however, is an error.
if lx.peek() == stringEnd {
/// Check if we already lexed 5 's; if so we have 6 now, and
/// that's just too many man!
if strings.HasSuffix(lx.current(), `"""""`) {
return lx.errorf(`unexpected '""""""'`)
}
lx.backup()
lx.backup()
return lexMultilineString
}
lx.backup() /// backup: don't include the """ in the item.
lx.backup()
lx.backup()
lx.emit(itemMultilineString)
lx.next() /// Read over ''' again and discard it.
lx.next()
lx.next()
lx.ignore()
return lx.pop()
}
lx.backup()
}
}
if isControl(r) {
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
}
return lexMultilineString
}
// lexRawString consumes a raw string. Nothing can be escaped in such a string.
// It assumes that the beginning "'" has already been consumed and ignored.
func lexRawString(lx *lexer) stateFn {
r := lx.next()
switch {
case r == eof:
return lx.errorf(`unexpected EOF; expected "'"`)
case isControl(r) || r == '\r':
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
case isNL(r):
return lx.errorf("strings cannot contain newlines")
case r == rawStringEnd:
lx.backup()
lx.emit(itemRawString)
lx.next()
lx.ignore()
return lx.pop()
}
return lexRawString
}
// lexMultilineRawString consumes a raw string. Nothing can be escaped in such
// a string. It assumes that the beginning "'''" has already been consumed and
// ignored.
func lexMultilineRawString(lx *lexer) stateFn {
r := lx.next()
switch r {
case eof:
return lx.errorf(`unexpected EOF; expected "'''"`)
case '\r':
if lx.peek() != '\n' {
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
}
return lexMultilineRawString
case rawStringEnd:
/// Found ' → try to read two more ''.
if lx.accept(rawStringEnd) {
if lx.accept(rawStringEnd) {
/// Peek ahead: the string can contain ' and '', including at the
/// end: '''str'''''
/// 6 or more at the end, however, is an error.
if lx.peek() == rawStringEnd {
/// Check if we already lexed 5 's; if so we have 6 now, and
/// that's just too many man!
if strings.HasSuffix(lx.current(), "'''''") {
return lx.errorf(`unexpected "''''''"`)
}
lx.backup()
lx.backup()
return lexMultilineRawString
}
lx.backup() /// backup: don't include the ''' in the item.
lx.backup()
lx.backup()
lx.emit(itemRawMultilineString)
lx.next() /// Read over ''' again and discard it.
lx.next()
lx.next()
lx.ignore()
return lx.pop()
}
lx.backup()
}
}
if isControl(r) {
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
}
return lexMultilineRawString
}
// lexMultilineStringEscape consumes an escaped character. It assumes that the
// preceding '\\' has already been consumed.
func lexMultilineStringEscape(lx *lexer) stateFn {
// Handle the special case first:
if isNL(lx.next()) {
return lexMultilineString
}
lx.backup()
lx.push(lexMultilineString)
return lexStringEscape(lx)
}
func lexStringEscape(lx *lexer) stateFn {
r := lx.next()
switch r {
case 'b':
fallthrough
case 't':
fallthrough
case 'n':
fallthrough
case 'f':
fallthrough
case 'r':
fallthrough
case '"':
fallthrough
case ' ', '\t':
// Inside """ .. """ strings you can use \ to escape newlines, and any
// amount of whitespace can be between the \ and \n.
fallthrough
case '\\':
return lx.pop()
case 'u':
return lexShortUnicodeEscape
case 'U':
return lexLongUnicodeEscape
}
return lx.errorf("invalid escape character %q; only the following escape characters are allowed: "+
`\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r)
}
func lexShortUnicodeEscape(lx *lexer) stateFn {
var r rune
for i := 0; i < 4; i++ {
r = lx.next()
if !isHexadecimal(r) {
return lx.errorf(
`expected four hexadecimal digits after '\u', but got %q instead`,
lx.current())
}
}
return lx.pop()
}
func lexLongUnicodeEscape(lx *lexer) stateFn {
var r rune
for i := 0; i < 8; i++ {
r = lx.next()
if !isHexadecimal(r) {
return lx.errorf(
`expected eight hexadecimal digits after '\U', but got %q instead`,
lx.current())
}
}
return lx.pop()
}
// lexNumberOrDateStart processes the first character of a value which begins
// with a digit. It exists to catch values starting with '0', so that
// lexBaseNumberOrDate can differentiate base prefixed integers from other
// types.
func lexNumberOrDateStart(lx *lexer) stateFn {
r := lx.next()
switch r {
case '0':
return lexBaseNumberOrDate
}
if !isDigit(r) {
// The only way to reach this state is if the value starts
// with a digit, so specifically treat anything else as an
// error.
return lx.errorf("expected a digit but got %q", r)
}
return lexNumberOrDate
}
// lexNumberOrDate consumes either an integer, float or datetime.
func lexNumberOrDate(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexNumberOrDate
}
switch r {
case '-', ':':
return lexDatetime
case '_':
return lexDecimalNumber
case '.', 'e', 'E':
return lexFloat
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexDatetime consumes a Datetime, to a first approximation.
// The parser validates that it matches one of the accepted formats.
func lexDatetime(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexDatetime
}
switch r {
case '-', ':', 'T', 't', ' ', '.', 'Z', 'z', '+':
return lexDatetime
}
lx.backup()
lx.emitTrim(itemDatetime)
return lx.pop()
}
// lexHexInteger consumes a hexadecimal integer after seeing the '0x' prefix.
func lexHexInteger(lx *lexer) stateFn {
r := lx.next()
if isHexadecimal(r) {
return lexHexInteger
}
switch r {
case '_':
return lexHexInteger
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexOctalInteger consumes an octal integer after seeing the '0o' prefix.
func lexOctalInteger(lx *lexer) stateFn {
r := lx.next()
if isOctal(r) {
return lexOctalInteger
}
switch r {
case '_':
return lexOctalInteger
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexBinaryInteger consumes a binary integer after seeing the '0b' prefix.
func lexBinaryInteger(lx *lexer) stateFn {
r := lx.next()
if isBinary(r) {
return lexBinaryInteger
}
switch r {
case '_':
return lexBinaryInteger
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexDecimalNumber consumes a decimal float or integer.
func lexDecimalNumber(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexDecimalNumber
}
switch r {
case '.', 'e', 'E':
return lexFloat
case '_':
return lexDecimalNumber
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexDecimalNumber consumes the first digit of a number beginning with a sign.
// It assumes the sign has already been consumed. Values which start with a sign
// are only allowed to be decimal integers or floats.
//
// The special "nan" and "inf" values are also recognized.
func lexDecimalNumberStart(lx *lexer) stateFn {
r := lx.next()
// Special error cases to give users better error messages
switch r {
case 'i':
if !lx.accept('n') || !lx.accept('f') {
return lx.errorf("invalid float: '%s'", lx.current())
}
lx.emit(itemFloat)
return lx.pop()
case 'n':
if !lx.accept('a') || !lx.accept('n') {
return lx.errorf("invalid float: '%s'", lx.current())
}
lx.emit(itemFloat)
return lx.pop()
case '0':
p := lx.peek()
switch p {
case 'b', 'o', 'x':
return lx.errorf("cannot use sign with non-decimal numbers: '%s%c'", lx.current(), p)
}
case '.':
return lx.errorf("floats must start with a digit, not '.'")
}
if isDigit(r) {
return lexDecimalNumber
}
return lx.errorf("expected a digit but got %q", r)
}
// lexBaseNumberOrDate differentiates between the possible values which
// start with '0'. It assumes that before reaching this state, the initial '0'
// has been consumed.
func lexBaseNumberOrDate(lx *lexer) stateFn {
r := lx.next()
// Note: All datetimes start with at least two digits, so we don't
// handle date characters (':', '-', etc.) here.
if isDigit(r) {
return lexNumberOrDate
}
switch r {
case '_':
// Can only be decimal, because there can't be an underscore
// between the '0' and the base designator, and dates can't
// contain underscores.
return lexDecimalNumber
case '.', 'e', 'E':
return lexFloat
case 'b':
r = lx.peek()
if !isBinary(r) {
lx.errorf("not a binary number: '%s%c'", lx.current(), r)
}
return lexBinaryInteger
case 'o':
r = lx.peek()
if !isOctal(r) {
lx.errorf("not an octal number: '%s%c'", lx.current(), r)
}
return lexOctalInteger
case 'x':
r = lx.peek()
if !isHexadecimal(r) {
lx.errorf("not a hexidecimal number: '%s%c'", lx.current(), r)
}
return lexHexInteger
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexFloat consumes the elements of a float. It allows any sequence of
// float-like characters, so floats emitted by the lexer are only a first
// approximation and must be validated by the parser.
func lexFloat(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexFloat
}
switch r {
case '_', '.', '-', '+', 'e', 'E':
return lexFloat
}
lx.backup()
lx.emit(itemFloat)
return lx.pop()
}
// lexBool consumes a bool string: 'true' or 'false.
func lexBool(lx *lexer) stateFn {
var rs []rune
for {
r := lx.next()
if !unicode.IsLetter(r) {
lx.backup()
break
}
rs = append(rs, r)
}
s := string(rs)
switch s {
case "true", "false":
lx.emit(itemBool)
return lx.pop()
}
return lx.errorf("expected value but found %q instead", s)
}
// lexCommentStart begins the lexing of a comment. It will emit
// itemCommentStart and consume no characters, passing control to lexComment.
func lexCommentStart(lx *lexer) stateFn {
lx.ignore()
lx.emit(itemCommentStart)
return lexComment
}
// lexComment lexes an entire comment. It assumes that '#' has been consumed.
// It will consume *up to* the first newline character, and pass control
// back to the last state on the stack.
func lexComment(lx *lexer) stateFn {
switch r := lx.next(); {
case isNL(r) || r == eof:
lx.backup()
lx.emit(itemText)
return lx.pop()
case isControl(r):
return lx.errorf("control characters are not allowed inside comments: '0x%02x'", r)
default:
return lexComment
}
}
// lexSkip ignores all slurped input and moves on to the next state.
func lexSkip(lx *lexer, nextState stateFn) stateFn {
lx.ignore()
return nextState
}
// isWhitespace returns true if `r` is a whitespace character according
// to the spec.
func isWhitespace(r rune) bool {
return r == '\t' || r == ' '
}
func isNL(r rune) bool {
return r == '\n' || r == '\r'
}
// Control characters except \n, \t
func isControl(r rune) bool {
switch r {
case '\t', '\r', '\n':
return false
default:
return (r >= 0x00 && r <= 0x1f) || r == 0x7f
}
}
func isDigit(r rune) bool {
return r >= '0' && r <= '9'
}
func isHexadecimal(r rune) bool {
return (r >= '0' && r <= '9') ||
(r >= 'a' && r <= 'f') ||
(r >= 'A' && r <= 'F')
}
func isOctal(r rune) bool {
return r >= '0' && r <= '7'
}
func isBinary(r rune) bool {
return r == '0' || r == '1'
}
func isBareKeyChar(r rune) bool {
return (r >= 'A' && r <= 'Z') ||
(r >= 'a' && r <= 'z') ||
(r >= '0' && r <= '9') ||
r == '_' ||
r == '-'
}
func (s stateFn) String() string {
name := runtime.FuncForPC(reflect.ValueOf(s).Pointer()).Name()
if i := strings.LastIndexByte(name, '.'); i > -1 {
name = name[i+1:]
}
if s == nil {
name = "<nil>"
}
return name + "()"
}
func (itype itemType) String() string {
switch itype {
case itemError:
return "Error"
case itemNIL:
return "NIL"
case itemEOF:
return "EOF"
case itemText:
return "Text"
case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
return "String"
case itemBool:
return "Bool"
case itemInteger:
return "Integer"
case itemFloat:
return "Float"
case itemDatetime:
return "DateTime"
case itemTableStart:
return "TableStart"
case itemTableEnd:
return "TableEnd"
case itemKeyStart:
return "KeyStart"
case itemKeyEnd:
return "KeyEnd"
case itemArray:
return "Array"
case itemArrayEnd:
return "ArrayEnd"
case itemCommentStart:
return "CommentStart"
case itemInlineTableStart:
return "InlineTableStart"
case itemInlineTableEnd:
return "InlineTableEnd"
}
panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
}
func (item item) String() string {
return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
}