Path: blob/main/vendor/github.com/pelletier/go-toml/v2/unstable/scanner.go
2893 views
package unstable12import "github.com/pelletier/go-toml/v2/internal/characters"34func scanFollows(b []byte, pattern string) bool {5n := len(pattern)67return len(b) >= n && string(b[:n]) == pattern8}910func scanFollowsMultilineBasicStringDelimiter(b []byte) bool {11return scanFollows(b, `"""`)12}1314func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool {15return scanFollows(b, `'''`)16}1718func scanFollowsTrue(b []byte) bool {19return scanFollows(b, `true`)20}2122func scanFollowsFalse(b []byte) bool {23return scanFollows(b, `false`)24}2526func scanFollowsInf(b []byte) bool {27return scanFollows(b, `inf`)28}2930func scanFollowsNan(b []byte) bool {31return scanFollows(b, `nan`)32}3334func scanUnquotedKey(b []byte) ([]byte, []byte) {35// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _36for i := 0; i < len(b); i++ {37if !isUnquotedKeyChar(b[i]) {38return b[:i], b[i:]39}40}4142return b, b[len(b):]43}4445func isUnquotedKeyChar(r byte) bool {46return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_'47}4849func scanLiteralString(b []byte) ([]byte, []byte, error) {50// literal-string = apostrophe *literal-char apostrophe51// apostrophe = %x27 ; ' apostrophe52// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii53for i := 1; i < len(b); {54switch b[i] {55case '\'':56return b[:i+1], b[i+1:], nil57case '\n', '\r':58return nil, nil, NewParserError(b[i:i+1], "literal strings cannot have new lines")59}60size := characters.Utf8ValidNext(b[i:])61if size == 0 {62return nil, nil, NewParserError(b[i:i+1], "invalid character")63}64i += size65}6667return nil, nil, NewParserError(b[len(b):], "unterminated literal string")68}6970func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {71// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body72// ml-literal-string-delim73// ml-literal-string-delim = 3apostrophe74// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]75//76// mll-content = mll-char / newline77// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii78// mll-quotes = 1*2apostrophe79for i := 3; i < len(b); {80switch b[i] {81case '\'':82if scanFollowsMultilineLiteralStringDelimiter(b[i:]) {83i += 38485// At that point we found 3 apostrophe, and i is the86// index of the byte after the third one. The scanner87// needs to be eager, because there can be an extra 288// apostrophe that can be accepted at the end of the89// string.9091if i >= len(b) || b[i] != '\'' {92return b[:i], b[i:], nil93}94i++9596if i >= len(b) || b[i] != '\'' {97return b[:i], b[i:], nil98}99i++100101if i < len(b) && b[i] == '\'' {102return nil, nil, NewParserError(b[i-3:i+1], "''' not allowed in multiline literal string")103}104105return b[:i], b[i:], nil106}107case '\r':108if len(b) < i+2 {109return nil, nil, NewParserError(b[len(b):], `need a \n after \r`)110}111if b[i+1] != '\n' {112return nil, nil, NewParserError(b[i:i+2], `need a \n after \r`)113}114i += 2 // skip the \n115continue116}117size := characters.Utf8ValidNext(b[i:])118if size == 0 {119return nil, nil, NewParserError(b[i:i+1], "invalid character")120}121i += size122}123124return nil, nil, NewParserError(b[len(b):], `multiline literal string not terminated by '''`)125}126127func scanWindowsNewline(b []byte) ([]byte, []byte, error) {128const lenCRLF = 2129if len(b) < lenCRLF {130return nil, nil, NewParserError(b, "windows new line expected")131}132133if b[1] != '\n' {134return nil, nil, NewParserError(b, `windows new line should be \r\n`)135}136137return b[:lenCRLF], b[lenCRLF:], nil138}139140func scanWhitespace(b []byte) ([]byte, []byte) {141for i := 0; i < len(b); i++ {142switch b[i] {143case ' ', '\t':144continue145default:146return b[:i], b[i:]147}148}149150return b, b[len(b):]151}152153func scanComment(b []byte) ([]byte, []byte, error) {154// comment-start-symbol = %x23 ; #155// non-ascii = %x80-D7FF / %xE000-10FFFF156// non-eol = %x09 / %x20-7F / non-ascii157//158// comment = comment-start-symbol *non-eol159160for i := 1; i < len(b); {161if b[i] == '\n' {162return b[:i], b[i:], nil163}164if b[i] == '\r' {165if i+1 < len(b) && b[i+1] == '\n' {166return b[:i+1], b[i+1:], nil167}168return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")169}170size := characters.Utf8ValidNext(b[i:])171if size == 0 {172return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")173}174175i += size176}177178return b, b[len(b):], nil179}180181func scanBasicString(b []byte) ([]byte, bool, []byte, error) {182// basic-string = quotation-mark *basic-char quotation-mark183// quotation-mark = %x22 ; "184// basic-char = basic-unescaped / escaped185// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii186// escaped = escape escape-seq-char187escaped := false188i := 1189190for ; i < len(b); i++ {191switch b[i] {192case '"':193return b[:i+1], escaped, b[i+1:], nil194case '\n', '\r':195return nil, escaped, nil, NewParserError(b[i:i+1], "basic strings cannot have new lines")196case '\\':197if len(b) < i+2 {198return nil, escaped, nil, NewParserError(b[i:i+1], "need a character after \\")199}200escaped = true201i++ // skip the next character202}203}204205return nil, escaped, nil, NewParserError(b[len(b):], `basic string not terminated by "`)206}207208func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {209// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body210// ml-basic-string-delim211// ml-basic-string-delim = 3quotation-mark212// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]213//214// mlb-content = mlb-char / newline / mlb-escaped-nl215// mlb-char = mlb-unescaped / escaped216// mlb-quotes = 1*2quotation-mark217// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii218// mlb-escaped-nl = escape ws newline *( wschar / newline )219220escaped := false221i := 3222223for ; i < len(b); i++ {224switch b[i] {225case '"':226if scanFollowsMultilineBasicStringDelimiter(b[i:]) {227i += 3228229// At that point we found 3 apostrophe, and i is the230// index of the byte after the third one. The scanner231// needs to be eager, because there can be an extra 2232// apostrophe that can be accepted at the end of the233// string.234235if i >= len(b) || b[i] != '"' {236return b[:i], escaped, b[i:], nil237}238i++239240if i >= len(b) || b[i] != '"' {241return b[:i], escaped, b[i:], nil242}243i++244245if i < len(b) && b[i] == '"' {246return nil, escaped, nil, NewParserError(b[i-3:i+1], `""" not allowed in multiline basic string`)247}248249return b[:i], escaped, b[i:], nil250}251case '\\':252if len(b) < i+2 {253return nil, escaped, nil, NewParserError(b[len(b):], "need a character after \\")254}255escaped = true256i++ // skip the next character257case '\r':258if len(b) < i+2 {259return nil, escaped, nil, NewParserError(b[len(b):], `need a \n after \r`)260}261if b[i+1] != '\n' {262return nil, escaped, nil, NewParserError(b[i:i+2], `need a \n after \r`)263}264i++ // skip the \n265}266}267268return nil, escaped, nil, NewParserError(b[len(b):], `multiline basic string not terminated by """`)269}270271272