Path: blob/main/vendor/go.yaml.in/yaml/v3/scannerc.go
2872 views
//1// Copyright (c) 2011-2019 Canonical Ltd2// Copyright (c) 2006-2010 Kirill Simonov3//4// Permission is hereby granted, free of charge, to any person obtaining a copy of5// this software and associated documentation files (the "Software"), to deal in6// the Software without restriction, including without limitation the rights to7// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies8// of the Software, and to permit persons to whom the Software is furnished to do9// so, subject to the following conditions:10//11// The above copyright notice and this permission notice shall be included in all12// copies or substantial portions of the Software.13//14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE17// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20// SOFTWARE.2122package yaml2324import (25"bytes"26"fmt"27)2829// Introduction30// ************31//32// The following notes assume that you are familiar with the YAML specification33// (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in34// some cases we are less restrictive that it requires.35//36// The process of transforming a YAML stream into a sequence of events is37// divided on two steps: Scanning and Parsing.38//39// The Scanner transforms the input stream into a sequence of tokens, while the40// parser transform the sequence of tokens produced by the Scanner into a41// sequence of parsing events.42//43// The Scanner is rather clever and complicated. 
// The Parser, on the contrary,
// is a straightforward implementation of a recursive-descent parser (or,
// LL(1) parser, as it is usually called).
//
// Actually there are two issues of Scanning that might be called "clever", the
// rest is quite straightforward.  The issues are "block collection start" and
// "simple keys".  Both issues are explained below in detail.
//
// Here the Scanning step is explained and implemented.  We start with the list
// of all the tokens produced by the Scanner together with short descriptions.
//
// Now, tokens:
//
//	STREAM-START(encoding)          # The stream start.
//	STREAM-END                      # The stream end.
//	VERSION-DIRECTIVE(major,minor)  # The '%YAML' directive.
//	TAG-DIRECTIVE(handle,prefix)    # The '%TAG' directive.
//	DOCUMENT-START                  # '---'
//	DOCUMENT-END                    # '...'
//	BLOCK-SEQUENCE-START            # Indentation increase denoting a block
//	BLOCK-MAPPING-START             # sequence or a block mapping.
//	BLOCK-END                       # Indentation decrease.
//	FLOW-SEQUENCE-START             # '['
//	FLOW-SEQUENCE-END               # ']'
//	FLOW-MAPPING-START              # '{'
//	FLOW-MAPPING-END                # '}'
//	BLOCK-ENTRY                     # '-'
//	FLOW-ENTRY                      # ','
//	KEY                             # '?' or nothing (simple keys).
//	VALUE                           # ':'
//	ALIAS(anchor)                   # '*anchor'
//	ANCHOR(anchor)                  # '&anchor'
//	TAG(handle,suffix)              # '!handle!suffix'
//	SCALAR(value,style)             # A scalar.
//
// The following two tokens are "virtual" tokens denoting the beginning and the
// end of the stream:
//
//	STREAM-START(encoding)
//	STREAM-END
//
// We pass the information about the input stream encoding with the
// STREAM-START token.
//
// The next two tokens are responsible for directives:
//
//	VERSION-DIRECTIVE(major,minor)
//	TAG-DIRECTIVE(handle,prefix)
//
// Example:
//
//	%YAML   1.1
//	%TAG    !   !foo
//	%TAG    !yaml!  
tag:yaml.org,2002:97// ---98//99// The correspoding sequence of tokens:100//101// STREAM-START(utf-8)102// VERSION-DIRECTIVE(1,1)103// TAG-DIRECTIVE("!","!foo")104// TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")105// DOCUMENT-START106// STREAM-END107//108// Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole109// line.110//111// The document start and end indicators are represented by:112//113// DOCUMENT-START114// DOCUMENT-END115//116// Note that if a YAML stream contains an implicit document (without '---'117// and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be118// produced.119//120// In the following examples, we present whole documents together with the121// produced tokens.122//123// 1. An implicit document:124//125// 'a scalar'126//127// Tokens:128//129// STREAM-START(utf-8)130// SCALAR("a scalar",single-quoted)131// STREAM-END132//133// 2. An explicit document:134//135// ---136// 'a scalar'137// ...138//139// Tokens:140//141// STREAM-START(utf-8)142// DOCUMENT-START143// SCALAR("a scalar",single-quoted)144// DOCUMENT-END145// STREAM-END146//147// 3. Several documents in a stream:148//149// 'a scalar'150// ---151// 'another scalar'152// ---153// 'yet another scalar'154//155// Tokens:156//157// STREAM-START(utf-8)158// SCALAR("a scalar",single-quoted)159// DOCUMENT-START160// SCALAR("another scalar",single-quoted)161// DOCUMENT-START162// SCALAR("yet another scalar",single-quoted)163// STREAM-END164//165// We have already introduced the SCALAR token above. The following tokens are166// used to describe aliases, anchors, tag, and scalars:167//168// ALIAS(anchor)169// ANCHOR(anchor)170// TAG(handle,suffix)171// SCALAR(value,style)172//173// The following series of examples illustrate the usage of these tokens:174//175// 1. A recursive sequence:176//177// &A [ *A ]178//179// Tokens:180//181// STREAM-START(utf-8)182// ANCHOR("A")183// FLOW-SEQUENCE-START184// ALIAS("A")185// FLOW-SEQUENCE-END186// STREAM-END187//188// 2. 
A tagged scalar:189//190// !!float "3.14" # A good approximation.191//192// Tokens:193//194// STREAM-START(utf-8)195// TAG("!!","float")196// SCALAR("3.14",double-quoted)197// STREAM-END198//199// 3. Various scalar styles:200//201// --- # Implicit empty plain scalars do not produce tokens.202// --- a plain scalar203// --- 'a single-quoted scalar'204// --- "a double-quoted scalar"205// --- |-206// a literal scalar207// --- >-208// a folded209// scalar210//211// Tokens:212//213// STREAM-START(utf-8)214// DOCUMENT-START215// DOCUMENT-START216// SCALAR("a plain scalar",plain)217// DOCUMENT-START218// SCALAR("a single-quoted scalar",single-quoted)219// DOCUMENT-START220// SCALAR("a double-quoted scalar",double-quoted)221// DOCUMENT-START222// SCALAR("a literal scalar",literal)223// DOCUMENT-START224// SCALAR("a folded scalar",folded)225// STREAM-END226//227// Now it's time to review collection-related tokens. We will start with228// flow collections:229//230// FLOW-SEQUENCE-START231// FLOW-SEQUENCE-END232// FLOW-MAPPING-START233// FLOW-MAPPING-END234// FLOW-ENTRY235// KEY236// VALUE237//238// The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and239// FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'240// correspondingly. FLOW-ENTRY represent the ',' indicator. Finally the241// indicators '?' and ':', which are used for denoting mapping keys and values,242// are represented by the KEY and VALUE tokens.243//244// The following examples show flow collections:245//246// 1. A flow sequence:247//248// [item 1, item 2, item 3]249//250// Tokens:251//252// STREAM-START(utf-8)253// FLOW-SEQUENCE-START254// SCALAR("item 1",plain)255// FLOW-ENTRY256// SCALAR("item 2",plain)257// FLOW-ENTRY258// SCALAR("item 3",plain)259// FLOW-SEQUENCE-END260// STREAM-END261//262// 2. A flow mapping:263//264// {265// a simple key: a value, # Note that the KEY token is produced.266// ? 
a complex key: another value,267// }268//269// Tokens:270//271// STREAM-START(utf-8)272// FLOW-MAPPING-START273// KEY274// SCALAR("a simple key",plain)275// VALUE276// SCALAR("a value",plain)277// FLOW-ENTRY278// KEY279// SCALAR("a complex key",plain)280// VALUE281// SCALAR("another value",plain)282// FLOW-ENTRY283// FLOW-MAPPING-END284// STREAM-END285//286// A simple key is a key which is not denoted by the '?' indicator. Note that287// the Scanner still produce the KEY token whenever it encounters a simple key.288//289// For scanning block collections, the following tokens are used (note that we290// repeat KEY and VALUE here):291//292// BLOCK-SEQUENCE-START293// BLOCK-MAPPING-START294// BLOCK-END295// BLOCK-ENTRY296// KEY297// VALUE298//299// The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation300// increase that precedes a block collection (cf. the INDENT token in Python).301// The token BLOCK-END denote indentation decrease that ends a block collection302// (cf. the DEDENT token in Python). However YAML has some syntax pecularities303// that makes detections of these tokens more complex.304//305// The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators306// '-', '?', and ':' correspondingly.307//308// The following examples show how the tokens BLOCK-SEQUENCE-START,309// BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:310//311// 1. 
Block sequences:312//313// - item 1314// - item 2315// -316// - item 3.1317// - item 3.2318// -319// key 1: value 1320// key 2: value 2321//322// Tokens:323//324// STREAM-START(utf-8)325// BLOCK-SEQUENCE-START326// BLOCK-ENTRY327// SCALAR("item 1",plain)328// BLOCK-ENTRY329// SCALAR("item 2",plain)330// BLOCK-ENTRY331// BLOCK-SEQUENCE-START332// BLOCK-ENTRY333// SCALAR("item 3.1",plain)334// BLOCK-ENTRY335// SCALAR("item 3.2",plain)336// BLOCK-END337// BLOCK-ENTRY338// BLOCK-MAPPING-START339// KEY340// SCALAR("key 1",plain)341// VALUE342// SCALAR("value 1",plain)343// KEY344// SCALAR("key 2",plain)345// VALUE346// SCALAR("value 2",plain)347// BLOCK-END348// BLOCK-END349// STREAM-END350//351// 2. Block mappings:352//353// a simple key: a value # The KEY token is produced here.354// ? a complex key355// : another value356// a mapping:357// key 1: value 1358// key 2: value 2359// a sequence:360// - item 1361// - item 2362//363// Tokens:364//365// STREAM-START(utf-8)366// BLOCK-MAPPING-START367// KEY368// SCALAR("a simple key",plain)369// VALUE370// SCALAR("a value",plain)371// KEY372// SCALAR("a complex key",plain)373// VALUE374// SCALAR("another value",plain)375// KEY376// SCALAR("a mapping",plain)377// BLOCK-MAPPING-START378// KEY379// SCALAR("key 1",plain)380// VALUE381// SCALAR("value 1",plain)382// KEY383// SCALAR("key 2",plain)384// VALUE385// SCALAR("value 2",plain)386// BLOCK-END387// KEY388// SCALAR("a sequence",plain)389// VALUE390// BLOCK-SEQUENCE-START391// BLOCK-ENTRY392// SCALAR("item 1",plain)393// BLOCK-ENTRY394// SCALAR("item 2",plain)395// BLOCK-END396// BLOCK-END397// STREAM-END398//399// YAML does not always require to start a new block collection from a new400// line. If the current line contains only '-', '?', and ':' indicators, a new401// block collection may start at the current line. The following examples402// illustrate this case:403//404// 1. 
Collections in a sequence:405//406// - - item 1407// - item 2408// - key 1: value 1409// key 2: value 2410// - ? complex key411// : complex value412//413// Tokens:414//415// STREAM-START(utf-8)416// BLOCK-SEQUENCE-START417// BLOCK-ENTRY418// BLOCK-SEQUENCE-START419// BLOCK-ENTRY420// SCALAR("item 1",plain)421// BLOCK-ENTRY422// SCALAR("item 2",plain)423// BLOCK-END424// BLOCK-ENTRY425// BLOCK-MAPPING-START426// KEY427// SCALAR("key 1",plain)428// VALUE429// SCALAR("value 1",plain)430// KEY431// SCALAR("key 2",plain)432// VALUE433// SCALAR("value 2",plain)434// BLOCK-END435// BLOCK-ENTRY436// BLOCK-MAPPING-START437// KEY438// SCALAR("complex key")439// VALUE440// SCALAR("complex value")441// BLOCK-END442// BLOCK-END443// STREAM-END444//445// 2. Collections in a mapping:446//447// ? a sequence448// : - item 1449// - item 2450// ? a mapping451// : key 1: value 1452// key 2: value 2453//454// Tokens:455//456// STREAM-START(utf-8)457// BLOCK-MAPPING-START458// KEY459// SCALAR("a sequence",plain)460// VALUE461// BLOCK-SEQUENCE-START462// BLOCK-ENTRY463// SCALAR("item 1",plain)464// BLOCK-ENTRY465// SCALAR("item 2",plain)466// BLOCK-END467// KEY468// SCALAR("a mapping",plain)469// VALUE470// BLOCK-MAPPING-START471// KEY472// SCALAR("key 1",plain)473// VALUE474// SCALAR("value 1",plain)475// KEY476// SCALAR("key 2",plain)477// VALUE478// SCALAR("value 2",plain)479// BLOCK-END480// BLOCK-END481// STREAM-END482//483// YAML also permits non-indented sequences if they are included into a block484// mapping. 
In this case, the token BLOCK-SEQUENCE-START is not produced:485//486// key:487// - item 1 # BLOCK-SEQUENCE-START is NOT produced here.488// - item 2489//490// Tokens:491//492// STREAM-START(utf-8)493// BLOCK-MAPPING-START494// KEY495// SCALAR("key",plain)496// VALUE497// BLOCK-ENTRY498// SCALAR("item 1",plain)499// BLOCK-ENTRY500// SCALAR("item 2",plain)501// BLOCK-END502//503504// Ensure that the buffer contains the required number of characters.505// Return true on success, false on failure (reader error or memory error).506func cache(parser *yaml_parser_t, length int) bool {507// [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B)508return parser.unread >= length || yaml_parser_update_buffer(parser, length)509}510511// Advance the buffer pointer.512func skip(parser *yaml_parser_t) {513if !is_blank(parser.buffer, parser.buffer_pos) {514parser.newlines = 0515}516parser.mark.index++517parser.mark.column++518parser.unread--519parser.buffer_pos += width(parser.buffer[parser.buffer_pos])520}521522func skip_line(parser *yaml_parser_t) {523if is_crlf(parser.buffer, parser.buffer_pos) {524parser.mark.index += 2525parser.mark.column = 0526parser.mark.line++527parser.unread -= 2528parser.buffer_pos += 2529parser.newlines++530} else if is_break(parser.buffer, parser.buffer_pos) {531parser.mark.index++532parser.mark.column = 0533parser.mark.line++534parser.unread--535parser.buffer_pos += width(parser.buffer[parser.buffer_pos])536parser.newlines++537}538}539540// Copy a character to a string buffer and advance pointers.541func read(parser *yaml_parser_t, s []byte) []byte {542if !is_blank(parser.buffer, parser.buffer_pos) {543parser.newlines = 0544}545w := width(parser.buffer[parser.buffer_pos])546if w == 0 {547panic("invalid character sequence")548}549if len(s) == 0 {550s = make([]byte, 0, 32)551}552if w == 1 && len(s)+w <= cap(s) {553s = s[:len(s)+1]554s[len(s)-1] = parser.buffer[parser.buffer_pos]555parser.buffer_pos++556} else {557s = append(s, 
parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...)558parser.buffer_pos += w559}560parser.mark.index++561parser.mark.column++562parser.unread--563return s564}565566// Copy a line break character to a string buffer and advance pointers.567func read_line(parser *yaml_parser_t, s []byte) []byte {568buf := parser.buffer569pos := parser.buffer_pos570switch {571case buf[pos] == '\r' && buf[pos+1] == '\n':572// CR LF . LF573s = append(s, '\n')574parser.buffer_pos += 2575parser.mark.index++576parser.unread--577case buf[pos] == '\r' || buf[pos] == '\n':578// CR|LF . LF579s = append(s, '\n')580parser.buffer_pos += 1581case buf[pos] == '\xC2' && buf[pos+1] == '\x85':582// NEL . LF583s = append(s, '\n')584parser.buffer_pos += 2585case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'):586// LS|PS . LS|PS587s = append(s, buf[parser.buffer_pos:pos+3]...)588parser.buffer_pos += 3589default:590return s591}592parser.mark.index++593parser.mark.column = 0594parser.mark.line++595parser.unread--596parser.newlines++597return s598}599600// Get the next token.601func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool {602// Erase the token object.603*token = yaml_token_t{} // [Go] Is this necessary?604605// No tokens after STREAM-END or error.606if parser.stream_end_produced || parser.error != yaml_NO_ERROR {607return true608}609610// Ensure that the tokens queue contains enough tokens.611if !parser.token_available {612if !yaml_parser_fetch_more_tokens(parser) {613return false614}615}616617// Fetch the next token from the queue.618*token = parser.tokens[parser.tokens_head]619parser.tokens_head++620parser.tokens_parsed++621parser.token_available = false622623if token.typ == yaml_STREAM_END_TOKEN {624parser.stream_end_produced = true625}626return true627}628629// Set the scanner error and return false.630func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool 
{631parser.error = yaml_SCANNER_ERROR632parser.context = context633parser.context_mark = context_mark634parser.problem = problem635parser.problem_mark = parser.mark636return false637}638639func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool {640context := "while parsing a tag"641if directive {642context = "while parsing a %TAG directive"643}644return yaml_parser_set_scanner_error(parser, context, context_mark, problem)645}646647func trace(args ...interface{}) func() {648pargs := append([]interface{}{"+++"}, args...)649fmt.Println(pargs...)650pargs = append([]interface{}{"---"}, args...)651return func() { fmt.Println(pargs...) }652}653654// Ensure that the tokens queue contains at least one token which can be655// returned to the Parser.656func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {657// While we need more tokens to fetch, do it.658for {659// [Go] The comment parsing logic requires a lookahead of two tokens660// so that foot comments may be parsed in time of associating them661// with the tokens that are parsed before them, and also for line662// comments to be transformed into head comments in some edge cases.663if parser.tokens_head < len(parser.tokens)-2 {664// If a potential simple key is at the head position, we need to fetch665// the next token to disambiguate it.666head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed]667if !ok {668break669} else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok {670return false671} else if !valid {672break673}674}675// Fetch the next token.676if !yaml_parser_fetch_next_token(parser) {677return false678}679}680681parser.token_available = true682return true683}684685// The dispatcher for token fetchers.686func yaml_parser_fetch_next_token(parser *yaml_parser_t) (ok bool) {687// Ensure that the buffer is initialized.688if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) 
{689return false690}691692// Check if we just started scanning. Fetch STREAM-START then.693if !parser.stream_start_produced {694return yaml_parser_fetch_stream_start(parser)695}696697scan_mark := parser.mark698699// Eat whitespaces and comments until we reach the next token.700if !yaml_parser_scan_to_next_token(parser) {701return false702}703704// [Go] While unrolling indents, transform the head comments of prior705// indentation levels observed after scan_start into foot comments at706// the respective indexes.707708// Check the indentation level against the current column.709if !yaml_parser_unroll_indent(parser, parser.mark.column, scan_mark) {710return false711}712713// Ensure that the buffer contains at least 4 characters. 4 is the length714// of the longest indicators ('--- ' and '... ').715if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {716return false717}718719// Is it the end of the stream?720if is_z(parser.buffer, parser.buffer_pos) {721return yaml_parser_fetch_stream_end(parser)722}723724// Is it a directive?725if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' {726return yaml_parser_fetch_directive(parser)727}728729buf := parser.buffer730pos := parser.buffer_pos731732// Is it the document start indicator?733if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) {734return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN)735}736737// Is it the document end indicator?738if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' 
&& is_blankz(buf, pos+3) {739return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN)740}741742comment_mark := parser.mark743if len(parser.tokens) > 0 && (parser.flow_level == 0 && buf[pos] == ':' || parser.flow_level > 0 && buf[pos] == ',') {744// Associate any following comments with the prior token.745comment_mark = parser.tokens[len(parser.tokens)-1].start_mark746}747defer func() {748if !ok {749return750}751if len(parser.tokens) > 0 && parser.tokens[len(parser.tokens)-1].typ == yaml_BLOCK_ENTRY_TOKEN {752// Sequence indicators alone have no line comments. It becomes753// a head comment for whatever follows.754return755}756if !yaml_parser_scan_line_comment(parser, comment_mark) {757ok = false758return759}760}()761762// Is it the flow sequence start indicator?763if buf[pos] == '[' {764return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN)765}766767// Is it the flow mapping start indicator?768if parser.buffer[parser.buffer_pos] == '{' {769return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN)770}771772// Is it the flow sequence end indicator?773if parser.buffer[parser.buffer_pos] == ']' {774return yaml_parser_fetch_flow_collection_end(parser,775yaml_FLOW_SEQUENCE_END_TOKEN)776}777778// Is it the flow mapping end indicator?779if parser.buffer[parser.buffer_pos] == '}' {780return yaml_parser_fetch_flow_collection_end(parser,781yaml_FLOW_MAPPING_END_TOKEN)782}783784// Is it the flow entry indicator?785if parser.buffer[parser.buffer_pos] == ',' {786return yaml_parser_fetch_flow_entry(parser)787}788789// Is it the block entry indicator?790if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) {791return yaml_parser_fetch_block_entry(parser)792}793794// Is it the key indicator?795if parser.buffer[parser.buffer_pos] == '?' 
&& (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {796return yaml_parser_fetch_key(parser)797}798799// Is it the value indicator?800if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {801return yaml_parser_fetch_value(parser)802}803804// Is it an alias?805if parser.buffer[parser.buffer_pos] == '*' {806return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN)807}808809// Is it an anchor?810if parser.buffer[parser.buffer_pos] == '&' {811return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN)812}813814// Is it a tag?815if parser.buffer[parser.buffer_pos] == '!' {816return yaml_parser_fetch_tag(parser)817}818819// Is it a literal scalar?820if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 {821return yaml_parser_fetch_block_scalar(parser, true)822}823824// Is it a folded scalar?825if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 {826return yaml_parser_fetch_block_scalar(parser, false)827}828829// Is it a single-quoted scalar?830if parser.buffer[parser.buffer_pos] == '\'' {831return yaml_parser_fetch_flow_scalar(parser, true)832}833834// Is it a double-quoted scalar?835if parser.buffer[parser.buffer_pos] == '"' {836return yaml_parser_fetch_flow_scalar(parser, false)837}838839// Is it a plain scalar?840//841// A plain scalar may start with any non-blank characters except842//843// '-', '?', ':', ',', '[', ']', '{', '}',844// '#', '&', '*', '!', '|', '>', '\'', '\"',845// '%', '@', '`'.846//847// In the block context (and, for the '-' indicator, in the flow context848// too), it may also start with the characters849//850// '-', '?', ':'851//852// if it is followed by a non-space character.853//854// The last rule is more restrictive than the specification requires.855// [Go] TODO Make this logic more reasonable.856//switch parser.buffer[parser.buffer_pos] {857//case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', 
'*', '>', '|', '"', '\'', '@', '%', '-', '`':858//}859if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' ||860parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' ||861parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' ||862parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||863parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' ||864parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' ||865parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '|' ||866parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' ||867parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' ||868parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') ||869(parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) ||870(parser.flow_level == 0 &&871(parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') &&872!is_blankz(parser.buffer, parser.buffer_pos+1)) {873return yaml_parser_fetch_plain_scalar(parser)874}875876// If we don't determine the token type so far, it is an error.877return yaml_parser_set_scanner_error(parser,878"while scanning for the next token", parser.mark,879"found character that cannot start any token")880}881882func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) {883if !simple_key.possible {884return false, true885}886887// The 1.2 specification says:888//889// "If the ? indicator is omitted, parsing needs to see past the890// implicit key to recognize it as such. To limit the amount of891// lookahead required, the “:” indicator must appear at most 1024892// Unicode characters beyond the start of the key. 
In addition, the key893// is restricted to a single line."894//895if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index {896// Check if the potential simple key to be removed is required.897if simple_key.required {898return false, yaml_parser_set_scanner_error(parser,899"while scanning a simple key", simple_key.mark,900"could not find expected ':'")901}902simple_key.possible = false903return false, true904}905return true, true906}907908// Check if a simple key may start at the current position and add it if909// needed.910func yaml_parser_save_simple_key(parser *yaml_parser_t) bool {911// A simple key is required at the current position if the scanner is in912// the block context and the current column coincides with the indentation913// level.914915required := parser.flow_level == 0 && parser.indent == parser.mark.column916917//918// If the current position may start a simple key, save it.919//920if parser.simple_key_allowed {921simple_key := yaml_simple_key_t{922possible: true,923required: required,924token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),925mark: parser.mark,926}927928if !yaml_parser_remove_simple_key(parser) {929return false930}931parser.simple_keys[len(parser.simple_keys)-1] = simple_key932parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1933}934return true935}936937// Remove a potential simple key at the current flow level.938func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool {939i := len(parser.simple_keys) - 1940if parser.simple_keys[i].possible {941// If the key is required, it is an error.942if parser.simple_keys[i].required {943return yaml_parser_set_scanner_error(parser,944"while scanning a simple key", parser.simple_keys[i].mark,945"could not find expected ':'")946}947// Remove the key from the stack.948parser.simple_keys[i].possible = false949delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number)950}951return 
true952}953954// max_flow_level limits the flow_level955const max_flow_level = 10000956957// Increase the flow level and resize the simple key list if needed.958func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {959// Reset the simple key on the next level.960parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{961possible: false,962required: false,963token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),964mark: parser.mark,965})966967// Increase the flow level.968parser.flow_level++969if parser.flow_level > max_flow_level {970return yaml_parser_set_scanner_error(parser,971"while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark,972fmt.Sprintf("exceeded max depth of %d", max_flow_level))973}974return true975}976977// Decrease the flow level.978func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool {979if parser.flow_level > 0 {980parser.flow_level--981last := len(parser.simple_keys) - 1982delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number)983parser.simple_keys = parser.simple_keys[:last]984}985return true986}987988// max_indents limits the indents stack size989const max_indents = 10000990991// Push the current indentation level to the stack and set the new level992// the current column is greater than the indentation level. 
In this case,993// append or insert the specified token into the token queue.994func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool {995// In the flow context, do nothing.996if parser.flow_level > 0 {997return true998}9991000if parser.indent < column {1001// Push the current indentation level to the stack and set the new1002// indentation level.1003parser.indents = append(parser.indents, parser.indent)1004parser.indent = column1005if len(parser.indents) > max_indents {1006return yaml_parser_set_scanner_error(parser,1007"while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark,1008fmt.Sprintf("exceeded max depth of %d", max_indents))1009}10101011// Create a token and insert it into the queue.1012token := yaml_token_t{1013typ: typ,1014start_mark: mark,1015end_mark: mark,1016}1017if number > -1 {1018number -= parser.tokens_parsed1019}1020yaml_insert_token(parser, number, &token)1021}1022return true1023}10241025// Pop indentation levels from the indents stack until the current level1026// becomes less or equal to the column. For each indentation level, append1027// the BLOCK-END token.1028func yaml_parser_unroll_indent(parser *yaml_parser_t, column int, scan_mark yaml_mark_t) bool {1029// In the flow context, do nothing.1030if parser.flow_level > 0 {1031return true1032}10331034block_mark := scan_mark1035block_mark.index--10361037// Loop through the indentation levels in the stack.1038for parser.indent > column {10391040// [Go] Reposition the end token before potential following1041// foot comments of parent blocks. 
For that, search1042// backwards for recent comments that were at the same1043// indent as the block that is ending now.1044stop_index := block_mark.index1045for i := len(parser.comments) - 1; i >= 0; i-- {1046comment := &parser.comments[i]10471048if comment.end_mark.index < stop_index {1049// Don't go back beyond the start of the comment/whitespace scan, unless column < 0.1050// If requested indent column is < 0, then the document is over and everything else1051// is a foot anyway.1052break1053}1054if comment.start_mark.column == parser.indent+1 {1055// This is a good match. But maybe there's a former comment1056// at that same indent level, so keep searching.1057block_mark = comment.start_mark1058}10591060// While the end of the former comment matches with1061// the start of the following one, we know there's1062// nothing in between and scanning is still safe.1063stop_index = comment.scan_mark.index1064}10651066// Create a token and append it to the queue.1067token := yaml_token_t{1068typ: yaml_BLOCK_END_TOKEN,1069start_mark: block_mark,1070end_mark: block_mark,1071}1072yaml_insert_token(parser, -1, &token)10731074// Pop the indentation level.1075parser.indent = parser.indents[len(parser.indents)-1]1076parser.indents = parser.indents[:len(parser.indents)-1]1077}1078return true1079}10801081// Initialize the scanner and produce the STREAM-START token.1082func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool {10831084// Set the initial indentation.1085parser.indent = -110861087// Initialize the simple key stack.1088parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})10891090parser.simple_keys_by_tok = make(map[int]int)10911092// A simple key is allowed at the beginning of the stream.1093parser.simple_key_allowed = true10941095// We have started.1096parser.stream_start_produced = true10971098// Create the STREAM-START token and append it to the queue.1099token := yaml_token_t{1100typ: yaml_STREAM_START_TOKEN,1101start_mark: 
parser.mark,1102end_mark: parser.mark,1103encoding: parser.encoding,1104}1105yaml_insert_token(parser, -1, &token)1106return true1107}11081109// Produce the STREAM-END token and shut down the scanner.1110func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool {11111112// Force new line.1113if parser.mark.column != 0 {1114parser.mark.column = 01115parser.mark.line++1116}11171118// Reset the indentation level.1119if !yaml_parser_unroll_indent(parser, -1, parser.mark) {1120return false1121}11221123// Reset simple keys.1124if !yaml_parser_remove_simple_key(parser) {1125return false1126}11271128parser.simple_key_allowed = false11291130// Create the STREAM-END token and append it to the queue.1131token := yaml_token_t{1132typ: yaml_STREAM_END_TOKEN,1133start_mark: parser.mark,1134end_mark: parser.mark,1135}1136yaml_insert_token(parser, -1, &token)1137return true1138}11391140// Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.1141func yaml_parser_fetch_directive(parser *yaml_parser_t) bool {1142// Reset the indentation level.1143if !yaml_parser_unroll_indent(parser, -1, parser.mark) {1144return false1145}11461147// Reset simple keys.1148if !yaml_parser_remove_simple_key(parser) {1149return false1150}11511152parser.simple_key_allowed = false11531154// Create the YAML-DIRECTIVE or TAG-DIRECTIVE token.1155token := yaml_token_t{}1156if !yaml_parser_scan_directive(parser, &token) {1157return false1158}1159// Append the token to the queue.1160yaml_insert_token(parser, -1, &token)1161return true1162}11631164// Produce the DOCUMENT-START or DOCUMENT-END token.1165func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool {1166// Reset the indentation level.1167if !yaml_parser_unroll_indent(parser, -1, parser.mark) {1168return false1169}11701171// Reset simple keys.1172if !yaml_parser_remove_simple_key(parser) {1173return false1174}11751176parser.simple_key_allowed = false11771178// Consume the token.1179start_mark := 
parser.mark11801181skip(parser)1182skip(parser)1183skip(parser)11841185end_mark := parser.mark11861187// Create the DOCUMENT-START or DOCUMENT-END token.1188token := yaml_token_t{1189typ: typ,1190start_mark: start_mark,1191end_mark: end_mark,1192}1193// Append the token to the queue.1194yaml_insert_token(parser, -1, &token)1195return true1196}11971198// Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.1199func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool {12001201// The indicators '[' and '{' may start a simple key.1202if !yaml_parser_save_simple_key(parser) {1203return false1204}12051206// Increase the flow level.1207if !yaml_parser_increase_flow_level(parser) {1208return false1209}12101211// A simple key may follow the indicators '[' and '{'.1212parser.simple_key_allowed = true12131214// Consume the token.1215start_mark := parser.mark1216skip(parser)1217end_mark := parser.mark12181219// Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token.1220token := yaml_token_t{1221typ: typ,1222start_mark: start_mark,1223end_mark: end_mark,1224}1225// Append the token to the queue.1226yaml_insert_token(parser, -1, &token)1227return true1228}12291230// Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.1231func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool {1232// Reset any potential simple key on the current flow level.1233if !yaml_parser_remove_simple_key(parser) {1234return false1235}12361237// Decrease the flow level.1238if !yaml_parser_decrease_flow_level(parser) {1239return false1240}12411242// No simple keys after the indicators ']' and '}'.1243parser.simple_key_allowed = false12441245// Consume the token.12461247start_mark := parser.mark1248skip(parser)1249end_mark := parser.mark12501251// Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token.1252token := yaml_token_t{1253typ: typ,1254start_mark: start_mark,1255end_mark: end_mark,1256}1257// Append the token 
to the queue.1258yaml_insert_token(parser, -1, &token)1259return true1260}12611262// Produce the FLOW-ENTRY token.1263func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool {1264// Reset any potential simple keys on the current flow level.1265if !yaml_parser_remove_simple_key(parser) {1266return false1267}12681269// Simple keys are allowed after ','.1270parser.simple_key_allowed = true12711272// Consume the token.1273start_mark := parser.mark1274skip(parser)1275end_mark := parser.mark12761277// Create the FLOW-ENTRY token and append it to the queue.1278token := yaml_token_t{1279typ: yaml_FLOW_ENTRY_TOKEN,1280start_mark: start_mark,1281end_mark: end_mark,1282}1283yaml_insert_token(parser, -1, &token)1284return true1285}12861287// Produce the BLOCK-ENTRY token.1288func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool {1289// Check if the scanner is in the block context.1290if parser.flow_level == 0 {1291// Check if we are allowed to start a new entry.1292if !parser.simple_key_allowed {1293return yaml_parser_set_scanner_error(parser, "", parser.mark,1294"block sequence entries are not allowed in this context")1295}1296// Add the BLOCK-SEQUENCE-START token if needed.1297if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) {1298return false1299}1300} else {1301// It is an error for the '-' indicator to occur in the flow context,1302// but we let the Parser detect and report about it because the Parser1303// is able to point to the context.1304}13051306// Reset any potential simple keys on the current flow level.1307if !yaml_parser_remove_simple_key(parser) {1308return false1309}13101311// Simple keys are allowed after '-'.1312parser.simple_key_allowed = true13131314// Consume the token.1315start_mark := parser.mark1316skip(parser)1317end_mark := parser.mark13181319// Create the BLOCK-ENTRY token and append it to the queue.1320token := yaml_token_t{1321typ: yaml_BLOCK_ENTRY_TOKEN,1322start_mark: 
start_mark,1323end_mark: end_mark,1324}1325yaml_insert_token(parser, -1, &token)1326return true1327}13281329// Produce the KEY token.1330func yaml_parser_fetch_key(parser *yaml_parser_t) bool {13311332// In the block context, additional checks are required.1333if parser.flow_level == 0 {1334// Check if we are allowed to start a new key (not nessesary simple).1335if !parser.simple_key_allowed {1336return yaml_parser_set_scanner_error(parser, "", parser.mark,1337"mapping keys are not allowed in this context")1338}1339// Add the BLOCK-MAPPING-START token if needed.1340if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) {1341return false1342}1343}13441345// Reset any potential simple keys on the current flow level.1346if !yaml_parser_remove_simple_key(parser) {1347return false1348}13491350// Simple keys are allowed after '?' in the block context.1351parser.simple_key_allowed = parser.flow_level == 013521353// Consume the token.1354start_mark := parser.mark1355skip(parser)1356end_mark := parser.mark13571358// Create the KEY token and append it to the queue.1359token := yaml_token_t{1360typ: yaml_KEY_TOKEN,1361start_mark: start_mark,1362end_mark: end_mark,1363}1364yaml_insert_token(parser, -1, &token)1365return true1366}13671368// Produce the VALUE token.1369func yaml_parser_fetch_value(parser *yaml_parser_t) bool {13701371simple_key := &parser.simple_keys[len(parser.simple_keys)-1]13721373// Have we found a simple key?1374if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok {1375return false13761377} else if valid {13781379// Create the KEY token and insert it into the queue.1380token := yaml_token_t{1381typ: yaml_KEY_TOKEN,1382start_mark: simple_key.mark,1383end_mark: simple_key.mark,1384}1385yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token)13861387// In the block context, we may need to add the BLOCK-MAPPING-START token.1388if !yaml_parser_roll_indent(parser, 
simple_key.mark.column,1389simple_key.token_number,1390yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) {1391return false1392}13931394// Remove the simple key.1395simple_key.possible = false1396delete(parser.simple_keys_by_tok, simple_key.token_number)13971398// A simple key cannot follow another simple key.1399parser.simple_key_allowed = false14001401} else {1402// The ':' indicator follows a complex key.14031404// In the block context, extra checks are required.1405if parser.flow_level == 0 {14061407// Check if we are allowed to start a complex value.1408if !parser.simple_key_allowed {1409return yaml_parser_set_scanner_error(parser, "", parser.mark,1410"mapping values are not allowed in this context")1411}14121413// Add the BLOCK-MAPPING-START token if needed.1414if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) {1415return false1416}1417}14181419// Simple keys after ':' are allowed in the block context.1420parser.simple_key_allowed = parser.flow_level == 01421}14221423// Consume the token.1424start_mark := parser.mark1425skip(parser)1426end_mark := parser.mark14271428// Create the VALUE token and append it to the queue.1429token := yaml_token_t{1430typ: yaml_VALUE_TOKEN,1431start_mark: start_mark,1432end_mark: end_mark,1433}1434yaml_insert_token(parser, -1, &token)1435return true1436}14371438// Produce the ALIAS or ANCHOR token.1439func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool {1440// An anchor or an alias could be a simple key.1441if !yaml_parser_save_simple_key(parser) {1442return false1443}14441445// A simple key cannot follow an anchor or an alias.1446parser.simple_key_allowed = false14471448// Create the ALIAS or ANCHOR token and append it to the queue.1449var token yaml_token_t1450if !yaml_parser_scan_anchor(parser, &token, typ) {1451return false1452}1453yaml_insert_token(parser, -1, &token)1454return true1455}14561457// Produce the TAG token.1458func 
yaml_parser_fetch_tag(parser *yaml_parser_t) bool {1459// A tag could be a simple key.1460if !yaml_parser_save_simple_key(parser) {1461return false1462}14631464// A simple key cannot follow a tag.1465parser.simple_key_allowed = false14661467// Create the TAG token and append it to the queue.1468var token yaml_token_t1469if !yaml_parser_scan_tag(parser, &token) {1470return false1471}1472yaml_insert_token(parser, -1, &token)1473return true1474}14751476// Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.1477func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool {1478// Remove any potential simple keys.1479if !yaml_parser_remove_simple_key(parser) {1480return false1481}14821483// A simple key may follow a block scalar.1484parser.simple_key_allowed = true14851486// Create the SCALAR token and append it to the queue.1487var token yaml_token_t1488if !yaml_parser_scan_block_scalar(parser, &token, literal) {1489return false1490}1491yaml_insert_token(parser, -1, &token)1492return true1493}14941495// Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.1496func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool {1497// A plain scalar could be a simple key.1498if !yaml_parser_save_simple_key(parser) {1499return false1500}15011502// A simple key cannot follow a flow scalar.1503parser.simple_key_allowed = false15041505// Create the SCALAR token and append it to the queue.1506var token yaml_token_t1507if !yaml_parser_scan_flow_scalar(parser, &token, single) {1508return false1509}1510yaml_insert_token(parser, -1, &token)1511return true1512}15131514// Produce the SCALAR(...,plain) token.1515func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool {1516// A plain scalar could be a simple key.1517if !yaml_parser_save_simple_key(parser) {1518return false1519}15201521// A simple key cannot follow a flow scalar.1522parser.simple_key_allowed = false15231524// Create the SCALAR token and append it to the 
queue.1525var token yaml_token_t1526if !yaml_parser_scan_plain_scalar(parser, &token) {1527return false1528}1529yaml_insert_token(parser, -1, &token)1530return true1531}15321533// Eat whitespaces and comments until the next token is found.1534func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool {15351536scan_mark := parser.mark15371538// Until the next token is not found.1539for {1540// Allow the BOM mark to start a line.1541if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1542return false1543}1544if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) {1545skip(parser)1546}15471548// Eat whitespaces.1549// Tabs are allowed:1550// - in the flow context1551// - in the block context, but not at the beginning of the line or1552// after '-', '?', or ':' (complex value).1553if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1554return false1555}15561557for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') {1558skip(parser)1559if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1560return false1561}1562}15631564// Check if we just had a line comment under a sequence entry that1565// looks more like a header to the following content. Similar to this:1566//1567// - # The comment1568// - Some data1569//1570// If so, transform the line comment to a head comment and reposition.1571if len(parser.comments) > 0 && len(parser.tokens) > 1 {1572tokenA := parser.tokens[len(parser.tokens)-2]1573tokenB := parser.tokens[len(parser.tokens)-1]1574comment := &parser.comments[len(parser.comments)-1]1575if tokenA.typ == yaml_BLOCK_SEQUENCE_START_TOKEN && tokenB.typ == yaml_BLOCK_ENTRY_TOKEN && len(comment.line) > 0 && !is_break(parser.buffer, parser.buffer_pos) {1576// If it was in the prior line, reposition so it becomes a1577// header of the follow up token. 
Otherwise, keep it in place1578// so it becomes a header of the former.1579comment.head = comment.line1580comment.line = nil1581if comment.start_mark.line == parser.mark.line-1 {1582comment.token_mark = parser.mark1583}1584}1585}15861587// Eat a comment until a line break.1588if parser.buffer[parser.buffer_pos] == '#' {1589if !yaml_parser_scan_comments(parser, scan_mark) {1590return false1591}1592}15931594// If it is a line break, eat it.1595if is_break(parser.buffer, parser.buffer_pos) {1596if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {1597return false1598}1599skip_line(parser)16001601// In the block context, a new line may start a simple key.1602if parser.flow_level == 0 {1603parser.simple_key_allowed = true1604}1605} else {1606break // We have found a token.1607}1608}16091610return true1611}16121613// Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.1614//1615// Scope:1616//1617// %YAML 1.1 # a comment \n1618// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^1619// %TAG !yaml! tag:yaml.org,2002: \n1620// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^1621func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool {1622// Eat '%'.1623start_mark := parser.mark1624skip(parser)16251626// Scan the directive name.1627var name []byte1628if !yaml_parser_scan_directive_name(parser, start_mark, &name) {1629return false1630}16311632// Is it a YAML directive?1633if bytes.Equal(name, []byte("YAML")) {1634// Scan the VERSION directive value.1635var major, minor int81636if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) {1637return false1638}1639end_mark := parser.mark16401641// Create a VERSION-DIRECTIVE token.1642*token = yaml_token_t{1643typ: yaml_VERSION_DIRECTIVE_TOKEN,1644start_mark: start_mark,1645end_mark: end_mark,1646major: major,1647minor: minor,1648}16491650// Is it a TAG directive?1651} else if bytes.Equal(name, []byte("TAG")) {1652// Scan the TAG directive value.1653var handle, prefix []byte1654if 
!yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) {1655return false1656}1657end_mark := parser.mark16581659// Create a TAG-DIRECTIVE token.1660*token = yaml_token_t{1661typ: yaml_TAG_DIRECTIVE_TOKEN,1662start_mark: start_mark,1663end_mark: end_mark,1664value: handle,1665prefix: prefix,1666}16671668// Unknown directive.1669} else {1670yaml_parser_set_scanner_error(parser, "while scanning a directive",1671start_mark, "found unknown directive name")1672return false1673}16741675// Eat the rest of the line including any comments.1676if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1677return false1678}16791680for is_blank(parser.buffer, parser.buffer_pos) {1681skip(parser)1682if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1683return false1684}1685}16861687if parser.buffer[parser.buffer_pos] == '#' {1688// [Go] Discard this inline comment for the time being.1689//if !yaml_parser_scan_line_comment(parser, start_mark) {1690// return false1691//}1692for !is_breakz(parser.buffer, parser.buffer_pos) {1693skip(parser)1694if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1695return false1696}1697}1698}16991700// Check if we are at the end of the line.1701if !is_breakz(parser.buffer, parser.buffer_pos) {1702yaml_parser_set_scanner_error(parser, "while scanning a directive",1703start_mark, "did not find expected comment or line break")1704return false1705}17061707// Eat a line break.1708if is_break(parser.buffer, parser.buffer_pos) {1709if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {1710return false1711}1712skip_line(parser)1713}17141715return true1716}17171718// Scan the directive name.1719//1720// Scope:1721//1722// %YAML 1.1 # a comment \n1723// ^^^^1724// %TAG !yaml! 
tag:yaml.org,2002: \n1725// ^^^1726func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool {1727// Consume the directive name.1728if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1729return false1730}17311732var s []byte1733for is_alpha(parser.buffer, parser.buffer_pos) {1734s = read(parser, s)1735if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1736return false1737}1738}17391740// Check if the name is empty.1741if len(s) == 0 {1742yaml_parser_set_scanner_error(parser, "while scanning a directive",1743start_mark, "could not find expected directive name")1744return false1745}17461747// Check for an blank character after the name.1748if !is_blankz(parser.buffer, parser.buffer_pos) {1749yaml_parser_set_scanner_error(parser, "while scanning a directive",1750start_mark, "found unexpected non-alphabetical character")1751return false1752}1753*name = s1754return true1755}17561757// Scan the value of VERSION-DIRECTIVE.1758//1759// Scope:1760//1761// %YAML 1.1 # a comment \n1762// ^^^^^^1763func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool {1764// Eat whitespaces.1765if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1766return false1767}1768for is_blank(parser.buffer, parser.buffer_pos) {1769skip(parser)1770if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1771return false1772}1773}17741775// Consume the major version number.1776if !yaml_parser_scan_version_directive_number(parser, start_mark, major) {1777return false1778}17791780// Eat '.'.1781if parser.buffer[parser.buffer_pos] != '.' {1782return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",1783start_mark, "did not find expected digit or '.' 
character")1784}17851786skip(parser)17871788// Consume the minor version number.1789if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) {1790return false1791}1792return true1793}17941795const max_number_length = 217961797// Scan the version number of VERSION-DIRECTIVE.1798//1799// Scope:1800//1801// %YAML 1.1 # a comment \n1802// ^1803// %YAML 1.1 # a comment \n1804// ^1805func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool {18061807// Repeat while the next character is digit.1808if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1809return false1810}1811var value, length int81812for is_digit(parser.buffer, parser.buffer_pos) {1813// Check if the number is too long.1814length++1815if length > max_number_length {1816return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",1817start_mark, "found extremely long version number")1818}1819value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos))1820skip(parser)1821if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1822return false1823}1824}18251826// Check if the number was present.1827if length == 0 {1828return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",1829start_mark, "did not find expected version number")1830}1831*number = value1832return true1833}18341835// Scan the value of a TAG-DIRECTIVE token.1836//1837// Scope:1838//1839// %TAG !yaml! 
tag:yaml.org,2002: \n1840// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^1841func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool {1842var handle_value, prefix_value []byte18431844// Eat whitespaces.1845if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1846return false1847}18481849for is_blank(parser.buffer, parser.buffer_pos) {1850skip(parser)1851if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1852return false1853}1854}18551856// Scan a handle.1857if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) {1858return false1859}18601861// Expect a whitespace.1862if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1863return false1864}1865if !is_blank(parser.buffer, parser.buffer_pos) {1866yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",1867start_mark, "did not find expected whitespace")1868return false1869}18701871// Eat whitespaces.1872for is_blank(parser.buffer, parser.buffer_pos) {1873skip(parser)1874if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1875return false1876}1877}18781879// Scan a prefix.1880if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) {1881return false1882}18831884// Expect a whitespace or line break.1885if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1886return false1887}1888if !is_blankz(parser.buffer, parser.buffer_pos) {1889yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",1890start_mark, "did not find expected whitespace or line break")1891return false1892}18931894*handle = handle_value1895*prefix = prefix_value1896return true1897}18981899func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool {1900var s []byte19011902// Eat the indicator character.1903start_mark := parser.mark1904skip(parser)19051906// Consume the value.1907if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1908return 
false1909}19101911for is_alpha(parser.buffer, parser.buffer_pos) {1912s = read(parser, s)1913if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {1914return false1915}1916}19171918end_mark := parser.mark19191920/*1921* Check if length of the anchor is greater than 0 and it is followed by1922* a whitespace character or one of the indicators:1923*1924* '?', ':', ',', ']', '}', '%', '@', '`'.1925*/19261927if len(s) == 0 ||1928!(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' ||1929parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' ||1930parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' ||1931parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' ||1932parser.buffer[parser.buffer_pos] == '`') {1933context := "while scanning an alias"1934if typ == yaml_ANCHOR_TOKEN {1935context = "while scanning an anchor"1936}1937yaml_parser_set_scanner_error(parser, context, start_mark,1938"did not find expected alphabetic or numeric character")1939return false1940}19411942// Create a token.1943*token = yaml_token_t{1944typ: typ,1945start_mark: start_mark,1946end_mark: end_mark,1947value: s,1948}19491950return true1951}19521953/*1954* Scan a TAG token.1955*/19561957func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool {1958var handle, suffix []byte19591960start_mark := parser.mark19611962// Check if the tag is in the canonical form.1963if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {1964return false1965}19661967if parser.buffer[parser.buffer_pos+1] == '<' {1968// Keep the handle as ''19691970// Eat '!<'1971skip(parser)1972skip(parser)19731974// Consume the tag value.1975if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) {1976return false1977}19781979// Check for '>' and eat it.1980if parser.buffer[parser.buffer_pos] != '>' {1981yaml_parser_set_scanner_error(parser, "while scanning a 
tag",1982start_mark, "did not find the expected '>'")1983return false1984}19851986skip(parser)1987} else {1988// The tag has either the '!suffix' or the '!handle!suffix' form.19891990// First, try to scan a handle.1991if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) {1992return false1993}19941995// Check if it is, indeed, handle.1996if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' {1997// Scan the suffix now.1998if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) {1999return false2000}2001} else {2002// It wasn't a handle after all. Scan the rest of the tag.2003if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) {2004return false2005}20062007// Set the handle to '!'.2008handle = []byte{'!'}20092010// A special case: the '!' tag. Set the handle to '' and the2011// suffix to '!'.2012if len(suffix) == 0 {2013handle, suffix = suffix, handle2014}2015}2016}20172018// Check the character which ends the tag.2019if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2020return false2021}2022if !is_blankz(parser.buffer, parser.buffer_pos) {2023yaml_parser_set_scanner_error(parser, "while scanning a tag",2024start_mark, "did not find expected whitespace or line break")2025return false2026}20272028end_mark := parser.mark20292030// Create a token.2031*token = yaml_token_t{2032typ: yaml_TAG_TOKEN,2033start_mark: start_mark,2034end_mark: end_mark,2035value: handle,2036suffix: suffix,2037}2038return true2039}20402041// Scan a tag handle.2042func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool {2043// Check the initial '!' character.2044if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2045return false2046}2047if parser.buffer[parser.buffer_pos] != '!' {2048yaml_parser_set_scanner_tag_error(parser, directive,2049start_mark, "did not find expected '!'")2050return false2051}20522053var s []byte20542055// Copy the '!' 
character.2056s = read(parser, s)20572058// Copy all subsequent alphabetical and numerical characters.2059if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2060return false2061}2062for is_alpha(parser.buffer, parser.buffer_pos) {2063s = read(parser, s)2064if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2065return false2066}2067}20682069// Check if the trailing character is '!' and copy it.2070if parser.buffer[parser.buffer_pos] == '!' {2071s = read(parser, s)2072} else {2073// It's either the '!' tag or not really a tag handle. If it's a %TAG2074// directive, it's an error. If it's a tag token, it must be a part of URI.2075if directive && string(s) != "!" {2076yaml_parser_set_scanner_tag_error(parser, directive,2077start_mark, "did not find expected '!'")2078return false2079}2080}20812082*handle = s2083return true2084}20852086// Scan a tag.2087func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool {2088//size_t length = head ? strlen((char *)head) : 02089var s []byte2090hasTag := len(head) > 020912092// Copy the head if needed.2093//2094// Note that we don't copy the leading '!' character.2095if len(head) > 1 {2096s = append(s, head[1:]...)2097}20982099// Scan the tag.2100if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2101return false2102}21032104// The set of characters that may appear in URI is as follows:2105//2106// '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',2107// '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',2108// '%'.2109// [Go] TODO Convert this into more reasonable logic.2110for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' ||2111parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' 
||2112parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' ||2113parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' ||2114parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' ||2115parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' ||2116parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' ||2117parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' ||2118parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' ||2119parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' ||2120parser.buffer[parser.buffer_pos] == '%' {2121// Check if it is a URI-escape sequence.2122if parser.buffer[parser.buffer_pos] == '%' {2123if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) {2124return false2125}2126} else {2127s = read(parser, s)2128}2129if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2130return false2131}2132hasTag = true2133}21342135if !hasTag {2136yaml_parser_set_scanner_tag_error(parser, directive,2137start_mark, "did not find expected tag URI")2138return false2139}2140*uri = s2141return true2142}21432144// Decode an URI-escape sequence corresponding to a single UTF-8 character.2145func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool {21462147// Decode the required number of characters.2148w := 10242149for w > 0 {2150// Check for a URI-escaped octet.2151if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {2152return false2153}21542155if !(parser.buffer[parser.buffer_pos] == '%' &&2156is_hex(parser.buffer, parser.buffer_pos+1) &&2157is_hex(parser.buffer, parser.buffer_pos+2)) {2158return yaml_parser_set_scanner_tag_error(parser, directive,2159start_mark, "did not find URI escaped octet")2160}21612162// Get the octet.2163octet := 
byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2))21642165// If it is the leading octet, determine the length of the UTF-8 sequence.2166if w == 1024 {2167w = width(octet)2168if w == 0 {2169return yaml_parser_set_scanner_tag_error(parser, directive,2170start_mark, "found an incorrect leading UTF-8 octet")2171}2172} else {2173// Check if the trailing octet is correct.2174if octet&0xC0 != 0x80 {2175return yaml_parser_set_scanner_tag_error(parser, directive,2176start_mark, "found an incorrect trailing UTF-8 octet")2177}2178}21792180// Copy the octet and move the pointers.2181*s = append(*s, octet)2182skip(parser)2183skip(parser)2184skip(parser)2185w--2186}2187return true2188}21892190// Scan a block scalar.2191func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool {2192// Eat the indicator '|' or '>'.2193start_mark := parser.mark2194skip(parser)21952196// Scan the additional block scalar indicators.2197if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2198return false2199}22002201// Check for a chomping indicator.2202var chomping, increment int2203if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {2204// Set the chomping method and eat the indicator.2205if parser.buffer[parser.buffer_pos] == '+' {2206chomping = +12207} else {2208chomping = -12209}2210skip(parser)22112212// Check for an indentation indicator.2213if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2214return false2215}2216if is_digit(parser.buffer, parser.buffer_pos) {2217// Check that the indentation is greater than 0.2218if parser.buffer[parser.buffer_pos] == '0' {2219yaml_parser_set_scanner_error(parser, "while scanning a block scalar",2220start_mark, "found an indentation indicator equal to 0")2221return false2222}22232224// Get the indentation level and eat the indicator.2225increment = as_digit(parser.buffer, 
parser.buffer_pos)2226skip(parser)2227}22282229} else if is_digit(parser.buffer, parser.buffer_pos) {2230// Do the same as above, but in the opposite order.22312232if parser.buffer[parser.buffer_pos] == '0' {2233yaml_parser_set_scanner_error(parser, "while scanning a block scalar",2234start_mark, "found an indentation indicator equal to 0")2235return false2236}2237increment = as_digit(parser.buffer, parser.buffer_pos)2238skip(parser)22392240if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2241return false2242}2243if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {2244if parser.buffer[parser.buffer_pos] == '+' {2245chomping = +12246} else {2247chomping = -12248}2249skip(parser)2250}2251}22522253// Eat whitespaces and comments to the end of the line.2254if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2255return false2256}2257for is_blank(parser.buffer, parser.buffer_pos) {2258skip(parser)2259if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2260return false2261}2262}2263if parser.buffer[parser.buffer_pos] == '#' {2264if !yaml_parser_scan_line_comment(parser, start_mark) {2265return false2266}2267for !is_breakz(parser.buffer, parser.buffer_pos) {2268skip(parser)2269if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2270return false2271}2272}2273}22742275// Check if we are at the end of the line.2276if !is_breakz(parser.buffer, parser.buffer_pos) {2277yaml_parser_set_scanner_error(parser, "while scanning a block scalar",2278start_mark, "did not find expected comment or line break")2279return false2280}22812282// Eat a line break.2283if is_break(parser.buffer, parser.buffer_pos) {2284if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {2285return false2286}2287skip_line(parser)2288}22892290end_mark := parser.mark22912292// Set the indentation level if it was specified.2293var indent int2294if increment > 0 {2295if parser.indent >= 0 {2296indent = parser.indent + 
increment2297} else {2298indent = increment2299}2300}23012302// Scan the leading line breaks and determine the indentation level if needed.2303var s, leading_break, trailing_breaks []byte2304if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {2305return false2306}23072308// Scan the block scalar content.2309if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2310return false2311}2312var leading_blank, trailing_blank bool2313for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) {2314// We are at the beginning of a non-empty line.23152316// Is it a trailing whitespace?2317trailing_blank = is_blank(parser.buffer, parser.buffer_pos)23182319// Check if we need to fold the leading line break.2320if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' {2321// Do we need to join the lines by space?2322if len(trailing_breaks) == 0 {2323s = append(s, ' ')2324}2325} else {2326s = append(s, leading_break...)2327}2328leading_break = leading_break[:0]23292330// Append the remaining line breaks.2331s = append(s, trailing_breaks...)2332trailing_breaks = trailing_breaks[:0]23332334// Is it a leading whitespace?2335leading_blank = is_blank(parser.buffer, parser.buffer_pos)23362337// Consume the current line.2338for !is_breakz(parser.buffer, parser.buffer_pos) {2339s = read(parser, s)2340if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2341return false2342}2343}23442345// Consume the line break.2346if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {2347return false2348}23492350leading_break = read_line(parser, leading_break)23512352// Eat the following indentation spaces and line breaks.2353if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {2354return false2355}2356}23572358// Chomp the tail.2359if chomping != -1 {2360s = append(s, leading_break...)2361}2362if chomping == 1 {2363s = 
append(s, trailing_breaks...)2364}23652366// Create a token.2367*token = yaml_token_t{2368typ: yaml_SCALAR_TOKEN,2369start_mark: start_mark,2370end_mark: end_mark,2371value: s,2372style: yaml_LITERAL_SCALAR_STYLE,2373}2374if !literal {2375token.style = yaml_FOLDED_SCALAR_STYLE2376}2377return true2378}23792380// Scan indentation spaces and line breaks for a block scalar. Determine the2381// indentation level if needed.2382func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool {2383*end_mark = parser.mark23842385// Eat the indentation spaces and line breaks.2386max_indent := 02387for {2388// Eat the indentation spaces.2389if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2390return false2391}2392for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) {2393skip(parser)2394if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2395return false2396}2397}2398if parser.mark.column > max_indent {2399max_indent = parser.mark.column2400}24012402// Check for a tab character messing the indentation.2403if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) {2404return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",2405start_mark, "found a tab character where an indentation space is expected")2406}24072408// Have we found a non-empty line?2409if !is_break(parser.buffer, parser.buffer_pos) {2410break2411}24122413// Consume the line break.2414if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {2415return false2416}2417// [Go] Should really be returning breaks instead.2418*breaks = read_line(parser, *breaks)2419*end_mark = parser.mark2420}24212422// Determine the indentation level if needed.2423if *indent == 0 {2424*indent = max_indent2425if *indent < parser.indent+1 {2426*indent = parser.indent + 12427}2428if *indent < 1 {2429*indent = 12430}2431}2432return 
true2433}24342435// Scan a quoted scalar.2436func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool {2437// Eat the left quote.2438start_mark := parser.mark2439skip(parser)24402441// Consume the content of the quoted scalar.2442var s, leading_break, trailing_breaks, whitespaces []byte2443for {2444// Check that there are no document indicators at the beginning of the line.2445if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {2446return false2447}24482449if parser.mark.column == 0 &&2450((parser.buffer[parser.buffer_pos+0] == '-' &&2451parser.buffer[parser.buffer_pos+1] == '-' &&2452parser.buffer[parser.buffer_pos+2] == '-') ||2453(parser.buffer[parser.buffer_pos+0] == '.' &&2454parser.buffer[parser.buffer_pos+1] == '.' &&2455parser.buffer[parser.buffer_pos+2] == '.')) &&2456is_blankz(parser.buffer, parser.buffer_pos+3) {2457yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",2458start_mark, "found unexpected document indicator")2459return false2460}24612462// Check for EOF.2463if is_z(parser.buffer, parser.buffer_pos) {2464yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",2465start_mark, "found unexpected end of stream")2466return false2467}24682469// Consume non-blank characters.2470leading_blanks := false2471for !is_blankz(parser.buffer, parser.buffer_pos) {2472if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' {2473// Is is an escaped single quote.2474s = append(s, '\'')2475skip(parser)2476skip(parser)24772478} else if single && parser.buffer[parser.buffer_pos] == '\'' {2479// It is a right single quote.2480break2481} else if !single && parser.buffer[parser.buffer_pos] == '"' {2482// It is a right double quote.2483break24842485} else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) {2486// It is an escaped line break.2487if parser.unread < 3 && 
!yaml_parser_update_buffer(parser, 3) {2488return false2489}2490skip(parser)2491skip_line(parser)2492leading_blanks = true2493break24942495} else if !single && parser.buffer[parser.buffer_pos] == '\\' {2496// It is an escape sequence.2497code_length := 024982499// Check the escape character.2500switch parser.buffer[parser.buffer_pos+1] {2501case '0':2502s = append(s, 0)2503case 'a':2504s = append(s, '\x07')2505case 'b':2506s = append(s, '\x08')2507case 't', '\t':2508s = append(s, '\x09')2509case 'n':2510s = append(s, '\x0A')2511case 'v':2512s = append(s, '\x0B')2513case 'f':2514s = append(s, '\x0C')2515case 'r':2516s = append(s, '\x0D')2517case 'e':2518s = append(s, '\x1B')2519case ' ':2520s = append(s, '\x20')2521case '"':2522s = append(s, '"')2523case '\'':2524s = append(s, '\'')2525case '\\':2526s = append(s, '\\')2527case 'N': // NEL (#x85)2528s = append(s, '\xC2')2529s = append(s, '\x85')2530case '_': // #xA02531s = append(s, '\xC2')2532s = append(s, '\xA0')2533case 'L': // LS (#x2028)2534s = append(s, '\xE2')2535s = append(s, '\x80')2536s = append(s, '\xA8')2537case 'P': // PS (#x2029)2538s = append(s, '\xE2')2539s = append(s, '\x80')2540s = append(s, '\xA9')2541case 'x':2542code_length = 22543case 'u':2544code_length = 42545case 'U':2546code_length = 82547default:2548yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",2549start_mark, "found unknown escape character")2550return false2551}25522553skip(parser)2554skip(parser)25552556// Consume an arbitrary escape code.2557if code_length > 0 {2558var value int25592560// Scan the character value.2561if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) {2562return false2563}2564for k := 0; k < code_length; k++ {2565if !is_hex(parser.buffer, parser.buffer_pos+k) {2566yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",2567start_mark, "did not find expected hexdecimal number")2568return false2569}2570value = (value << 4) + as_hex(parser.buffer, 
parser.buffer_pos+k)2571}25722573// Check the value and write the character.2574if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF {2575yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",2576start_mark, "found invalid Unicode character escape code")2577return false2578}2579if value <= 0x7F {2580s = append(s, byte(value))2581} else if value <= 0x7FF {2582s = append(s, byte(0xC0+(value>>6)))2583s = append(s, byte(0x80+(value&0x3F)))2584} else if value <= 0xFFFF {2585s = append(s, byte(0xE0+(value>>12)))2586s = append(s, byte(0x80+((value>>6)&0x3F)))2587s = append(s, byte(0x80+(value&0x3F)))2588} else {2589s = append(s, byte(0xF0+(value>>18)))2590s = append(s, byte(0x80+((value>>12)&0x3F)))2591s = append(s, byte(0x80+((value>>6)&0x3F)))2592s = append(s, byte(0x80+(value&0x3F)))2593}25942595// Advance the pointer.2596for k := 0; k < code_length; k++ {2597skip(parser)2598}2599}2600} else {2601// It is a non-escaped non-blank character.2602s = read(parser, s)2603}2604if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {2605return false2606}2607}26082609if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2610return false2611}26122613// Check if we are at the end of the scalar.2614if single {2615if parser.buffer[parser.buffer_pos] == '\'' {2616break2617}2618} else {2619if parser.buffer[parser.buffer_pos] == '"' {2620break2621}2622}26232624// Consume blank characters.2625for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {2626if is_blank(parser.buffer, parser.buffer_pos) {2627// Consume a space or a tab character.2628if !leading_blanks {2629whitespaces = read(parser, whitespaces)2630} else {2631skip(parser)2632}2633} else {2634if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {2635return false2636}26372638// Check if it is a first line break.2639if !leading_blanks {2640whitespaces = whitespaces[:0]2641leading_break = read_line(parser, leading_break)2642leading_blanks = 
true2643} else {2644trailing_breaks = read_line(parser, trailing_breaks)2645}2646}2647if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2648return false2649}2650}26512652// Join the whitespaces or fold line breaks.2653if leading_blanks {2654// Do we need to fold line breaks?2655if len(leading_break) > 0 && leading_break[0] == '\n' {2656if len(trailing_breaks) == 0 {2657s = append(s, ' ')2658} else {2659s = append(s, trailing_breaks...)2660}2661} else {2662s = append(s, leading_break...)2663s = append(s, trailing_breaks...)2664}2665trailing_breaks = trailing_breaks[:0]2666leading_break = leading_break[:0]2667} else {2668s = append(s, whitespaces...)2669whitespaces = whitespaces[:0]2670}2671}26722673// Eat the right quote.2674skip(parser)2675end_mark := parser.mark26762677// Create a token.2678*token = yaml_token_t{2679typ: yaml_SCALAR_TOKEN,2680start_mark: start_mark,2681end_mark: end_mark,2682value: s,2683style: yaml_SINGLE_QUOTED_SCALAR_STYLE,2684}2685if !single {2686token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE2687}2688return true2689}26902691// Scan a plain scalar.2692func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool {26932694var s, leading_break, trailing_breaks, whitespaces []byte2695var leading_blanks bool2696var indent = parser.indent + 126972698start_mark := parser.mark2699end_mark := parser.mark27002701// Consume the content of the plain scalar.2702for {2703// Check for a document indicator.2704if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {2705return false2706}2707if parser.mark.column == 0 &&2708((parser.buffer[parser.buffer_pos+0] == '-' &&2709parser.buffer[parser.buffer_pos+1] == '-' &&2710parser.buffer[parser.buffer_pos+2] == '-') ||2711(parser.buffer[parser.buffer_pos+0] == '.' &&2712parser.buffer[parser.buffer_pos+1] == '.' 
&&2713parser.buffer[parser.buffer_pos+2] == '.')) &&2714is_blankz(parser.buffer, parser.buffer_pos+3) {2715break2716}27172718// Check for a comment.2719if parser.buffer[parser.buffer_pos] == '#' {2720break2721}27222723// Consume non-blank characters.2724for !is_blankz(parser.buffer, parser.buffer_pos) {27252726// Check for indicators that may end a plain scalar.2727if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) ||2728(parser.flow_level > 0 &&2729(parser.buffer[parser.buffer_pos] == ',' ||2730parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' ||2731parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||2732parser.buffer[parser.buffer_pos] == '}')) {2733break2734}27352736// Check if we need to join whitespaces and breaks.2737if leading_blanks || len(whitespaces) > 0 {2738if leading_blanks {2739// Do we need to fold line breaks?2740if leading_break[0] == '\n' {2741if len(trailing_breaks) == 0 {2742s = append(s, ' ')2743} else {2744s = append(s, trailing_breaks...)2745}2746} else {2747s = append(s, leading_break...)2748s = append(s, trailing_breaks...)2749}2750trailing_breaks = trailing_breaks[:0]2751leading_break = leading_break[:0]2752leading_blanks = false2753} else {2754s = append(s, whitespaces...)2755whitespaces = whitespaces[:0]2756}2757}27582759// Copy the character.2760s = read(parser, s)27612762end_mark = parser.mark2763if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {2764return false2765}2766}27672768// Is it the end?2769if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) {2770break2771}27722773// Consume blank characters.2774if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2775return false2776}27772778for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {2779if is_blank(parser.buffer, parser.buffer_pos) {27802781// Check for tab characters 
that abuse indentation.2782if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) {2783yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",2784start_mark, "found a tab character that violates indentation")2785return false2786}27872788// Consume a space or a tab character.2789if !leading_blanks {2790whitespaces = read(parser, whitespaces)2791} else {2792skip(parser)2793}2794} else {2795if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {2796return false2797}27982799// Check if it is a first line break.2800if !leading_blanks {2801whitespaces = whitespaces[:0]2802leading_break = read_line(parser, leading_break)2803leading_blanks = true2804} else {2805trailing_breaks = read_line(parser, trailing_breaks)2806}2807}2808if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2809return false2810}2811}28122813// Check indentation level.2814if parser.flow_level == 0 && parser.mark.column < indent {2815break2816}2817}28182819// Create a token.2820*token = yaml_token_t{2821typ: yaml_SCALAR_TOKEN,2822start_mark: start_mark,2823end_mark: end_mark,2824value: s,2825style: yaml_PLAIN_SCALAR_STYLE,2826}28272828// Note that we change the 'simple_key_allowed' flag.2829if leading_blanks {2830parser.simple_key_allowed = true2831}2832return true2833}28342835func yaml_parser_scan_line_comment(parser *yaml_parser_t, token_mark yaml_mark_t) bool {2836if parser.newlines > 0 {2837return true2838}28392840var start_mark yaml_mark_t2841var text []byte28422843for peek := 0; peek < 512; peek++ {2844if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {2845break2846}2847if is_blank(parser.buffer, parser.buffer_pos+peek) {2848continue2849}2850if parser.buffer[parser.buffer_pos+peek] == '#' {2851seen := parser.mark.index + peek2852for {2853if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {2854return false2855}2856if is_breakz(parser.buffer, parser.buffer_pos) {2857if parser.mark.index >= seen 
// yaml_parser_scan_comments scans the comment lines that follow scan_mark and
// appends them to parser.comments, classifying each group either as a foot
// comment of preceding content or as a head comment of what comes next.
//
// The input is examined with a look-ahead of at most 512 positions per pass;
// only comment lines themselves are consumed. A group is stored as a foot
// when it is closed by a ']' or '}' inside a flow collection, when it ends at
// the first empty line and either starts on the foot line (and the reference
// token is not a VALUE token) or starts to the left of the current indent, or
// when a later line is dedented relative to it. Text that is still pending
// when scanning stops is stored as a head comment.
//
// The function returns false only when the buffer cannot be refilled while a
// comment line is being consumed; a failed refill during look-ahead merely
// ends the scan.
func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) bool {
	// The reference token is the last one queued, or the one before it when
	// the last one is a flow entry (',') and an earlier token exists.
	token := parser.tokens[len(parser.tokens)-1]

	if token.typ == yaml_FLOW_ENTRY_TOKEN && len(parser.tokens) > 1 {
		token = parser.tokens[len(parser.tokens)-2]
	}

	var token_mark = token.start_mark
	var start_mark yaml_mark_t
	// next_indent is the current indentation level, clamped to zero.
	var next_indent = parser.indent
	if next_indent < 0 {
		next_indent = 0
	}

	// recent_empty: the previously examined line was empty.
	// first_empty: no empty line has been seen yet in this scan.
	var recent_empty = false
	var first_empty = parser.newlines <= 1

	// line and column track the look-ahead position; parser.mark itself only
	// moves when a comment line is consumed.
	var line = parser.mark.line
	var column = parser.mark.column

	// text accumulates the comment group being built, lines joined by '\n'.
	var text []byte

	// The foot line is the place where a comment must start to
	// still be considered as a foot of the prior content.
	// If there's some content in the currently parsed line, then
	// the foot is the line below it.
	var foot_line = -1
	if scan_mark.line > 0 {
		foot_line = parser.mark.line - parser.newlines + 1
		if parser.newlines == 0 && parser.mark.column > 1 {
			foot_line++
		}
	}

	var peek = 0
	for ; peek < 512; peek++ {
		// A failed refill ends the look-ahead without reporting an error here.
		if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
			break
		}
		column++
		if is_blank(parser.buffer, parser.buffer_pos+peek) {
			continue
		}
		c := parser.buffer[parser.buffer_pos+peek]
		// A closing bracket inside a flow collection ends the comment scope.
		var close_flow = parser.flow_level > 0 && (c == ']' || c == '}')
		if close_flow || is_breakz(parser.buffer, parser.buffer_pos+peek) {
			// Got line break or terminator.
			if close_flow || !recent_empty {
				if close_flow || first_empty && (start_mark.line == foot_line && token.typ != yaml_VALUE_TOKEN || start_mark.column-1 < next_indent) {
					// This is the first empty line and there were no empty lines before,
					// so this initial part of the comment is a foot of the prior token
					// instead of being a head for the following one. Split it up.
					// Alternatively, this might also be the last comment inside a flow
					// scope, so it must be a footer.
					if len(text) > 0 {
						if start_mark.column-1 < next_indent {
							// If dedented it's unrelated to the prior token.
							token_mark = start_mark
						}
						parser.comments = append(parser.comments, yaml_comment_t{
							scan_mark:  scan_mark,
							token_mark: token_mark,
							start_mark: start_mark,
							end_mark:   yaml_mark_t{parser.mark.index + peek, line, column},
							foot:       text,
						})
						// Later comments are scanned and anchored from here.
						scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
						token_mark = scan_mark
						text = nil
					}
				} else {
					// Keep the empty line as part of the pending comment,
					// unless the byte at this position is NUL.
					if len(text) > 0 && parser.buffer[parser.buffer_pos+peek] != 0 {
						text = append(text, '\n')
					}
				}
			}
			// A closing bracket or the end of input stops the scan.
			if !is_break(parser.buffer, parser.buffer_pos+peek) {
				break
			}
			first_empty = false
			recent_empty = true
			column = 0
			line++
			continue
		}

		if len(text) > 0 && (close_flow || column-1 < next_indent && column != start_mark.column) {
			// The comment at the different indentation is a foot of the
			// preceding data rather than a head of the upcoming one.
			parser.comments = append(parser.comments, yaml_comment_t{
				scan_mark:  scan_mark,
				token_mark: token_mark,
				start_mark: start_mark,
				end_mark:   yaml_mark_t{parser.mark.index + peek, line, column},
				foot:       text,
			})
			scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
			token_mark = scan_mark
			text = nil
		}

		// Any non-blank character other than '#' is content: stop scanning.
		if parser.buffer[parser.buffer_pos+peek] != '#' {
			break
		}

		// Start a new comment group or continue the pending one on a new line.
		if len(text) == 0 {
			start_mark = yaml_mark_t{parser.mark.index + peek, line, column}
		} else {
			text = append(text, '\n')
		}

		recent_empty = false

		// Consume until after the consumed comment line.
		// Everything before the '#' is skipped; from the '#' up to the next
		// break or end of input is copied into text.
		seen := parser.mark.index + peek
		for {
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
			if is_breakz(parser.buffer, parser.buffer_pos) {
				if parser.mark.index >= seen {
					break
				}
				if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
					return false
				}
				skip_line(parser)
			} else if parser.mark.index >= seen {
				text = read(parser, text)
			} else {
				skip(parser)
			}
		}

		// Restart the look-ahead from the new parser position. The loop's
		// post statement advances peek to 1 before the next check, so the
		// byte at buffer_pos (the break that ended the comment) is not
		// examined again.
		// NOTE(review): for a two-byte CR LF break the LF at offset 1 would
		// presumably be seen as an extra empty line — confirm against
		// is_break and the reader's handling of line endings.
		peek = 0
		column = 0
		line = parser.mark.line
		next_indent = parser.indent
		if next_indent < 0 {
			next_indent = 0
		}
	}

	// Whatever is still pending belongs to the content that follows.
	if len(text) > 0 {
		parser.comments = append(parser.comments, yaml_comment_t{
			scan_mark:  scan_mark,
			token_mark: start_mark,
			start_mark: start_mark,
			end_mark:   yaml_mark_t{parser.mark.index + peek - 1, line, column},
			head:       text,
		})
	}
	return true
}