Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
kardolus
GitHub Repository: kardolus/chatgpt-cli
Path: blob/main/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go
2893 views
1
package unstable
2
3
import (
4
"bytes"
5
"fmt"
6
"unicode"
7
8
"github.com/pelletier/go-toml/v2/internal/characters"
9
"github.com/pelletier/go-toml/v2/internal/danger"
10
)
11
12
// ParserError reports a problem tied to a location in the parsed document.
//
// A ParserError must not outlive the Parser instance it refers to, and using
// it after that parser has been reset may cause panics.
type ParserError struct {
	// Highlight is the part of the input the error points at.
	Highlight []byte
	// Message is the text returned by Error.
	Message string
	// Key is optional.
	Key []string
}

// Error implements the error interface by returning the stored Message.
func (e *ParserError) Error() string {
	return e.Message
}
26
27
// NewParserError is a convenience function to create a ParserError
28
//
29
// Warning: Highlight needs to be a subslice of Parser.data, so only slices
30
// returned by Parser.Raw are valid candidates.
31
func NewParserError(highlight []byte, format string, args ...interface{}) error {
32
return &ParserError{
33
Highlight: highlight,
34
Message: fmt.Errorf(format, args...).Error(),
35
}
36
}
37
38
// Parser scans over a TOML-encoded document and generates an iterative AST.
39
//
40
// To prime the Parser, first reset it with the contents of a TOML document.
41
// Then, process all top-level expressions sequentially. See Example.
42
//
43
// Don't forget to check Error() after you're done parsing.
44
//
45
// Each top-level expression needs to be fully processed before calling
46
// NextExpression() again. Otherwise, calls to various Node methods may panic if
47
// the parser has moved on the next expression.
48
//
49
// For performance reasons, go-toml doesn't make a copy of the input bytes to
50
// the parser. Make sure to copy all the bytes you need to outlive the slice
51
// given to the parser.
52
type Parser struct {
53
data []byte
54
builder builder
55
ref reference
56
left []byte
57
err error
58
first bool
59
60
KeepComments bool
61
}
62
63
// Data returns the slice provided to the last call to Reset.
64
func (p *Parser) Data() []byte {
65
return p.data
66
}
67
68
// Range returns a range description that corresponds to a given slice of the
69
// input. If the argument is not a subslice of the parser input, this function
70
// panics.
71
func (p *Parser) Range(b []byte) Range {
72
return Range{
73
Offset: uint32(danger.SubsliceOffset(p.data, b)),
74
Length: uint32(len(b)),
75
}
76
}
77
78
// Raw returns the slice corresponding to the bytes in the given range.
79
func (p *Parser) Raw(raw Range) []byte {
80
return p.data[raw.Offset : raw.Offset+raw.Length]
81
}
82
83
// Reset brings the parser to its initial state for a given input. It wipes an
84
// reuses internal storage to reduce allocation.
85
func (p *Parser) Reset(b []byte) {
86
p.builder.Reset()
87
p.ref = invalidReference
88
p.data = b
89
p.left = b
90
p.err = nil
91
p.first = true
92
}
93
94
// NextExpression parses the next top-level expression. If an expression was
95
// successfully parsed, it returns true. If the parser is at the end of the
96
// document or an error occurred, it returns false.
97
//
98
// Retrieve the parsed expression with Expression().
99
func (p *Parser) NextExpression() bool {
100
if len(p.left) == 0 || p.err != nil {
101
return false
102
}
103
104
p.builder.Reset()
105
p.ref = invalidReference
106
107
for {
108
if len(p.left) == 0 || p.err != nil {
109
return false
110
}
111
112
if !p.first {
113
p.left, p.err = p.parseNewline(p.left)
114
}
115
116
if len(p.left) == 0 || p.err != nil {
117
return false
118
}
119
120
p.ref, p.left, p.err = p.parseExpression(p.left)
121
122
if p.err != nil {
123
return false
124
}
125
126
p.first = false
127
128
if p.ref.Valid() {
129
return true
130
}
131
}
132
}
133
134
// Expression returns a pointer to the node representing the last successfully
135
// parsed expression.
136
func (p *Parser) Expression() *Node {
137
return p.builder.NodeAt(p.ref)
138
}
139
140
// Error returns any error that has occurred during parsing.
141
func (p *Parser) Error() error {
142
return p.err
143
}
144
145
// Position locates a single point in the input.
type Position struct {
	// Offset is the number of bytes from the beginning of the input.
	Offset int
	// Line is the line number, starting at 1.
	Line int
	// Column is the column number, starting at 1.
	Column int
}
154
155
// Shape describes the position of a range in the input.
156
type Shape struct {
157
Start Position
158
End Position
159
}
160
161
func (p *Parser) position(b []byte) Position {
162
offset := danger.SubsliceOffset(p.data, b)
163
164
lead := p.data[:offset]
165
166
return Position{
167
Offset: offset,
168
Line: bytes.Count(lead, []byte{'\n'}) + 1,
169
Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}),
170
}
171
}
172
173
// Shape returns the shape of the given range in the input. Will
174
// panic if the range is not a subslice of the input.
175
func (p *Parser) Shape(r Range) Shape {
176
raw := p.Raw(r)
177
return Shape{
178
Start: p.position(raw),
179
End: p.position(raw[r.Length:]),
180
}
181
}
182
183
// parseNewline consumes one newline at the start of b and returns what
// follows it. A '\n' is consumed directly, a '\r' is delegated to
// scanWindowsNewline, and any other byte is reported as an error. b must not
// be empty.
func (p *Parser) parseNewline(b []byte) ([]byte, error) {
	switch b[0] {
	case '\n':
		return b[1:], nil
	case '\r':
		_, rest, err := scanWindowsNewline(b)
		return rest, err
	default:
		return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
	}
}
195
196
func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
197
ref := invalidReference
198
data, rest, err := scanComment(b)
199
if p.KeepComments && err == nil {
200
ref = p.builder.Push(Node{
201
Kind: Comment,
202
Raw: p.Range(data),
203
Data: data,
204
})
205
}
206
return ref, rest, err
207
}
208
209
// parseExpression parses a single top-level expression at the start of b and
// returns its node reference along with the unconsumed bytes.
//
//	expression =  ws [ comment ]
//	expression =/ ws keyval ws [ comment ]
//	expression =/ ws table ws [ comment ]
//
// The returned reference is invalidReference when the line holds nothing to
// report (empty input, a bare newline, or a comment that was not kept).
func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
	b = p.parseWhitespace(b)

	if len(b) == 0 {
		return invalidReference, b, nil
	}

	var (
		ref reference
		err error
	)

	switch b[0] {
	case '#':
		return p.parseComment(b)
	case '\n', '\r':
		// Blank line: leave the newline for the caller to consume.
		return invalidReference, b, nil
	case '[':
		ref, b, err = p.parseTable(b)
	default:
		ref, b, err = p.parseKeyval(b)
	}

	if err != nil {
		return ref, nil, err
	}

	b = p.parseWhitespace(b)

	if len(b) == 0 || b[0] != '#' {
		return ref, b, nil
	}

	// Trailing comment: chain it after the expression when it was kept.
	cref, rest, err := p.parseComment(b)
	if cref != invalidReference {
		p.builder.Chain(ref, cref)
	}

	return ref, rest, err
}
253
254
// parseTable dispatches on the second byte of b: a second '[' selects the
// array-table parser, anything else the standard table parser.
//
//	table = std-table / array-table
func (p *Parser) parseTable(b []byte) (reference, []byte, error) {
	isArrayTable := len(b) > 1 && b[1] == '['
	if !isArrayTable {
		return p.parseStdTable(b)
	}

	return p.parseArrayTable(b)
}
262
263
func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) {
264
// array-table = array-table-open key array-table-close
265
// array-table-open = %x5B.5B ws ; [[ Double left square bracket
266
// array-table-close = ws %x5D.5D ; ]] Double right square bracket
267
ref := p.builder.Push(Node{
268
Kind: ArrayTable,
269
})
270
271
b = b[2:]
272
b = p.parseWhitespace(b)
273
274
k, b, err := p.parseKey(b)
275
if err != nil {
276
return ref, nil, err
277
}
278
279
p.builder.AttachChild(ref, k)
280
b = p.parseWhitespace(b)
281
282
b, err = expect(']', b)
283
if err != nil {
284
return ref, nil, err
285
}
286
287
b, err = expect(']', b)
288
289
return ref, b, err
290
}
291
292
func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
293
// std-table = std-table-open key std-table-close
294
// std-table-open = %x5B ws ; [ Left square bracket
295
// std-table-close = ws %x5D ; ] Right square bracket
296
ref := p.builder.Push(Node{
297
Kind: Table,
298
})
299
300
b = b[1:]
301
b = p.parseWhitespace(b)
302
303
key, b, err := p.parseKey(b)
304
if err != nil {
305
return ref, nil, err
306
}
307
308
p.builder.AttachChild(ref, key)
309
310
b = p.parseWhitespace(b)
311
312
b, err = expect(']', b)
313
314
return ref, b, err
315
}
316
317
func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
318
// keyval = key keyval-sep val
319
ref := p.builder.Push(Node{
320
Kind: KeyValue,
321
})
322
323
key, b, err := p.parseKey(b)
324
if err != nil {
325
return invalidReference, nil, err
326
}
327
328
// keyval-sep = ws %x3D ws ; =
329
330
b = p.parseWhitespace(b)
331
332
if len(b) == 0 {
333
return invalidReference, nil, NewParserError(b, "expected = after a key, but the document ends there")
334
}
335
336
b, err = expect('=', b)
337
if err != nil {
338
return invalidReference, nil, err
339
}
340
341
b = p.parseWhitespace(b)
342
343
valRef, b, err := p.parseVal(b)
344
if err != nil {
345
return ref, b, err
346
}
347
348
p.builder.Chain(valRef, key)
349
p.builder.AttachChild(ref, valRef)
350
351
return ref, b, err
352
}
353
354
//nolint:cyclop,funlen
355
func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
356
// val = string / boolean / array / inline-table / date-time / float / integer
357
ref := invalidReference
358
359
if len(b) == 0 {
360
return ref, nil, NewParserError(b, "expected value, not eof")
361
}
362
363
var err error
364
c := b[0]
365
366
switch c {
367
case '"':
368
var raw []byte
369
var v []byte
370
if scanFollowsMultilineBasicStringDelimiter(b) {
371
raw, v, b, err = p.parseMultilineBasicString(b)
372
} else {
373
raw, v, b, err = p.parseBasicString(b)
374
}
375
376
if err == nil {
377
ref = p.builder.Push(Node{
378
Kind: String,
379
Raw: p.Range(raw),
380
Data: v,
381
})
382
}
383
384
return ref, b, err
385
case '\'':
386
var raw []byte
387
var v []byte
388
if scanFollowsMultilineLiteralStringDelimiter(b) {
389
raw, v, b, err = p.parseMultilineLiteralString(b)
390
} else {
391
raw, v, b, err = p.parseLiteralString(b)
392
}
393
394
if err == nil {
395
ref = p.builder.Push(Node{
396
Kind: String,
397
Raw: p.Range(raw),
398
Data: v,
399
})
400
}
401
402
return ref, b, err
403
case 't':
404
if !scanFollowsTrue(b) {
405
return ref, nil, NewParserError(atmost(b, 4), "expected 'true'")
406
}
407
408
ref = p.builder.Push(Node{
409
Kind: Bool,
410
Data: b[:4],
411
})
412
413
return ref, b[4:], nil
414
case 'f':
415
if !scanFollowsFalse(b) {
416
return ref, nil, NewParserError(atmost(b, 5), "expected 'false'")
417
}
418
419
ref = p.builder.Push(Node{
420
Kind: Bool,
421
Data: b[:5],
422
})
423
424
return ref, b[5:], nil
425
case '[':
426
return p.parseValArray(b)
427
case '{':
428
return p.parseInlineTable(b)
429
default:
430
return p.parseIntOrFloatOrDateTime(b)
431
}
432
}
433
434
// atmost returns the first n bytes of b, or all of b when it holds n bytes or
// fewer. The result aliases b.
func atmost(b []byte, n int) []byte {
	if len(b) > n {
		return b[:n]
	}

	return b
}
441
442
// parseLiteralString scans a single-quoted literal string at the start of b.
// It returns the raw token (quotes included), the content between the quotes,
// and the bytes following the token.
func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
	token, rest, err := scanLiteralString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	// Drop the first and last byte of the token to get the content.
	content := token[1 : len(token)-1]

	return token, content, rest, nil
}
450
451
func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
452
// inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
453
// inline-table-open = %x7B ws ; {
454
// inline-table-close = ws %x7D ; }
455
// inline-table-sep = ws %x2C ws ; , Comma
456
// inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
457
parent := p.builder.Push(Node{
458
Kind: InlineTable,
459
Raw: p.Range(b[:1]),
460
})
461
462
first := true
463
464
var child reference
465
466
b = b[1:]
467
468
var err error
469
470
for len(b) > 0 {
471
previousB := b
472
b = p.parseWhitespace(b)
473
474
if len(b) == 0 {
475
return parent, nil, NewParserError(previousB[:1], "inline table is incomplete")
476
}
477
478
if b[0] == '}' {
479
break
480
}
481
482
if !first {
483
b, err = expect(',', b)
484
if err != nil {
485
return parent, nil, err
486
}
487
b = p.parseWhitespace(b)
488
}
489
490
var kv reference
491
492
kv, b, err = p.parseKeyval(b)
493
if err != nil {
494
return parent, nil, err
495
}
496
497
if first {
498
p.builder.AttachChild(parent, kv)
499
} else {
500
p.builder.Chain(child, kv)
501
}
502
child = kv
503
504
first = false
505
}
506
507
rest, err := expect('}', b)
508
509
return parent, rest, err
510
}
511
512
//nolint:funlen,cyclop
513
func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
514
// array = array-open [ array-values ] ws-comment-newline array-close
515
// array-open = %x5B ; [
516
// array-close = %x5D ; ]
517
// array-values = ws-comment-newline val ws-comment-newline array-sep array-values
518
// array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
519
// array-sep = %x2C ; , Comma
520
// ws-comment-newline = *( wschar / [ comment ] newline )
521
arrayStart := b
522
b = b[1:]
523
524
parent := p.builder.Push(Node{
525
Kind: Array,
526
})
527
528
// First indicates whether the parser is looking for the first element
529
// (non-comment) of the array.
530
first := true
531
532
lastChild := invalidReference
533
534
addChild := func(valueRef reference) {
535
if lastChild == invalidReference {
536
p.builder.AttachChild(parent, valueRef)
537
} else {
538
p.builder.Chain(lastChild, valueRef)
539
}
540
lastChild = valueRef
541
}
542
543
var err error
544
for len(b) > 0 {
545
cref := invalidReference
546
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
547
if err != nil {
548
return parent, nil, err
549
}
550
551
if cref != invalidReference {
552
addChild(cref)
553
}
554
555
if len(b) == 0 {
556
return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
557
}
558
559
if b[0] == ']' {
560
break
561
}
562
563
if b[0] == ',' {
564
if first {
565
return parent, nil, NewParserError(b[0:1], "array cannot start with comma")
566
}
567
b = b[1:]
568
569
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
570
if err != nil {
571
return parent, nil, err
572
}
573
if cref != invalidReference {
574
addChild(cref)
575
}
576
} else if !first {
577
return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
578
}
579
580
// TOML allows trailing commas in arrays.
581
if len(b) > 0 && b[0] == ']' {
582
break
583
}
584
585
var valueRef reference
586
valueRef, b, err = p.parseVal(b)
587
if err != nil {
588
return parent, nil, err
589
}
590
591
addChild(valueRef)
592
593
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
594
if err != nil {
595
return parent, nil, err
596
}
597
if cref != invalidReference {
598
addChild(cref)
599
}
600
601
first = false
602
}
603
604
rest, err := expect(']', b)
605
606
return parent, rest, err
607
}
608
609
// parseOptionalWhitespaceCommentNewline consumes any run of whitespace,
// comments and newlines at the start of b and returns the remaining bytes.
//
// The returned reference is the first comment node produced, or
// invalidReference when there is none. The second comment is attached as the
// child of the first, and later comments are chained after the previous one.
func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) {
	first := invalidReference
	last := invalidReference

	record := func(comment reference) {
		switch {
		case first == invalidReference:
			first = comment
		case last == invalidReference:
			p.builder.AttachChild(first, comment)
			last = comment
		default:
			p.builder.Chain(last, comment)
			last = comment
		}
	}

	for len(b) > 0 {
		b = p.parseWhitespace(b)

		if len(b) > 0 && b[0] == '#' {
			comment, rest, err := p.parseComment(b)
			if err != nil {
				return invalidReference, nil, err
			}

			b = rest

			if comment != invalidReference {
				record(comment)
			}
		}

		// Anything other than a newline ends the run.
		if len(b) == 0 || (b[0] != '\n' && b[0] != '\r') {
			break
		}

		rest, err := p.parseNewline(b)
		if err != nil {
			return invalidReference, nil, err
		}

		b = rest
	}

	return first, b, nil
}
656
657
// parseMultilineLiteralString scans a multi-line literal string at the start
// of b. It returns the raw token (delimiters included), the content with the
// delimiters and an immediately following newline removed, and the bytes
// after the token.
func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
	token, rest, err := scanMultilineLiteralString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	// Both delimiters are 3 bytes long.
	const delimLen = 3

	start := delimLen

	// Skip the newline right after the opening delimiter, if any.
	switch {
	case token[start] == '\n':
		start++
	case token[start] == '\r' && token[start+1] == '\n':
		start += 2
	}

	return token, token[start : len(token)-delimLen], rest, nil
}
674
675
//nolint:funlen,gocognit,cyclop
676
func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
677
// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
678
// ml-basic-string-delim
679
// ml-basic-string-delim = 3quotation-mark
680
// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
681
//
682
// mlb-content = mlb-char / newline / mlb-escaped-nl
683
// mlb-char = mlb-unescaped / escaped
684
// mlb-quotes = 1*2quotation-mark
685
// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
686
// mlb-escaped-nl = escape ws newline *( wschar / newline )
687
token, escaped, rest, err := scanMultilineBasicString(b)
688
if err != nil {
689
return nil, nil, nil, err
690
}
691
692
i := 3
693
694
// skip the immediate new line
695
if token[i] == '\n' {
696
i++
697
} else if token[i] == '\r' && token[i+1] == '\n' {
698
i += 2
699
}
700
701
// fast path
702
startIdx := i
703
endIdx := len(token) - len(`"""`)
704
705
if !escaped {
706
str := token[startIdx:endIdx]
707
verr := characters.Utf8TomlValidAlreadyEscaped(str)
708
if verr.Zero() {
709
return token, str, rest, nil
710
}
711
return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
712
}
713
714
var builder bytes.Buffer
715
716
// The scanner ensures that the token starts and ends with quotes and that
717
// escapes are balanced.
718
for i < len(token)-3 {
719
c := token[i]
720
721
//nolint:nestif
722
if c == '\\' {
723
// When the last non-whitespace character on a line is an unescaped \,
724
// it will be trimmed along with all whitespace (including newlines) up
725
// to the next non-whitespace character or closing delimiter.
726
727
isLastNonWhitespaceOnLine := false
728
j := 1
729
findEOLLoop:
730
for ; j < len(token)-3-i; j++ {
731
switch token[i+j] {
732
case ' ', '\t':
733
continue
734
case '\r':
735
if token[i+j+1] == '\n' {
736
continue
737
}
738
case '\n':
739
isLastNonWhitespaceOnLine = true
740
}
741
break findEOLLoop
742
}
743
if isLastNonWhitespaceOnLine {
744
i += j
745
for ; i < len(token)-3; i++ {
746
c := token[i]
747
if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
748
i--
749
break
750
}
751
}
752
i++
753
continue
754
}
755
756
// handle escaping
757
i++
758
c = token[i]
759
760
switch c {
761
case '"', '\\':
762
builder.WriteByte(c)
763
case 'b':
764
builder.WriteByte('\b')
765
case 'f':
766
builder.WriteByte('\f')
767
case 'n':
768
builder.WriteByte('\n')
769
case 'r':
770
builder.WriteByte('\r')
771
case 't':
772
builder.WriteByte('\t')
773
case 'e':
774
builder.WriteByte(0x1B)
775
case 'u':
776
x, err := hexToRune(atmost(token[i+1:], 4), 4)
777
if err != nil {
778
return nil, nil, nil, err
779
}
780
builder.WriteRune(x)
781
i += 4
782
case 'U':
783
x, err := hexToRune(atmost(token[i+1:], 8), 8)
784
if err != nil {
785
return nil, nil, nil, err
786
}
787
788
builder.WriteRune(x)
789
i += 8
790
default:
791
return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
792
}
793
i++
794
} else {
795
size := characters.Utf8ValidNext(token[i:])
796
if size == 0 {
797
return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
798
}
799
builder.Write(token[i : i+size])
800
i += size
801
}
802
}
803
804
return token, builder.Bytes(), rest, nil
805
}
806
807
func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
808
// key = simple-key / dotted-key
809
// simple-key = quoted-key / unquoted-key
810
//
811
// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
812
// quoted-key = basic-string / literal-string
813
// dotted-key = simple-key 1*( dot-sep simple-key )
814
//
815
// dot-sep = ws %x2E ws ; . Period
816
raw, key, b, err := p.parseSimpleKey(b)
817
if err != nil {
818
return invalidReference, nil, err
819
}
820
821
ref := p.builder.Push(Node{
822
Kind: Key,
823
Raw: p.Range(raw),
824
Data: key,
825
})
826
827
for {
828
b = p.parseWhitespace(b)
829
if len(b) > 0 && b[0] == '.' {
830
b = p.parseWhitespace(b[1:])
831
832
raw, key, b, err = p.parseSimpleKey(b)
833
if err != nil {
834
return ref, nil, err
835
}
836
837
p.builder.PushAndChain(Node{
838
Kind: Key,
839
Raw: p.Range(raw),
840
Data: key,
841
})
842
} else {
843
break
844
}
845
}
846
847
return ref, b, nil
848
}
849
850
func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
851
if len(b) == 0 {
852
return nil, nil, nil, NewParserError(b, "expected key but found none")
853
}
854
855
// simple-key = quoted-key / unquoted-key
856
// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
857
// quoted-key = basic-string / literal-string
858
switch {
859
case b[0] == '\'':
860
return p.parseLiteralString(b)
861
case b[0] == '"':
862
return p.parseBasicString(b)
863
case isUnquotedKeyChar(b[0]):
864
key, rest = scanUnquotedKey(b)
865
return key, key, rest, nil
866
default:
867
return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0])
868
}
869
}
870
871
//nolint:funlen,cyclop
872
func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
873
// basic-string = quotation-mark *basic-char quotation-mark
874
// quotation-mark = %x22 ; "
875
// basic-char = basic-unescaped / escaped
876
// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
877
// escaped = escape escape-seq-char
878
// escape-seq-char = %x22 ; " quotation mark U+0022
879
// escape-seq-char =/ %x5C ; \ reverse solidus U+005C
880
// escape-seq-char =/ %x62 ; b backspace U+0008
881
// escape-seq-char =/ %x66 ; f form feed U+000C
882
// escape-seq-char =/ %x6E ; n line feed U+000A
883
// escape-seq-char =/ %x72 ; r carriage return U+000D
884
// escape-seq-char =/ %x74 ; t tab U+0009
885
// escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX
886
// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
887
token, escaped, rest, err := scanBasicString(b)
888
if err != nil {
889
return nil, nil, nil, err
890
}
891
892
startIdx := len(`"`)
893
endIdx := len(token) - len(`"`)
894
895
// Fast path. If there is no escape sequence, the string should just be
896
// an UTF-8 encoded string, which is the same as Go. In that case,
897
// validate the string and return a direct reference to the buffer.
898
if !escaped {
899
str := token[startIdx:endIdx]
900
verr := characters.Utf8TomlValidAlreadyEscaped(str)
901
if verr.Zero() {
902
return token, str, rest, nil
903
}
904
return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
905
}
906
907
i := startIdx
908
909
var builder bytes.Buffer
910
911
// The scanner ensures that the token starts and ends with quotes and that
912
// escapes are balanced.
913
for i < len(token)-1 {
914
c := token[i]
915
if c == '\\' {
916
i++
917
c = token[i]
918
919
switch c {
920
case '"', '\\':
921
builder.WriteByte(c)
922
case 'b':
923
builder.WriteByte('\b')
924
case 'f':
925
builder.WriteByte('\f')
926
case 'n':
927
builder.WriteByte('\n')
928
case 'r':
929
builder.WriteByte('\r')
930
case 't':
931
builder.WriteByte('\t')
932
case 'e':
933
builder.WriteByte(0x1B)
934
case 'u':
935
x, err := hexToRune(token[i+1:len(token)-1], 4)
936
if err != nil {
937
return nil, nil, nil, err
938
}
939
940
builder.WriteRune(x)
941
i += 4
942
case 'U':
943
x, err := hexToRune(token[i+1:len(token)-1], 8)
944
if err != nil {
945
return nil, nil, nil, err
946
}
947
948
builder.WriteRune(x)
949
i += 8
950
default:
951
return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
952
}
953
i++
954
} else {
955
size := characters.Utf8ValidNext(token[i:])
956
if size == 0 {
957
return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
958
}
959
builder.Write(token[i : i+size])
960
i += size
961
}
962
}
963
964
return token, builder.Bytes(), rest, nil
965
}
966
967
func hexToRune(b []byte, length int) (rune, error) {
968
if len(b) < length {
969
return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b))
970
}
971
b = b[:length]
972
973
var r uint32
974
for i, c := range b {
975
d := uint32(0)
976
switch {
977
case '0' <= c && c <= '9':
978
d = uint32(c - '0')
979
case 'a' <= c && c <= 'f':
980
d = uint32(c - 'a' + 10)
981
case 'A' <= c && c <= 'F':
982
d = uint32(c - 'A' + 10)
983
default:
984
return -1, NewParserError(b[i:i+1], "non-hex character")
985
}
986
r = r*16 + d
987
}
988
989
if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
990
return -1, NewParserError(b, "escape sequence is invalid Unicode code point")
991
}
992
993
return rune(r), nil
994
}
995
996
// parseWhitespace skips the whitespace found by scanWhitespace at the start
// of b and returns what follows.
//
//	ws      = *wschar
//	wschar  = %x20  ; Space
//	wschar =/ %x09  ; Horizontal tab
func (p *Parser) parseWhitespace(b []byte) []byte {
	_, remaining := scanWhitespace(b)

	return remaining
}
1004
1005
//nolint:cyclop
1006
func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) {
1007
switch b[0] {
1008
case 'i':
1009
if !scanFollowsInf(b) {
1010
return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'")
1011
}
1012
1013
return p.builder.Push(Node{
1014
Kind: Float,
1015
Data: b[:3],
1016
Raw: p.Range(b[:3]),
1017
}), b[3:], nil
1018
case 'n':
1019
if !scanFollowsNan(b) {
1020
return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'")
1021
}
1022
1023
return p.builder.Push(Node{
1024
Kind: Float,
1025
Data: b[:3],
1026
Raw: p.Range(b[:3]),
1027
}), b[3:], nil
1028
case '+', '-':
1029
return p.scanIntOrFloat(b)
1030
}
1031
1032
if len(b) < 3 {
1033
return p.scanIntOrFloat(b)
1034
}
1035
1036
s := 5
1037
if len(b) < s {
1038
s = len(b)
1039
}
1040
1041
for idx, c := range b[:s] {
1042
if isDigit(c) {
1043
continue
1044
}
1045
1046
if idx == 2 && c == ':' || (idx == 4 && c == '-') {
1047
return p.scanDateTime(b)
1048
}
1049
1050
break
1051
}
1052
1053
return p.scanIntOrFloat(b)
1054
}
1055
1056
func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) {
1057
// scans for contiguous characters in [0-9T:Z.+-], and up to one space if
1058
// followed by a digit.
1059
hasDate := false
1060
hasTime := false
1061
hasTz := false
1062
seenSpace := false
1063
1064
i := 0
1065
byteLoop:
1066
for ; i < len(b); i++ {
1067
c := b[i]
1068
1069
switch {
1070
case isDigit(c):
1071
case c == '-':
1072
hasDate = true
1073
const minOffsetOfTz = 8
1074
if i >= minOffsetOfTz {
1075
hasTz = true
1076
}
1077
case c == 'T' || c == 't' || c == ':' || c == '.':
1078
hasTime = true
1079
case c == '+' || c == '-' || c == 'Z' || c == 'z':
1080
hasTz = true
1081
case c == ' ':
1082
if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) {
1083
i += 2
1084
// Avoid reaching past the end of the document in case the time
1085
// is malformed. See TestIssue585.
1086
if i >= len(b) {
1087
i--
1088
}
1089
seenSpace = true
1090
hasTime = true
1091
} else {
1092
break byteLoop
1093
}
1094
default:
1095
break byteLoop
1096
}
1097
}
1098
1099
var kind Kind
1100
1101
if hasTime {
1102
if hasDate {
1103
if hasTz {
1104
kind = DateTime
1105
} else {
1106
kind = LocalDateTime
1107
}
1108
} else {
1109
kind = LocalTime
1110
}
1111
} else {
1112
kind = LocalDate
1113
}
1114
1115
return p.builder.Push(Node{
1116
Kind: kind,
1117
Data: b[:i],
1118
}), b[i:], nil
1119
}
1120
1121
//nolint:funlen,gocognit,cyclop
1122
func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
1123
i := 0
1124
1125
if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' {
1126
var isValidRune validRuneFn
1127
1128
switch b[1] {
1129
case 'x':
1130
isValidRune = isValidHexRune
1131
case 'o':
1132
isValidRune = isValidOctalRune
1133
case 'b':
1134
isValidRune = isValidBinaryRune
1135
default:
1136
i++
1137
}
1138
1139
if isValidRune != nil {
1140
i += 2
1141
for ; i < len(b); i++ {
1142
if !isValidRune(b[i]) {
1143
break
1144
}
1145
}
1146
}
1147
1148
return p.builder.Push(Node{
1149
Kind: Integer,
1150
Data: b[:i],
1151
Raw: p.Range(b[:i]),
1152
}), b[i:], nil
1153
}
1154
1155
isFloat := false
1156
1157
for ; i < len(b); i++ {
1158
c := b[i]
1159
1160
if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
1161
continue
1162
}
1163
1164
if c == '.' || c == 'e' || c == 'E' {
1165
isFloat = true
1166
1167
continue
1168
}
1169
1170
if c == 'i' {
1171
if scanFollowsInf(b[i:]) {
1172
return p.builder.Push(Node{
1173
Kind: Float,
1174
Data: b[:i+3],
1175
Raw: p.Range(b[:i+3]),
1176
}), b[i+3:], nil
1177
}
1178
1179
return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number")
1180
}
1181
1182
if c == 'n' {
1183
if scanFollowsNan(b[i:]) {
1184
return p.builder.Push(Node{
1185
Kind: Float,
1186
Data: b[:i+3],
1187
Raw: p.Range(b[:i+3]),
1188
}), b[i+3:], nil
1189
}
1190
1191
return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number")
1192
}
1193
1194
break
1195
}
1196
1197
if i == 0 {
1198
return invalidReference, b, NewParserError(b, "incomplete number")
1199
}
1200
1201
kind := Integer
1202
1203
if isFloat {
1204
kind = Float
1205
}
1206
1207
return p.builder.Push(Node{
1208
Kind: kind,
1209
Data: b[:i],
1210
Raw: p.Range(b[:i]),
1211
}), b[i:], nil
1212
}
1213
1214
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r byte) bool {
	return '0' <= r && r <= '9'
}
1217
1218
// validRuneFn reports whether a byte is acceptable in the digits of a number
// written in a given base.
type validRuneFn func(r byte) bool

// isValidHexRune reports whether r is a hexadecimal digit (either case) or an
// underscore.
func isValidHexRune(r byte) bool {
	switch {
	case '0' <= r && r <= '9', 'a' <= r && r <= 'f', 'A' <= r && r <= 'F':
		return true
	default:
		return r == '_'
	}
}
1226
1227
// isValidOctalRune reports whether r is an octal digit or an underscore.
func isValidOctalRune(r byte) bool {
	if r == '_' {
		return true
	}

	return '0' <= r && r <= '7'
}
1230
1231
// isValidBinaryRune reports whether r is a binary digit or an underscore.
func isValidBinaryRune(r byte) bool {
	switch r {
	case '0', '1', '_':
		return true
	}

	return false
}
1234
1235
func expect(x byte, b []byte) ([]byte, error) {
1236
if len(b) == 0 {
1237
return nil, NewParserError(b, "expected character %c but the document ended here", x)
1238
}
1239
1240
if b[0] != x {
1241
return nil, NewParserError(b[0:1], "expected character %c", x)
1242
}
1243
1244
return b[1:], nil
1245
}
1246
1247