Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
kardolus
GitHub Repository: kardolus/chatgpt-cli
Path: blob/main/vendor/golang.org/x/text/runes/runes.go
2880 views
1
// Copyright 2014 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
// Package runes provides transforms for UTF-8 encoded text.
6
package runes // import "golang.org/x/text/runes"
7
8
import (
9
"unicode"
10
"unicode/utf8"
11
12
"golang.org/x/text/transform"
13
)
14
15
// Set is a collection of runes that can be queried for membership.
type Set interface {
	// Contains reports whether r is a member of the set.
	Contains(r rune) bool
}

// setFunc adapts a plain rune predicate to the Set interface.
type setFunc func(rune) bool

// Contains reports whether the wrapped predicate accepts r.
func (fn setFunc) Contains(r rune) bool {
	accepted := fn(r)
	return accepted
}
26
27
// Note: using funcs here instead of wrapping types results in cleaner
28
// documentation and a smaller API.
29
30
// In creates a Set with a Contains method that returns true for all runes in
31
// the given RangeTable.
32
func In(rt *unicode.RangeTable) Set {
33
return setFunc(func(r rune) bool { return unicode.Is(rt, r) })
34
}
35
36
// NotIn creates a Set with a Contains method that returns true for all runes not
37
// in the given RangeTable.
38
func NotIn(rt *unicode.RangeTable) Set {
39
return setFunc(func(r rune) bool { return !unicode.Is(rt, r) })
40
}
41
42
// Predicate creates a Set with a Contains method that returns f(r).
43
func Predicate(f func(rune) bool) Set {
44
return setFunc(f)
45
}
46
47
// Transformer implements the transform.Transformer interface.
48
type Transformer struct {
49
t transform.SpanningTransformer
50
}
51
52
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
53
return t.t.Transform(dst, src, atEOF)
54
}
55
56
func (t Transformer) Span(b []byte, atEOF bool) (n int, err error) {
57
return t.t.Span(b, atEOF)
58
}
59
60
func (t Transformer) Reset() { t.t.Reset() }
61
62
// Bytes returns a new byte slice with the result of converting b using t. It
63
// calls Reset on t. It returns nil if any error was found. This can only happen
64
// if an error-producing Transformer is passed to If.
65
func (t Transformer) Bytes(b []byte) []byte {
66
b, _, err := transform.Bytes(t, b)
67
if err != nil {
68
return nil
69
}
70
return b
71
}
72
73
// String returns a string with the result of converting s using t. It calls
74
// Reset on t. It returns the empty string if any error was found. This can only
75
// happen if an error-producing Transformer is passed to If.
76
func (t Transformer) String(s string) string {
77
s, _, err := transform.String(t, s)
78
if err != nil {
79
return ""
80
}
81
return s
82
}
83
84
// TODO:
85
// - Copy: copying strings and bytes in whole-rune units.
86
// - Validation (maybe)
87
// - Well-formed-ness (maybe)
88
89
// runeErrorString is utf8.RuneError converted to a string. The Transform
// methods in this file write its first three bytes to the output in place of
// ill-formed input.
const runeErrorString = string(utf8.RuneError)
90
91
// Remove returns a Transformer that removes runes r for which s.Contains(r).
92
// Illegal input bytes are replaced by RuneError before being passed to f.
93
func Remove(s Set) Transformer {
94
if f, ok := s.(setFunc); ok {
95
// This little trick cuts the running time of BenchmarkRemove for sets
96
// created by Predicate roughly in half.
97
// TODO: special-case RangeTables as well.
98
return Transformer{remove(f)}
99
}
100
return Transformer{remove(s.Contains)}
101
}
102
103
// TODO: remove transform.RemoveFunc.
104
105
// remove is a rune predicate used as a transformer: its Span and Transform
// methods treat runes for which the predicate returns true as runes to drop.
type remove func(rune) bool

// Reset does nothing.
func (remove) Reset() {
}
108
109
// Span implements transform.Spanner.
110
func (t remove) Span(src []byte, atEOF bool) (n int, err error) {
111
for r, size := rune(0), 0; n < len(src); {
112
if r = rune(src[n]); r < utf8.RuneSelf {
113
size = 1
114
} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
115
// Invalid rune.
116
if !atEOF && !utf8.FullRune(src[n:]) {
117
err = transform.ErrShortSrc
118
} else {
119
err = transform.ErrEndOfSpan
120
}
121
break
122
}
123
if t(r) {
124
err = transform.ErrEndOfSpan
125
break
126
}
127
n += size
128
}
129
return
130
}
131
132
// Transform implements transform.Transformer.
133
func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
134
for r, size := rune(0), 0; nSrc < len(src); {
135
if r = rune(src[nSrc]); r < utf8.RuneSelf {
136
size = 1
137
} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
138
// Invalid rune.
139
if !atEOF && !utf8.FullRune(src[nSrc:]) {
140
err = transform.ErrShortSrc
141
break
142
}
143
// We replace illegal bytes with RuneError. Not doing so might
144
// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
145
// The resulting byte sequence may subsequently contain runes
146
// for which t(r) is true that were passed unnoticed.
147
if !t(utf8.RuneError) {
148
if nDst+3 > len(dst) {
149
err = transform.ErrShortDst
150
break
151
}
152
dst[nDst+0] = runeErrorString[0]
153
dst[nDst+1] = runeErrorString[1]
154
dst[nDst+2] = runeErrorString[2]
155
nDst += 3
156
}
157
nSrc++
158
continue
159
}
160
if t(r) {
161
nSrc += size
162
continue
163
}
164
if nDst+size > len(dst) {
165
err = transform.ErrShortDst
166
break
167
}
168
for i := 0; i < size; i++ {
169
dst[nDst] = src[nSrc]
170
nDst++
171
nSrc++
172
}
173
}
174
return
175
}
176
177
// Map returns a Transformer that maps the runes in the input using the given
178
// mapping. Illegal bytes in the input are converted to utf8.RuneError before
179
// being passed to the mapping func.
180
func Map(mapping func(rune) rune) Transformer {
181
return Transformer{mapper(mapping)}
182
}
183
184
// mapper is a rune-to-rune mapping function used as a transformer: its Span
// and Transform methods apply the function to each rune of the input.
type mapper func(rune) rune

// Reset does nothing.
func (mapper) Reset() {
}
187
188
// Span implements transform.Spanner.
189
func (t mapper) Span(src []byte, atEOF bool) (n int, err error) {
190
for r, size := rune(0), 0; n < len(src); n += size {
191
if r = rune(src[n]); r < utf8.RuneSelf {
192
size = 1
193
} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
194
// Invalid rune.
195
if !atEOF && !utf8.FullRune(src[n:]) {
196
err = transform.ErrShortSrc
197
} else {
198
err = transform.ErrEndOfSpan
199
}
200
break
201
}
202
if t(r) != r {
203
err = transform.ErrEndOfSpan
204
break
205
}
206
}
207
return n, err
208
}
209
210
// Transform implements transform.Transformer.
211
func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
212
var replacement rune
213
var b [utf8.UTFMax]byte
214
215
for r, size := rune(0), 0; nSrc < len(src); {
216
if r = rune(src[nSrc]); r < utf8.RuneSelf {
217
if replacement = t(r); replacement < utf8.RuneSelf {
218
if nDst == len(dst) {
219
err = transform.ErrShortDst
220
break
221
}
222
dst[nDst] = byte(replacement)
223
nDst++
224
nSrc++
225
continue
226
}
227
size = 1
228
} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
229
// Invalid rune.
230
if !atEOF && !utf8.FullRune(src[nSrc:]) {
231
err = transform.ErrShortSrc
232
break
233
}
234
235
if replacement = t(utf8.RuneError); replacement == utf8.RuneError {
236
if nDst+3 > len(dst) {
237
err = transform.ErrShortDst
238
break
239
}
240
dst[nDst+0] = runeErrorString[0]
241
dst[nDst+1] = runeErrorString[1]
242
dst[nDst+2] = runeErrorString[2]
243
nDst += 3
244
nSrc++
245
continue
246
}
247
} else if replacement = t(r); replacement == r {
248
if nDst+size > len(dst) {
249
err = transform.ErrShortDst
250
break
251
}
252
for i := 0; i < size; i++ {
253
dst[nDst] = src[nSrc]
254
nDst++
255
nSrc++
256
}
257
continue
258
}
259
260
n := utf8.EncodeRune(b[:], replacement)
261
262
if nDst+n > len(dst) {
263
err = transform.ErrShortDst
264
break
265
}
266
for i := 0; i < n; i++ {
267
dst[nDst] = b[i]
268
nDst++
269
}
270
nSrc += size
271
}
272
return
273
}
274
275
// ReplaceIllFormed returns a transformer that replaces all input bytes that are
276
// not part of a well-formed UTF-8 code sequence with utf8.RuneError.
277
func ReplaceIllFormed() Transformer {
278
return Transformer{&replaceIllFormed{}}
279
}
280
281
// replaceIllFormed is the transformer returned by ReplaceIllFormed. It has no
// fields of its own; it embeds transform.NopResetter, which presumably
// supplies a no-op Reset method (confirm against the transform package).
type replaceIllFormed struct{ transform.NopResetter }
282
283
// Span returns the length of the longest well-formed UTF-8 prefix of src. It
// stops with transform.ErrEndOfSpan at the first ill-formed byte, or with
// transform.ErrShortSrc when src ends in an incomplete encoding and atEOF is
// false.
func (t replaceIllFormed) Span(src []byte, atEOF bool) (n int, err error) {
	for n < len(src) {
		// ASCII fast path.
		if src[n] < utf8.RuneSelf {
			n++
			continue
		}

		r, size := utf8.DecodeRune(src[n:])
		switch {
		case size != 1 || r != utf8.RuneError:
			// Valid non-ASCII rune.
			n += size
		case !atEOF && !utf8.FullRune(src[n:]):
			// Possibly a rune cut off by the end of the buffer.
			return n, transform.ErrShortSrc
		default:
			// Invalid rune.
			return n, transform.ErrEndOfSpan
		}
	}
	return n, nil
}
311
312
// Transform copies src to dst, writing the encoding of utf8.RuneError in
// place of every byte that is not part of a well-formed UTF-8 sequence. It
// returns transform.ErrShortDst when dst has no room for the next rune, and
// transform.ErrShortSrc when src ends in an incomplete encoding and atEOF is
// false.
func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for nSrc < len(src) {
		// ASCII fast path.
		c := src[nSrc]
		if c < utf8.RuneSelf {
			if nDst == len(dst) {
				return nDst, nSrc, transform.ErrShortDst
			}
			dst[nDst] = c
			nDst++
			nSrc++
			continue
		}

		_, size := utf8.DecodeRune(src[nSrc:])
		switch {
		case size != 1:
			// Valid non-ASCII rune: copy it as is. A short copy means dst
			// ran out of room, and the counters are left untouched.
			copied := copy(dst[nDst:], src[nSrc:nSrc+size])
			if copied != size {
				return nDst, nSrc, transform.ErrShortDst
			}
			nDst += size
			nSrc += size
		case !atEOF && !utf8.FullRune(src[nSrc:]):
			// Possibly a rune cut off by the end of the buffer.
			return nDst, nSrc, transform.ErrShortSrc
		case nDst+3 > len(dst):
			// Invalid rune, but no room for its replacement.
			return nDst, nSrc, transform.ErrShortDst
		default:
			// Invalid rune: emit RuneError and skip the offending byte.
			nDst += copy(dst[nDst:], runeErrorString)
			nSrc++
		}
	}
	return nDst, nSrc, nil
}
356
357