Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
kardolus
GitHub Repository: kardolus/chatgpt-cli
Path: blob/main/vendor/golang.org/x/text/cases/map.go
2880 views
1
// Copyright 2014 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
package cases
6
7
// This file contains the definitions of case mappings for all supported
8
// languages. The rules for the language-specific tailorings were taken and
9
// modified from the CLDR transform definitions in common/transforms.
10
11
import (
12
"strings"
13
"unicode"
14
"unicode/utf8"
15
16
"golang.org/x/text/internal"
17
"golang.org/x/text/language"
18
"golang.org/x/text/transform"
19
"golang.org/x/text/unicode/norm"
20
)
21
22
// A mapFunc takes a context set to the current rune and writes the mapped
23
// version to the same context. It may advance the context to the next rune. It
24
// returns whether a checkpoint is possible: whether the pDst bytes written to
25
// dst so far won't need changing as we see more source bytes.
26
type mapFunc func(*context) bool
27
28
// A spanFunc takes a context set to the current rune and returns whether this
29
// rune would be altered when written to the output. It may advance the context
30
// to the next rune. It returns whether a checkpoint is possible.
31
type spanFunc func(*context) bool
32
33
const (
	// maxIgnorable is the maximum number of ignorables that lookahead
	// operations will consider.
	maxIgnorable = 30

	// supported is the space-separated list of language tags for which this
	// package has tailorings.
	supported = "und af az el lt nl tr"
)
39
40
func init() {
41
tags := []language.Tag{}
42
for _, s := range strings.Split(supported, " ") {
43
tags = append(tags, language.MustParse(s))
44
}
45
matcher = internal.NewInheritanceMatcher(tags)
46
Supported = language.NewCoverage(tags)
47
}
48
49
var (
50
matcher *internal.InheritanceMatcher
51
52
Supported language.Coverage
53
54
// We keep the following lists separate, instead of having a single per-
55
// language struct, to give the compiler a chance to remove unused code.
56
57
// Some uppercase mappers are stateless, so we can precompute the
58
// Transformers and save a bit on runtime allocations.
59
upperFunc = []struct {
60
upper mapFunc
61
span spanFunc
62
}{
63
{nil, nil}, // und
64
{nil, nil}, // af
65
{aztrUpper(upper), isUpper}, // az
66
{elUpper, noSpan}, // el
67
{ltUpper(upper), noSpan}, // lt
68
{nil, nil}, // nl
69
{aztrUpper(upper), isUpper}, // tr
70
}
71
72
undUpper transform.SpanningTransformer = &undUpperCaser{}
73
undLower transform.SpanningTransformer = &undLowerCaser{}
74
undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}
75
76
lowerFunc = []mapFunc{
77
nil, // und
78
nil, // af
79
aztrLower, // az
80
nil, // el
81
ltLower, // lt
82
nil, // nl
83
aztrLower, // tr
84
}
85
86
titleInfos = []struct {
87
title mapFunc
88
lower mapFunc
89
titleSpan spanFunc
90
rewrite func(*context)
91
}{
92
{title, lower, isTitle, nil}, // und
93
{title, lower, isTitle, afnlRewrite}, // af
94
{aztrUpper(title), aztrLower, isTitle, nil}, // az
95
{title, lower, isTitle, nil}, // el
96
{ltUpper(title), ltLower, noSpan, nil}, // lt
97
{nlTitle, lower, nlTitleSpan, afnlRewrite}, // nl
98
{aztrUpper(title), aztrLower, isTitle, nil}, // tr
99
}
100
)
101
102
// makeUpper returns the upper-casing transformer for t. Tags whose upperFunc
// entry has no tailored mapper share the precomputed undUpper; all others get
// a new simpleCaser wrapping the tailored mapper and its span function. The
// options o are not consulted.
func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
	_, idx, _ := matcher.Match(t)
	if entry := upperFunc[idx]; entry.upper != nil {
		return &simpleCaser{f: entry.upper, span: entry.span}
	}
	return undUpper
}
110
111
func makeLower(t language.Tag, o options) transform.SpanningTransformer {
112
_, i, _ := matcher.Match(t)
113
f := lowerFunc[i]
114
if f == nil {
115
if o.ignoreFinalSigma {
116
return undLowerIgnoreSigma
117
}
118
return undLower
119
}
120
if o.ignoreFinalSigma {
121
return &simpleCaser{f: f, span: isLower}
122
}
123
return &lowerCaser{
124
first: f,
125
midWord: finalSigma(f),
126
}
127
}
128
129
func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
130
_, i, _ := matcher.Match(t)
131
x := &titleInfos[i]
132
lower := x.lower
133
if o.noLower {
134
lower = (*context).copy
135
} else if !o.ignoreFinalSigma {
136
lower = finalSigma(lower)
137
}
138
return &titleCaser{
139
title: x.title,
140
lower: lower,
141
titleSpan: x.titleSpan,
142
rewrite: x.rewrite,
143
}
144
}
145
146
// noSpan is the spanFunc used where no real span check is implemented. It
// records transform.ErrEndOfSpan on the context and reports that no
// checkpoint is possible.
func noSpan(c *context) bool {
	c.err = transform.ErrEndOfSpan
	return false
}
150
151
// TODO: consider a similar special case for the fast majority lower case. This
152
// is a bit more involved so will require some more precise benchmarking to
153
// justify it.
154
155
// undUpperCaser is the upper-casing transformer for the root locale (und).
// It has no fields of its own; Reset comes from the embedded NopResetter.
type undUpperCaser struct {
	transform.NopResetter
}
156
157
// undUpperCaser implements the Transformer interface for doing an upper case
158
// mapping for the root locale (und). It eliminates the need for an allocation
159
// as it prevents escaping by not using function pointers.
160
func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
161
c := context{dst: dst, src: src, atEOF: atEOF}
162
for c.next() {
163
upper(&c)
164
c.checkpoint()
165
}
166
return c.ret()
167
}
168
169
// Span walks src rune by rune, checkpointing after each rune for which
// isUpper reports true, and stops at the first rune for which it does not.
func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
	ctx := context{src: src, atEOF: atEOF}
	for ctx.next() {
		if !isUpper(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.retSpan()
}
176
177
// undLowerIgnoreSigmaCaser implements the Transformer interface for doing
178
// a lower case mapping for the root locale (und) ignoring final sigma
179
// handling. This casing algorithm is used in some performance-critical packages
180
// like secure/precis and x/net/http/idna, which warrants its special-casing.
181
type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }
182
183
// Transform lower-cases src into dst one rune at a time using lower. It
// checkpoints after every rune for which lower reports success and stops at
// the first rune for which it does not.
func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	ctx := context{dst: dst, src: src, atEOF: atEOF}
	for ctx.next() {
		if !lower(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.ret()
}
191
192
// Span implements a generic lower-casing. This is possible as isLower works
193
// for all lowercasing variants. All lowercase variants only vary in how they
194
// transform a non-lowercase letter. They will never change an already lowercase
195
// letter. In addition, there is no state.
196
func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
197
c := context{src: src, atEOF: atEOF}
198
for c.next() && isLower(&c) {
199
c.checkpoint()
200
}
201
return c.retSpan()
202
}
203
204
type simpleCaser struct {
205
context
206
f mapFunc
207
span spanFunc
208
}
209
210
// simpleCaser implements the Transformer interface for doing a case operation
211
// on a rune-by-rune basis.
212
func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
213
c := context{dst: dst, src: src, atEOF: atEOF}
214
for c.next() && t.f(&c) {
215
c.checkpoint()
216
}
217
return c.ret()
218
}
219
220
// Span applies t.span to each rune of src, checkpointing after every rune
// for which it reports true and stopping at the first for which it does not.
func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
	ctx := context{src: src, atEOF: atEOF}
	for ctx.next() {
		if !t.span(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.retSpan()
}
227
228
// undLowerCaser implements the Transformer interface for doing a lower case
229
// mapping for the root locale (und) ignoring final sigma handling. This casing
230
// algorithm is used in some performance-critical packages like secure/precis
231
// and x/net/http/idna, which warrants its special-casing.
232
type undLowerCaser struct{ transform.NopResetter }
233
234
// Transform lower-cases src into dst for the root locale while tracking
// whether the current position is between words or inside a word.
//
// Between words, the first cased rune is lowered and starts a word; any other
// rune is copied. Inside a word, a rune that is neither cased nor
// case-ignorable is copied and ends the word, a Σ is handled by
// finalSigmaBody, and everything else is lowered. Processing stops at the
// first step that reports failure; every successful step is checkpointed.
func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	ctx := context{dst: dst, src: src, atEOF: atEOF}

	betweenWords := true
	for ctx.next() {
		var ok bool
		switch {
		case betweenWords && ctx.info.isCased():
			if ok = lower(&ctx); ok {
				betweenWords = false
			}
		case betweenWords:
			ok = ctx.copy()
		case ctx.info.isNotCasedAndNotCaseIgnorable():
			if ok = ctx.copy(); ok {
				betweenWords = true
			}
		case ctx.hasPrefix("Σ"):
			ok = finalSigmaBody(&ctx)
		default:
			ok = lower(&ctx)
		}
		if !ok {
			break
		}
		ctx.checkpoint()
	}
	return ctx.ret()
}
265
266
// Span walks src rune by rune, checkpointing after each rune for which
// isLower reports true, and stops at the first rune for which it does not.
func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
	ctx := context{src: src, atEOF: atEOF}
	for ctx.next() {
		if !isLower(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.retSpan()
}
273
274
// lowerCaser implements the Transformer interface. The default Unicode lower
275
// casing requires different treatment for the first and subsequent characters
276
// of a word, most notably to handle the Greek final Sigma.
277
type lowerCaser struct {
278
undLowerIgnoreSigmaCaser
279
280
context
281
282
first, midWord mapFunc
283
}
284
285
// Transform lower-cases src into dst while tracking whether the current
// position is between words or inside a word.
//
// Between words, the first cased rune is mapped with t.first and starts a
// word; any other rune is copied. Inside a word, a rune that is neither cased
// nor case-ignorable is copied and ends the word; everything else is mapped
// with t.midWord. Processing stops at the first step that reports failure;
// every successful step is checkpointed.
func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	t.context = context{dst: dst, src: src, atEOF: atEOF}
	c := &t.context

	betweenWords := true
	for c.next() {
		var ok bool
		switch {
		case betweenWords && c.info.isCased():
			if ok = t.first(c); ok {
				betweenWords = false
			}
		case betweenWords:
			ok = c.copy()
		case c.info.isNotCasedAndNotCaseIgnorable():
			if ok = c.copy(); ok {
				betweenWords = true
			}
		default:
			ok = t.midWord(c)
		}
		if !ok {
			break
		}
		c.checkpoint()
	}
	return c.ret()
}
313
314
// titleCaser implements the Transformer interface. Title casing algorithms
315
// distinguish between the first letter of a word and subsequent letters of the
316
// same word. It uses state to avoid requiring a potentially infinite lookahead.
317
type titleCaser struct {
318
context
319
320
// rune mappings used by the actual casing algorithms.
321
title mapFunc
322
lower mapFunc
323
titleSpan spanFunc
324
325
rewrite func(*context)
326
}
327
328
// Transform implements the standard Unicode title case algorithm as defined in
329
// Chapter 3 of The Unicode Standard:
330
// toTitlecase(X): Find the word boundaries in X according to Unicode Standard
331
// Annex #29, "Unicode Text Segmentation." For each word boundary, find the
332
// first cased character F following the word boundary. If F exists, map F to
333
// Titlecase_Mapping(F); then map all characters C between F and the following
334
// word boundary to Lowercase_Mapping(C).
335
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
336
t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
337
c := &t.context
338
339
if !c.next() {
340
return c.ret()
341
}
342
343
for {
344
p := c.info
345
if t.rewrite != nil {
346
t.rewrite(c)
347
}
348
349
wasMid := p.isMid()
350
// Break out of this loop on failure to ensure we do not modify the
351
// state incorrectly.
352
if p.isCased() {
353
if !c.isMidWord {
354
if !t.title(c) {
355
break
356
}
357
c.isMidWord = true
358
} else if !t.lower(c) {
359
break
360
}
361
} else if !c.copy() {
362
break
363
} else if p.isBreak() {
364
c.isMidWord = false
365
}
366
367
// As we save the state of the transformer, it is safe to call
368
// checkpoint after any successful write.
369
if !(c.isMidWord && wasMid) {
370
c.checkpoint()
371
}
372
373
if !c.next() {
374
break
375
}
376
if wasMid && c.info.isMid() {
377
c.isMidWord = false
378
}
379
}
380
return c.ret()
381
}
382
383
// Span follows the same word-state logic as Transform but writes nothing.
// The first cased rune of a word is checked with t.titleSpan and the other
// cased runes of the word with isLower; the walk stops at the first rune that
// fails its check. The isMidWord state of the previous call is carried into
// this one.
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
	t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
	c := &t.context

	if !c.next() {
		return c.retSpan()
	}

	for {
		// Take the properties of the current rune before rewrite gets a
		// chance to adjust the context.
		cur := c.info
		if t.rewrite != nil {
			t.rewrite(c)
		}
		curIsMid := cur.isMid()

		// Leave the loop on failure to ensure we do not modify the state
		// incorrectly.
		ok := true
		switch {
		case !cur.isCased():
			if cur.isBreak() {
				c.isMidWord = false
			}
		case c.isMidWord:
			ok = isLower(c)
		default:
			if ok = t.titleSpan(c); ok {
				c.isMidWord = true
			}
		}
		if !ok {
			break
		}

		// As we save the state of the transformer, it is safe to call
		// checkpoint after any successful step. The checkpoint is only
		// skipped for a mid rune seen while inside a word.
		if !c.isMidWord || !curIsMid {
			c.checkpoint()
		}

		if !c.next() {
			break
		}
		// Two consecutive mid runes end the current word.
		if curIsMid && c.info.isMid() {
			c.isMidWord = false
		}
	}
	return c.retSpan()
}
427
428
// finalSigma adds Greek final Sigma handing to another casing function. It
429
// determines whether a lowercased sigma should be σ or ς, by looking ahead for
430
// case-ignorables and a cased letters.
431
func finalSigma(f mapFunc) mapFunc {
432
return func(c *context) bool {
433
if !c.hasPrefix("Σ") {
434
return f(c)
435
}
436
return finalSigmaBody(c)
437
}
438
}
439
440
// finalSigmaBody lower-cases a Σ. It first writes the final form ς and then
// looks ahead over at most maxIgnorable case-ignorable runes, copying them as
// it goes. If the first rune that is not case-ignorable is cased, the written
// ς is changed into σ. It returns false when the source runs out during the
// lookahead.
func finalSigmaBody(c *context) bool {
	// Current rune must be Σ.

	// ::NFD();
	// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
	// Σ } [:case-ignorable:]* [:cased:] → σ;
	// [:cased:] [:case-ignorable:]* { Σ → ς;
	// ::Any-Lower;
	// ::NFC();

	sigmaPos := c.pDst
	c.writeString("ς")

	// TODO: we should do this here, but right now this will never have an
	// effect as this is called when the prefix is Sigma, whereas Dutch and
	// Afrikaans only test for an apostrophe.
	//
	// if t.rewrite != nil {
	// 	t.rewrite(c)
	// }

	// We need to do one more iteration after maxIgnorable, as a cased
	// letter is not an ignorable and may modify the result.
	prevMid := false
	for left := maxIgnorable + 1; left > 0; left-- {
		if !c.next() {
			return false
		}
		if !c.info.isCaseIgnorable() {
			// All Midword runes are also case ignorable, so we are
			// guaranteed to have a letter or word break here. As we are
			// unreading the rune, there is no need to unset c.isMidWord;
			// the title caser will handle this.
			if c.info.isCased() {
				// sigmaPos+1 is guaranteed to be in bounds: if writing ς
				// was successful, sigmaPos+1 will contain the second byte
				// of ς. If not, this function will have returned after
				// c.next returned false.
				c.dst[sigmaPos+1]++ // ς → σ
			}
			c.unreadRune()
			return true
		}
		// A case ignorable may also introduce a word break, so we may need
		// to continue searching even after detecting a break.
		curMid := c.info.isMid()
		if (prevMid && curMid) || c.info.isBreak() {
			c.isMidWord = false
		}
		prevMid = curMid
		c.copy()
	}
	return true
}
493
494
// finalSigmaSpan would be the same as isLower.
495
496
// elUpper implements Greek upper casing, which entails removing a predefined
497
// set of non-blocked modifiers. Note that these accents should not be removed
498
// for title casing!
499
// Example: "Οδός" -> "ΟΔΟΣ".
500
func elUpper(c *context) bool {
501
// From CLDR:
502
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
503
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;
504
505
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
506
oldPDst := c.pDst
507
if !upper(c) {
508
return false
509
}
510
if !unicode.Is(unicode.Greek, r) {
511
return true
512
}
513
i := 0
514
// Take the properties of the uppercased rune that is already written to the
515
// destination. This saves us the trouble of having to uppercase the
516
// decomposed rune again.
517
if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
518
// Restore the destination position and process the decomposed rune.
519
r, sz := utf8.DecodeRune(b)
520
if r <= 0xFF { // See A.6.1
521
return true
522
}
523
c.pDst = oldPDst
524
// Insert the first rune and ignore the modifiers. See A.6.2.
525
c.writeBytes(b[:sz])
526
i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
527
}
528
529
for ; i < maxIgnorable && c.next(); i++ {
530
switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
531
// Above and Iota Subscript
532
case 0x0300, // U+0300 COMBINING GRAVE ACCENT
533
0x0301, // U+0301 COMBINING ACUTE ACCENT
534
0x0304, // U+0304 COMBINING MACRON
535
0x0306, // U+0306 COMBINING BREVE
536
0x0308, // U+0308 COMBINING DIAERESIS
537
0x0313, // U+0313 COMBINING COMMA ABOVE
538
0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
539
0x0342, // U+0342 COMBINING GREEK PERISPOMENI
540
0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
541
// No-op. Gobble the modifier.
542
543
default:
544
switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
545
case cccZero:
546
c.unreadRune()
547
return true
548
549
// We don't need to test for IotaSubscript as the only rune that
550
// qualifies (U+0345) was already excluded in the switch statement
551
// above. See A.4.
552
553
case cccAbove:
554
return c.copy()
555
default:
556
// Some other modifier. We're still allowed to gobble Greek
557
// modifiers after this.
558
c.copy()
559
}
560
}
561
}
562
return i == maxIgnorable
563
}
564
565
// TODO: implement elUpperSpan (low-priority: complex and infrequent).
566
567
func ltLower(c *context) bool {
568
// From CLDR:
569
// # Introduce an explicit dot above when lowercasing capital I's and J's
570
// # whenever there are more accents above.
571
// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
572
// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
573
// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
574
// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
575
// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
576
// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
577
// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
578
// ::NFD();
579
// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
580
// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
581
// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
582
// I \u0300 (Ì) → i \u0307 \u0300;
583
// I \u0301 (Í) → i \u0307 \u0301;
584
// I \u0303 (Ĩ) → i \u0307 \u0303;
585
// ::Any-Lower();
586
// ::NFC();
587
588
i := 0
589
if r := c.src[c.pSrc]; r < utf8.RuneSelf {
590
lower(c)
591
if r != 'I' && r != 'J' {
592
return true
593
}
594
} else {
595
p := norm.NFD.Properties(c.src[c.pSrc:])
596
if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
597
// UTF-8 optimization: the decomposition will only have an above
598
// modifier if the last rune of the decomposition is in [U+300-U+311].
599
// In all other cases, a decomposition starting with I is always
600
// an I followed by modifiers that are not cased themselves. See A.2.
601
if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
602
if !c.writeBytes(d[:1]) {
603
return false
604
}
605
c.dst[c.pDst-1] += 'a' - 'A' // lower
606
607
// Assumption: modifier never changes on lowercase. See A.1.
608
// Assumption: all modifiers added have CCC = Above. See A.2.3.
609
return c.writeString("\u0307") && c.writeBytes(d[1:])
610
}
611
// In all other cases the additional modifiers will have a CCC
612
// that is less than 230 (Above). We will insert the U+0307, if
613
// needed, after these modifiers so that a string in FCD form
614
// will remain so. See A.2.2.
615
lower(c)
616
i = 1
617
} else {
618
return lower(c)
619
}
620
}
621
622
for ; i < maxIgnorable && c.next(); i++ {
623
switch c.info.cccType() {
624
case cccZero:
625
c.unreadRune()
626
return true
627
case cccAbove:
628
return c.writeString("\u0307") && c.copy() // See A.1.
629
default:
630
c.copy() // See A.1.
631
}
632
}
633
return i == maxIgnorable
634
}
635
636
// ltLowerSpan would be the same as isLower.
637
638
func ltUpper(f mapFunc) mapFunc {
639
return func(c *context) bool {
640
// Unicode:
641
// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
642
//
643
// From CLDR:
644
// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
645
// # intervening non-230 marks.
646
// ::NFD();
647
// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
648
// ::Any-Upper();
649
// ::NFC();
650
651
// TODO: See A.5. A soft-dotted rune never has an exception. This would
652
// allow us to overload the exception bit and encode this property in
653
// info. Need to measure performance impact of this.
654
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
655
oldPDst := c.pDst
656
if !f(c) {
657
return false
658
}
659
if !unicode.Is(unicode.Soft_Dotted, r) {
660
return true
661
}
662
663
// We don't need to do an NFD normalization, as a soft-dotted rune never
664
// contains U+0307. See A.3.
665
666
i := 0
667
for ; i < maxIgnorable && c.next(); i++ {
668
switch c.info.cccType() {
669
case cccZero:
670
c.unreadRune()
671
return true
672
case cccAbove:
673
if c.hasPrefix("\u0307") {
674
// We don't do a full NFC, but rather combine runes for
675
// some of the common cases. (Returning NFC or
676
// preserving normal form is neither a requirement nor
677
// a possibility anyway).
678
if !c.next() {
679
return false
680
}
681
if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
682
s := ""
683
switch c.src[c.pSrc+1] {
684
case 0x80: // U+0300 COMBINING GRAVE ACCENT
685
s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
686
case 0x81: // U+0301 COMBINING ACUTE ACCENT
687
s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
688
case 0x83: // U+0303 COMBINING TILDE
689
s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
690
case 0x88: // U+0308 COMBINING DIAERESIS
691
s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
692
default:
693
}
694
if s != "" {
695
c.pDst = oldPDst
696
return c.writeString(s)
697
}
698
}
699
}
700
return c.copy()
701
default:
702
c.copy()
703
}
704
}
705
return i == maxIgnorable
706
}
707
}
708
709
// TODO: implement ltUpperSpan (low priority: complex and infrequent).
710
711
// aztrUpper wraps the casing function f with the Turkish and Azeri rule
// i→İ. Every source rune that does not start with the byte 'i' is handed to
// f unchanged.
func aztrUpper(f mapFunc) mapFunc {
	return func(c *context) bool {
		if c.src[c.pSrc] != 'i' {
			return f(c)
		}
		// i→İ;
		return c.writeString("İ")
	}
}
720
721
func aztrLower(c *context) (done bool) {
722
// From CLDR:
723
// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
724
// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
725
// İ→i;
726
// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
727
// # This matches the behavior of the canonically equivalent I-dot_above
728
// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
729
// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
730
// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
731
// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
732
// I→ı ;
733
// ::Any-Lower();
734
if c.hasPrefix("\u0130") { // İ
735
return c.writeString("i")
736
}
737
if c.src[c.pSrc] != 'I' {
738
return lower(c)
739
}
740
741
// We ignore the lower-case I for now, but insert it later when we know
742
// which form we need.
743
start := c.pSrc + c.sz
744
745
i := 0
746
Loop:
747
// We check for up to n ignorables before \u0307. As \u0307 is an
748
// ignorable as well, n is maxIgnorable-1.
749
for ; i < maxIgnorable && c.next(); i++ {
750
switch c.info.cccType() {
751
case cccAbove:
752
if c.hasPrefix("\u0307") {
753
return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
754
}
755
done = true
756
break Loop
757
case cccZero:
758
c.unreadRune()
759
done = true
760
break Loop
761
default:
762
// We'll write this rune after we know which starter to use.
763
}
764
}
765
if i == maxIgnorable {
766
done = true
767
}
768
return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
769
}
770
771
// aztrLowerSpan would be the same as isLower.
772
773
// nlTitle implements Dutch title casing. A rune starting with the byte 'I'
// or 'i' is written as "I", and a directly following 'j' or 'J' is written
// as "J". Every other rune is handed to title.
func nlTitle(c *context) bool {
	// From CLDR:
	// # Special titlecasing for Dutch initial "ij".
	// ::Any-Title();
	// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
	// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
	switch c.src[c.pSrc] {
	case 'I', 'i':
		// Handled below.
	default:
		return title(c)
	}

	if !c.writeString("I") {
		return false
	}
	if !c.next() {
		return false
	}
	switch c.src[c.pSrc] {
	case 'j', 'J':
		return c.writeString("J")
	}
	// Not part of an "ij" pair: leave the rune for the caller.
	c.unreadRune()
	return true
}
792
793
// nlTitleSpan is the span counterpart of nlTitle. A rune that does not start
// with the byte 'I' is checked with isTitle. After an 'I', a following 'j'
// ends the span, a following 'J' is accepted as part of the pair, and any
// other rune is unread and left for the caller.
func nlTitleSpan(c *context) bool {
	// From CLDR:
	// # Special titlecasing for Dutch initial "ij".
	// ::Any-Title();
	// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
	// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
	if c.src[c.pSrc] != 'I' {
		return isTitle(c)
	}
	if !c.next() {
		return false
	}
	switch c.src[c.pSrc] {
	case 'j':
		return false
	case 'J':
		return true
	}
	c.unreadRune()
	return true
}
810
811
// Not part of CLDR, but see https://unicode.org/cldr/trac/ticket/7078.
812
func afnlRewrite(c *context) {
813
if c.hasPrefix("'") || c.hasPrefix("’") {
814
c.isMidWord = true
815
}
816
}
817
818