Path: blob/main/vendor/golang.org/x/text/cases/map.go
2880 views
// Copyright 2014 The Go Authors. All rights reserved.1// Use of this source code is governed by a BSD-style2// license that can be found in the LICENSE file.34package cases56// This file contains the definitions of case mappings for all supported7// languages. The rules for the language-specific tailorings were taken and8// modified from the CLDR transform definitions in common/transforms.910import (11"strings"12"unicode"13"unicode/utf8"1415"golang.org/x/text/internal"16"golang.org/x/text/language"17"golang.org/x/text/transform"18"golang.org/x/text/unicode/norm"19)2021// A mapFunc takes a context set to the current rune and writes the mapped22// version to the same context. It may advance the context to the next rune. It23// returns whether a checkpoint is possible: whether the pDst bytes written to24// dst so far won't need changing as we see more source bytes.25type mapFunc func(*context) bool2627// A spanFunc takes a context set to the current rune and returns whether this28// rune would be altered when written to the output. It may advance the context29// to the next rune. It returns whether a checkpoint is possible.30type spanFunc func(*context) bool3132// maxIgnorable defines the maximum number of ignorables to consider for33// lookahead operations.34const maxIgnorable = 303536// supported lists the language tags for which we have tailorings.37const supported = "und af az el lt nl tr"3839func init() {40tags := []language.Tag{}41for _, s := range strings.Split(supported, " ") {42tags = append(tags, language.MustParse(s))43}44matcher = internal.NewInheritanceMatcher(tags)45Supported = language.NewCoverage(tags)46}4748var (49matcher *internal.InheritanceMatcher5051Supported language.Coverage5253// We keep the following lists separate, instead of having a single per-54// language struct, to give the compiler a chance to remove unused code.5556// Some uppercase mappers are stateless, so we can precompute the57// Transformers and save a bit on runtime allocations.58upperFunc = []struct {59upper mapFunc60span spanFunc61}{62{nil, nil}, // und63{nil, nil}, // af64{aztrUpper(upper), isUpper}, // az65{elUpper, noSpan}, // el66{ltUpper(upper), noSpan}, // lt67{nil, nil}, // nl68{aztrUpper(upper), isUpper}, // tr69}7071undUpper transform.SpanningTransformer = &undUpperCaser{}72undLower transform.SpanningTransformer = &undLowerCaser{}73undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}7475lowerFunc = []mapFunc{76nil, // und77nil, // af78aztrLower, // az79nil, // el80ltLower, // lt81nil, // nl82aztrLower, // tr83}8485titleInfos = []struct {86title mapFunc87lower mapFunc88titleSpan spanFunc89rewrite func(*context)90}{91{title, lower, isTitle, nil}, // und92{title, lower, isTitle, afnlRewrite}, // af93{aztrUpper(title), aztrLower, isTitle, nil}, // az94{title, lower, isTitle, nil}, // el95{ltUpper(title), ltLower, noSpan, nil}, // lt96{nlTitle, lower, nlTitleSpan, afnlRewrite}, // nl97{aztrUpper(title), aztrLower, isTitle, nil}, // tr98}99)100101func makeUpper(t language.Tag, o options) transform.SpanningTransformer {102_, i, _ := matcher.Match(t)103f := upperFunc[i].upper104if f == nil {105return undUpper106}107return &simpleCaser{f: f, span: upperFunc[i].span}108}109110func makeLower(t language.Tag, o options) transform.SpanningTransformer {111_, i, _ := matcher.Match(t)112f := lowerFunc[i]113if f == nil {114if o.ignoreFinalSigma {115return undLowerIgnoreSigma116}117return undLower118}119if o.ignoreFinalSigma {120return &simpleCaser{f: f, span: isLower}121}122return &lowerCaser{123first: f,124midWord: finalSigma(f),125}126}127128func makeTitle(t language.Tag, o options) transform.SpanningTransformer {129_, i, _ := matcher.Match(t)130x := &titleInfos[i]131lower := x.lower132if o.noLower {133lower = (*context).copy134} else if !o.ignoreFinalSigma {135lower = finalSigma(lower)136}137return &titleCaser{138title: x.title,139lower: lower,140titleSpan: x.titleSpan,141rewrite: x.rewrite,142}143}144145func noSpan(c *context) bool {146c.err = transform.ErrEndOfSpan147return false148}149150// TODO: consider a similar special case for the fast majority lower case. This151// is a bit more involved so will require some more precise benchmarking to152// justify it.153154type undUpperCaser struct{ transform.NopResetter }155156// undUpperCaser implements the Transformer interface for doing an upper case157// mapping for the root locale (und). It eliminates the need for an allocation158// as it prevents escaping by not using function pointers.159func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {160c := context{dst: dst, src: src, atEOF: atEOF}161for c.next() {162upper(&c)163c.checkpoint()164}165return c.ret()166}167168func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {169c := context{src: src, atEOF: atEOF}170for c.next() && isUpper(&c) {171c.checkpoint()172}173return c.retSpan()174}175176// undLowerIgnoreSigmaCaser implements the Transformer interface for doing177// a lower case mapping for the root locale (und) ignoring final sigma178// handling. This casing algorithm is used in some performance-critical packages179// like secure/precis and x/net/http/idna, which warrants its special-casing.180type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }181182func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {183c := context{dst: dst, src: src, atEOF: atEOF}184for c.next() && lower(&c) {185c.checkpoint()186}187return c.ret()188189}190191// Span implements a generic lower-casing. This is possible as isLower works192// for all lowercasing variants. All lowercase variants only vary in how they193// transform a non-lowercase letter. They will never change an already lowercase194// letter. In addition, there is no state.195func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {196c := context{src: src, atEOF: atEOF}197for c.next() && isLower(&c) {198c.checkpoint()199}200return c.retSpan()201}202203type simpleCaser struct {204context205f mapFunc206span spanFunc207}208209// simpleCaser implements the Transformer interface for doing a case operation210// on a rune-by-rune basis.211func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {212c := context{dst: dst, src: src, atEOF: atEOF}213for c.next() && t.f(&c) {214c.checkpoint()215}216return c.ret()217}218219func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {220c := context{src: src, atEOF: atEOF}221for c.next() && t.span(&c) {222c.checkpoint()223}224return c.retSpan()225}226227// undLowerCaser implements the Transformer interface for doing a lower case228// mapping for the root locale (und) ignoring final sigma handling. This casing229// algorithm is used in some performance-critical packages like secure/precis230// and x/net/http/idna, which warrants its special-casing.231type undLowerCaser struct{ transform.NopResetter }232233func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {234c := context{dst: dst, src: src, atEOF: atEOF}235236for isInterWord := true; c.next(); {237if isInterWord {238if c.info.isCased() {239if !lower(&c) {240break241}242isInterWord = false243} else if !c.copy() {244break245}246} else {247if c.info.isNotCasedAndNotCaseIgnorable() {248if !c.copy() {249break250}251isInterWord = true252} else if !c.hasPrefix("Σ") {253if !lower(&c) {254break255}256} else if !finalSigmaBody(&c) {257break258}259}260c.checkpoint()261}262return c.ret()263}264265func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {266c := context{src: src, atEOF: atEOF}267for c.next() && isLower(&c) {268c.checkpoint()269}270return c.retSpan()271}272273// lowerCaser implements the Transformer interface. The default Unicode lower274// casing requires different treatment for the first and subsequent characters275// of a word, most notably to handle the Greek final Sigma.276type lowerCaser struct {277undLowerIgnoreSigmaCaser278279context280281first, midWord mapFunc282}283284func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {285t.context = context{dst: dst, src: src, atEOF: atEOF}286c := &t.context287288for isInterWord := true; c.next(); {289if isInterWord {290if c.info.isCased() {291if !t.first(c) {292break293}294isInterWord = false295} else if !c.copy() {296break297}298} else {299if c.info.isNotCasedAndNotCaseIgnorable() {300if !c.copy() {301break302}303isInterWord = true304} else if !t.midWord(c) {305break306}307}308c.checkpoint()309}310return c.ret()311}312313// titleCaser implements the Transformer interface. Title casing algorithms314// distinguish between the first letter of a word and subsequent letters of the315// same word. It uses state to avoid requiring a potentially infinite lookahead.316type titleCaser struct {317context318319// rune mappings used by the actual casing algorithms.320title mapFunc321lower mapFunc322titleSpan spanFunc323324rewrite func(*context)325}326327// Transform implements the standard Unicode title case algorithm as defined in328// Chapter 3 of The Unicode Standard:329// toTitlecase(X): Find the word boundaries in X according to Unicode Standard330// Annex #29, "Unicode Text Segmentation." For each word boundary, find the331// first cased character F following the word boundary. If F exists, map F to332// Titlecase_Mapping(F); then map all characters C between F and the following333// word boundary to Lowercase_Mapping(C).334func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {335t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}336c := &t.context337338if !c.next() {339return c.ret()340}341342for {343p := c.info344if t.rewrite != nil {345t.rewrite(c)346}347348wasMid := p.isMid()349// Break out of this loop on failure to ensure we do not modify the350// state incorrectly.351if p.isCased() {352if !c.isMidWord {353if !t.title(c) {354break355}356c.isMidWord = true357} else if !t.lower(c) {358break359}360} else if !c.copy() {361break362} else if p.isBreak() {363c.isMidWord = false364}365366// As we save the state of the transformer, it is safe to call367// checkpoint after any successful write.368if !(c.isMidWord && wasMid) {369c.checkpoint()370}371372if !c.next() {373break374}375if wasMid && c.info.isMid() {376c.isMidWord = false377}378}379return c.ret()380}381382func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {383t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}384c := &t.context385386if !c.next() {387return c.retSpan()388}389390for {391p := c.info392if t.rewrite != nil {393t.rewrite(c)394}395396wasMid := p.isMid()397// Break out of this loop on failure to ensure we do not modify the398// state incorrectly.399if p.isCased() {400if !c.isMidWord {401if !t.titleSpan(c) {402break403}404c.isMidWord = true405} else if !isLower(c) {406break407}408} else if p.isBreak() {409c.isMidWord = false410}411// As we save the state of the transformer, it is safe to call412// checkpoint after any successful write.413if !(c.isMidWord && wasMid) {414c.checkpoint()415}416417if !c.next() {418break419}420if wasMid && c.info.isMid() {421c.isMidWord = false422}423}424return c.retSpan()425}426427// finalSigma adds Greek final Sigma handing to another casing function. It428// determines whether a lowercased sigma should be σ or ς, by looking ahead for429// case-ignorables and a cased letters.430func finalSigma(f mapFunc) mapFunc {431return func(c *context) bool {432if !c.hasPrefix("Σ") {433return f(c)434}435return finalSigmaBody(c)436}437}438439func finalSigmaBody(c *context) bool {440// Current rune must be ∑.441442// ::NFD();443// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA444// Σ } [:case-ignorable:]* [:cased:] → σ;445// [:cased:] [:case-ignorable:]* { Σ → ς;446// ::Any-Lower;447// ::NFC();448449p := c.pDst450c.writeString("ς")451452// TODO: we should do this here, but right now this will never have an453// effect as this is called when the prefix is Sigma, whereas Dutch and454// Afrikaans only test for an apostrophe.455//456// if t.rewrite != nil {457// t.rewrite(c)458// }459460// We need to do one more iteration after maxIgnorable, as a cased461// letter is not an ignorable and may modify the result.462wasMid := false463for i := 0; i < maxIgnorable+1; i++ {464if !c.next() {465return false466}467if !c.info.isCaseIgnorable() {468// All Midword runes are also case ignorable, so we are469// guaranteed to have a letter or word break here. As we are470// unreading the run, there is no need to unset c.isMidWord;471// the title caser will handle this.472if c.info.isCased() {473// p+1 is guaranteed to be in bounds: if writing ς was474// successful, p+1 will contain the second byte of ς. If not,475// this function will have returned after c.next returned false.476c.dst[p+1]++ // ς → σ477}478c.unreadRune()479return true480}481// A case ignorable may also introduce a word break, so we may need482// to continue searching even after detecting a break.483isMid := c.info.isMid()484if (wasMid && isMid) || c.info.isBreak() {485c.isMidWord = false486}487wasMid = isMid488c.copy()489}490return true491}492493// finalSigmaSpan would be the same as isLower.494495// elUpper implements Greek upper casing, which entails removing a predefined496// set of non-blocked modifiers. Note that these accents should not be removed497// for title casing!498// Example: "Οδός" -> "ΟΔΟΣ".499func elUpper(c *context) bool {500// From CLDR:501// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;502// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;503504r, _ := utf8.DecodeRune(c.src[c.pSrc:])505oldPDst := c.pDst506if !upper(c) {507return false508}509if !unicode.Is(unicode.Greek, r) {510return true511}512i := 0513// Take the properties of the uppercased rune that is already written to the514// destination. This saves us the trouble of having to uppercase the515// decomposed rune again.516if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {517// Restore the destination position and process the decomposed rune.518r, sz := utf8.DecodeRune(b)519if r <= 0xFF { // See A.6.1520return true521}522c.pDst = oldPDst523// Insert the first rune and ignore the modifiers. See A.6.2.524c.writeBytes(b[:sz])525i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.526}527528for ; i < maxIgnorable && c.next(); i++ {529switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {530// Above and Iota Subscript531case 0x0300, // U+0300 COMBINING GRAVE ACCENT5320x0301, // U+0301 COMBINING ACUTE ACCENT5330x0304, // U+0304 COMBINING MACRON5340x0306, // U+0306 COMBINING BREVE5350x0308, // U+0308 COMBINING DIAERESIS5360x0313, // U+0313 COMBINING COMMA ABOVE5370x0314, // U+0314 COMBINING REVERSED COMMA ABOVE5380x0342, // U+0342 COMBINING GREEK PERISPOMENI5390x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI540// No-op. Gobble the modifier.541542default:543switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {544case cccZero:545c.unreadRune()546return true547548// We don't need to test for IotaSubscript as the only rune that549// qualifies (U+0345) was already excluded in the switch statement550// above. See A.4.551552case cccAbove:553return c.copy()554default:555// Some other modifier. We're still allowed to gobble Greek556// modifiers after this.557c.copy()558}559}560}561return i == maxIgnorable562}563564// TODO: implement elUpperSpan (low-priority: complex and infrequent).565566func ltLower(c *context) bool {567// From CLDR:568// # Introduce an explicit dot above when lowercasing capital I's and J's569// # whenever there are more accents above.570// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)571// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I572// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J573// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK574// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE575// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE576// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE577// ::NFD();578// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;579// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;580// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;581// I \u0300 (Ì) → i \u0307 \u0300;582// I \u0301 (Í) → i \u0307 \u0301;583// I \u0303 (Ĩ) → i \u0307 \u0303;584// ::Any-Lower();585// ::NFC();586587i := 0588if r := c.src[c.pSrc]; r < utf8.RuneSelf {589lower(c)590if r != 'I' && r != 'J' {591return true592}593} else {594p := norm.NFD.Properties(c.src[c.pSrc:])595if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {596// UTF-8 optimization: the decomposition will only have an above597// modifier if the last rune of the decomposition is in [U+300-U+311].598// In all other cases, a decomposition starting with I is always599// an I followed by modifiers that are not cased themselves. See A.2.600if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.601if !c.writeBytes(d[:1]) {602return false603}604c.dst[c.pDst-1] += 'a' - 'A' // lower605606// Assumption: modifier never changes on lowercase. See A.1.607// Assumption: all modifiers added have CCC = Above. See A.2.3.608return c.writeString("\u0307") && c.writeBytes(d[1:])609}610// In all other cases the additional modifiers will have a CCC611// that is less than 230 (Above). We will insert the U+0307, if612// needed, after these modifiers so that a string in FCD form613// will remain so. See A.2.2.614lower(c)615i = 1616} else {617return lower(c)618}619}620621for ; i < maxIgnorable && c.next(); i++ {622switch c.info.cccType() {623case cccZero:624c.unreadRune()625return true626case cccAbove:627return c.writeString("\u0307") && c.copy() // See A.1.628default:629c.copy() // See A.1.630}631}632return i == maxIgnorable633}634635// ltLowerSpan would be the same as isLower.636637func ltUpper(f mapFunc) mapFunc {638return func(c *context) bool {639// Unicode:640// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE641//642// From CLDR:643// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible644// # intervening non-230 marks.645// ::NFD();646// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;647// ::Any-Upper();648// ::NFC();649650// TODO: See A.5. A soft-dotted rune never has an exception. This would651// allow us to overload the exception bit and encode this property in652// info. Need to measure performance impact of this.653r, _ := utf8.DecodeRune(c.src[c.pSrc:])654oldPDst := c.pDst655if !f(c) {656return false657}658if !unicode.Is(unicode.Soft_Dotted, r) {659return true660}661662// We don't need to do an NFD normalization, as a soft-dotted rune never663// contains U+0307. See A.3.664665i := 0666for ; i < maxIgnorable && c.next(); i++ {667switch c.info.cccType() {668case cccZero:669c.unreadRune()670return true671case cccAbove:672if c.hasPrefix("\u0307") {673// We don't do a full NFC, but rather combine runes for674// some of the common cases. (Returning NFC or675// preserving normal form is neither a requirement nor676// a possibility anyway).677if !c.next() {678return false679}680if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {681s := ""682switch c.src[c.pSrc+1] {683case 0x80: // U+0300 COMBINING GRAVE ACCENT684s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE685case 0x81: // U+0301 COMBINING ACUTE ACCENT686s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE687case 0x83: // U+0303 COMBINING TILDE688s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE689case 0x88: // U+0308 COMBINING DIAERESIS690s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS691default:692}693if s != "" {694c.pDst = oldPDst695return c.writeString(s)696}697}698}699return c.copy()700default:701c.copy()702}703}704return i == maxIgnorable705}706}707708// TODO: implement ltUpperSpan (low priority: complex and infrequent).709710func aztrUpper(f mapFunc) mapFunc {711return func(c *context) bool {712// i→İ;713if c.src[c.pSrc] == 'i' {714return c.writeString("İ")715}716return f(c)717}718}719720func aztrLower(c *context) (done bool) {721// From CLDR:722// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri723// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE724// İ→i;725// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.726// # This matches the behavior of the canonically equivalent I-dot_above727// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE728// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.729// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I730// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;731// I→ı ;732// ::Any-Lower();733if c.hasPrefix("\u0130") { // İ734return c.writeString("i")735}736if c.src[c.pSrc] != 'I' {737return lower(c)738}739740// We ignore the lower-case I for now, but insert it later when we know741// which form we need.742start := c.pSrc + c.sz743744i := 0745Loop:746// We check for up to n ignorables before \u0307. As \u0307 is an747// ignorable as well, n is maxIgnorable-1.748for ; i < maxIgnorable && c.next(); i++ {749switch c.info.cccType() {750case cccAbove:751if c.hasPrefix("\u0307") {752return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307753}754done = true755break Loop756case cccZero:757c.unreadRune()758done = true759break Loop760default:761// We'll write this rune after we know which starter to use.762}763}764if i == maxIgnorable {765done = true766}767return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done768}769770// aztrLowerSpan would be the same as isLower.771772func nlTitle(c *context) bool {773// From CLDR:774// # Special titlecasing for Dutch initial "ij".775// ::Any-Title();776// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)777// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;778if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' {779return title(c)780}781782if !c.writeString("I") || !c.next() {783return false784}785if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' {786return c.writeString("J")787}788c.unreadRune()789return true790}791792func nlTitleSpan(c *context) bool {793// From CLDR:794// # Special titlecasing for Dutch initial "ij".795// ::Any-Title();796// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)797// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;798if c.src[c.pSrc] != 'I' {799return isTitle(c)800}801if !c.next() || c.src[c.pSrc] == 'j' {802return false803}804if c.src[c.pSrc] != 'J' {805c.unreadRune()806}807return true808}809810// Not part of CLDR, but see https://unicode.org/cldr/trac/ticket/7078.811func afnlRewrite(c *context) {812if c.hasPrefix("'") || c.hasPrefix("’") {813c.isMidWord = true814}815}816817818