Path: blob/main/vendor/golang.org/x/text/cases/context.go
2880 views
// Copyright 2014 The Go Authors. All rights reserved.1// Use of this source code is governed by a BSD-style2// license that can be found in the LICENSE file.34package cases56import "golang.org/x/text/transform"78// A context is used for iterating over source bytes, fetching case info and9// writing to a destination buffer.10//11// Casing operations may need more than one rune of context to decide how a rune12// should be cased. Casing implementations should call checkpoint on context13// whenever it is known to be safe to return the runes processed so far.14//15// It is recommended for implementations to not allow for more than 30 case16// ignorables as lookahead (analogous to the limit in norm) and to use state if17// unbounded lookahead is needed for cased runes.18type context struct {19dst, src []byte20atEOF bool2122pDst int // pDst points past the last written rune in dst.23pSrc int // pSrc points to the start of the currently scanned rune.2425// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.26nDst, nSrc int27err error2829sz int // size of current rune30info info // case information of currently scanned rune3132// State preserved across calls to Transform.33isMidWord bool // false if next cased letter needs to be title-cased.34}3536func (c *context) Reset() {37c.isMidWord = false38}3940// ret returns the return values for the Transform method. It checks whether41// there were insufficient bytes in src to complete and introduces an error42// accordingly, if necessary.43func (c *context) ret() (nDst, nSrc int, err error) {44if c.err != nil || c.nSrc == len(c.src) {45return c.nDst, c.nSrc, c.err46}47// This point is only reached by mappers if there was no short destination48// buffer. This means that the source buffer was exhausted and that c.sz was49// set to 0 by next.50if c.atEOF && c.pSrc == len(c.src) {51return c.pDst, c.pSrc, nil52}53return c.nDst, c.nSrc, transform.ErrShortSrc54}5556// retSpan returns the return values for the Span method. It checks whether57// there were insufficient bytes in src to complete and introduces an error58// accordingly, if necessary.59func (c *context) retSpan() (n int, err error) {60_, nSrc, err := c.ret()61return nSrc, err62}6364// checkpoint sets the return value buffer points for Transform to the current65// positions.66func (c *context) checkpoint() {67if c.err == nil {68c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz69}70}7172// unreadRune causes the last rune read by next to be reread on the next73// invocation of next. Only one unreadRune may be called after a call to next.74func (c *context) unreadRune() {75c.sz = 076}7778func (c *context) next() bool {79c.pSrc += c.sz80if c.pSrc == len(c.src) || c.err != nil {81c.info, c.sz = 0, 082return false83}84v, sz := trie.lookup(c.src[c.pSrc:])85c.info, c.sz = info(v), sz86if c.sz == 0 {87if c.atEOF {88// A zero size means we have an incomplete rune. If we are atEOF,89// this means it is an illegal rune, which we will consume one90// byte at a time.91c.sz = 192} else {93c.err = transform.ErrShortSrc94return false95}96}97return true98}99100// writeBytes adds bytes to dst.101func (c *context) writeBytes(b []byte) bool {102if len(c.dst)-c.pDst < len(b) {103c.err = transform.ErrShortDst104return false105}106// This loop is faster than using copy.107for _, ch := range b {108c.dst[c.pDst] = ch109c.pDst++110}111return true112}113114// writeString writes the given string to dst.115func (c *context) writeString(s string) bool {116if len(c.dst)-c.pDst < len(s) {117c.err = transform.ErrShortDst118return false119}120// This loop is faster than using copy.121for i := 0; i < len(s); i++ {122c.dst[c.pDst] = s[i]123c.pDst++124}125return true126}127128// copy writes the current rune to dst.129func (c *context) copy() bool {130return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])131}132133// copyXOR copies the current rune to dst and modifies it by applying the XOR134// pattern of the case info. It is the responsibility of the caller to ensure135// that this is a rune with a XOR pattern defined.136func (c *context) copyXOR() bool {137if !c.copy() {138return false139}140if c.info&xorIndexBit == 0 {141// Fast path for 6-bit XOR pattern, which covers most cases.142c.dst[c.pDst-1] ^= byte(c.info >> xorShift)143} else {144// Interpret XOR bits as an index.145// TODO: test performance for unrolling this loop. Verify that we have146// at least two bytes and at most three.147idx := c.info >> xorShift148for p := c.pDst - 1; ; p-- {149c.dst[p] ^= xorData[idx]150idx--151if xorData[idx] == 0 {152break153}154}155}156return true157}158159// hasPrefix returns true if src[pSrc:] starts with the given string.160func (c *context) hasPrefix(s string) bool {161b := c.src[c.pSrc:]162if len(b) < len(s) {163return false164}165for i, c := range b[:len(s)] {166if c != s[i] {167return false168}169}170return true171}172173// caseType returns an info with only the case bits, normalized to either174// cLower, cUpper, cTitle or cUncased.175func (c *context) caseType() info {176cm := c.info & 0x7177if cm < 4 {178return cm179}180if cm >= cXORCase {181// xor the last bit of the rune with the case type bits.182b := c.src[c.pSrc+c.sz-1]183return info(b&1) ^ cm&0x3184}185if cm == cIgnorableCased {186return cLower187}188return cUncased189}190191// lower writes the lowercase version of the current rune to dst.192func lower(c *context) bool {193ct := c.caseType()194if c.info&hasMappingMask == 0 || ct == cLower {195return c.copy()196}197if c.info&exceptionBit == 0 {198return c.copyXOR()199}200e := exceptions[c.info>>exceptionShift:]201offset := 2 + e[0]&lengthMask // size of header + fold string202if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {203return c.writeString(e[offset : offset+nLower])204}205return c.copy()206}207208func isLower(c *context) bool {209ct := c.caseType()210if c.info&hasMappingMask == 0 || ct == cLower {211return true212}213if c.info&exceptionBit == 0 {214c.err = transform.ErrEndOfSpan215return false216}217e := exceptions[c.info>>exceptionShift:]218if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {219c.err = transform.ErrEndOfSpan220return false221}222return true223}224225// upper writes the uppercase version of the current rune to dst.226func upper(c *context) bool {227ct := c.caseType()228if c.info&hasMappingMask == 0 || ct == cUpper {229return c.copy()230}231if c.info&exceptionBit == 0 {232return c.copyXOR()233}234e := exceptions[c.info>>exceptionShift:]235offset := 2 + e[0]&lengthMask // size of header + fold string236// Get length of first special case mapping.237n := (e[1] >> lengthBits) & lengthMask238if ct == cTitle {239// The first special case mapping is for lower. Set n to the second.240if n == noChange {241n = 0242}243n, e = e[1]&lengthMask, e[n:]244}245if n != noChange {246return c.writeString(e[offset : offset+n])247}248return c.copy()249}250251// isUpper writes the isUppercase version of the current rune to dst.252func isUpper(c *context) bool {253ct := c.caseType()254if c.info&hasMappingMask == 0 || ct == cUpper {255return true256}257if c.info&exceptionBit == 0 {258c.err = transform.ErrEndOfSpan259return false260}261e := exceptions[c.info>>exceptionShift:]262// Get length of first special case mapping.263n := (e[1] >> lengthBits) & lengthMask264if ct == cTitle {265n = e[1] & lengthMask266}267if n != noChange {268c.err = transform.ErrEndOfSpan269return false270}271return true272}273274// title writes the title case version of the current rune to dst.275func title(c *context) bool {276ct := c.caseType()277if c.info&hasMappingMask == 0 || ct == cTitle {278return c.copy()279}280if c.info&exceptionBit == 0 {281if ct == cLower {282return c.copyXOR()283}284return c.copy()285}286// Get the exception data.287e := exceptions[c.info>>exceptionShift:]288offset := 2 + e[0]&lengthMask // size of header + fold string289290nFirst := (e[1] >> lengthBits) & lengthMask291if nTitle := e[1] & lengthMask; nTitle != noChange {292if nFirst != noChange {293e = e[nFirst:]294}295return c.writeString(e[offset : offset+nTitle])296}297if ct == cLower && nFirst != noChange {298// Use the uppercase version instead.299return c.writeString(e[offset : offset+nFirst])300}301// Already in correct case.302return c.copy()303}304305// isTitle reports whether the current rune is in title case.306func isTitle(c *context) bool {307ct := c.caseType()308if c.info&hasMappingMask == 0 || ct == cTitle {309return true310}311if c.info&exceptionBit == 0 {312if ct == cLower {313c.err = transform.ErrEndOfSpan314return false315}316return true317}318// Get the exception data.319e := exceptions[c.info>>exceptionShift:]320if nTitle := e[1] & lengthMask; nTitle != noChange {321c.err = transform.ErrEndOfSpan322return false323}324nFirst := (e[1] >> lengthBits) & lengthMask325if ct == cLower && nFirst != noChange {326c.err = transform.ErrEndOfSpan327return false328}329return true330}331332// foldFull writes the foldFull version of the current rune to dst.333func foldFull(c *context) bool {334if c.info&hasMappingMask == 0 {335return c.copy()336}337ct := c.caseType()338if c.info&exceptionBit == 0 {339if ct != cLower || c.info&inverseFoldBit != 0 {340return c.copyXOR()341}342return c.copy()343}344e := exceptions[c.info>>exceptionShift:]345n := e[0] & lengthMask346if n == 0 {347if ct == cLower {348return c.copy()349}350n = (e[1] >> lengthBits) & lengthMask351}352return c.writeString(e[2 : 2+n])353}354355// isFoldFull reports whether the current run is mapped to foldFull356func isFoldFull(c *context) bool {357if c.info&hasMappingMask == 0 {358return true359}360ct := c.caseType()361if c.info&exceptionBit == 0 {362if ct != cLower || c.info&inverseFoldBit != 0 {363c.err = transform.ErrEndOfSpan364return false365}366return true367}368e := exceptions[c.info>>exceptionShift:]369n := e[0] & lengthMask370if n == 0 && ct == cLower {371return true372}373c.err = transform.ErrEndOfSpan374return false375}376377378