Path: blob/main/vendor/golang.org/x/text/cases/trieval.go
2880 views
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.12package cases34// This file contains definitions for interpreting the trie value of the case5// trie generated by "go run gen*.go". It is shared by both the generator6// program and the resultant package. Sharing is achieved by the generator7// copying gen_trieval.go to trieval.go and changing what's above this comment.89// info holds case information for a single rune. It is the value returned10// by a trie lookup. Most mapping information can be stored in a single 16-bit11// value. If not, for example when a rune is mapped to multiple runes, the value12// stores some basic case data and an index into an array with additional data.13//14// The per-rune values have the following format:15//16// if (exception) {17// 15..4 unsigned exception index18// } else {19// 15..8 XOR pattern or index to XOR pattern for case mapping20// Only 13..8 are used for XOR patterns.21// 7 inverseFold (fold to upper, not to lower)22// 6 index: interpret the XOR pattern as an index23// or isMid if case mode is cIgnorableUncased.24// 5..4 CCC: zero (normal or break), above or other25// }26// 3 exception: interpret this value as an exception index27// (TODO: is this bit necessary? Probably implied from case mode.)28// 2..0 case mode29//30// For the non-exceptional cases, a rune must be either uncased, lowercase or31// uppercase. If the rune is cased, the XOR pattern maps either a lowercase32// rune to uppercase or an uppercase rune to lowercase (applied to the 1033// least-significant bits of the rune).34//35// See the definitions below for a more detailed description of the various36// bits.37type info uint163839const (40casedMask = 0x000341fullCasedMask = 0x000742ignorableMask = 0x000643ignorableValue = 0x00044445inverseFoldBit = 1 << 746isMidBit = 1 << 64748exceptionBit = 1 << 349exceptionShift = 450numExceptionBits = 125152xorIndexBit = 1 << 653xorShift = 85455// There is no mapping if all xor bits and the exception bit are zero.56hasMappingMask = 0xff80 | exceptionBit57)5859// The case mode bits encodes the case type of a rune. This includes uncased,60// title, upper and lower case and case ignorable. (For a definition of these61// terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare62// cases, a rune can be both cased and case-ignorable. This is encoded by63// cIgnorableCased. A rune of this type is always lower case. Some runes are64// cased while not having a mapping.65//66// A common pattern for scripts in the Unicode standard is for upper and lower67// case runes to alternate for increasing rune values (e.g. the accented Latin68// ranges starting from U+0100 and U+1E00 among others and some Cyrillic69// characters). We use this property by defining a cXORCase mode, where the case70// mode (always upper or lower case) is derived from the rune value. As the XOR71// pattern for case mappings is often identical for successive runes, using72// cXORCase can result in large series of identical trie values. This, in turn,73// allows us to better compress the trie blocks.74const (75cUncased info = iota // 00076cTitle // 00177cLower // 01078cUpper // 01179cIgnorableUncased // 10080cIgnorableCased // 101 // lower case if mappings exist81cXORCase // 11x // case is cLower | ((rune&1) ^ x)8283maxCaseMode = cUpper84)8586func (c info) isCased() bool {87return c&casedMask != 088}8990func (c info) isCaseIgnorable() bool {91return c&ignorableMask == ignorableValue92}9394func (c info) isNotCasedAndNotCaseIgnorable() bool {95return c&fullCasedMask == 096}9798func (c info) isCaseIgnorableAndNotCased() bool {99return c&fullCasedMask == cIgnorableUncased100}101102func (c info) isMid() bool {103return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased104}105106// The case mapping implementation will need to know about various Canonical107// Combining Class (CCC) values. We encode two of these in the trie value:108// cccZero (0) and cccAbove (230). If the value is cccOther, it means that109// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that110// the rune also has the break category Break (see below).111const (112cccBreak info = iota << 4113cccZero114cccAbove115cccOther116117cccMask = cccBreak | cccZero | cccAbove | cccOther118)119120const (121starter = 0122above = 230123iotaSubscript = 240124)125126// The exceptions slice holds data that does not fit in a normal info entry.127// The entry is pointed to by the exception index in an entry. It has the128// following format:129//130// Header:131//132// byte 0:133// 7..6 unused134// 5..4 CCC type (same bits as entry)135// 3 unused136// 2..0 length of fold137//138// byte 1:139// 7..6 unused140// 5..3 length of 1st mapping of case type141// 2..0 length of 2nd mapping of case type142//143// case 1st 2nd144// lower -> upper, title145// upper -> lower, title146// title -> lower, upper147//148// Lengths with the value 0x7 indicate no value and implies no change.149// A length of 0 indicates a mapping to zero-length string.150//151// Body bytes:152//153// case folding bytes154// lowercase mapping bytes155// uppercase mapping bytes156// titlecase mapping bytes157// closure mapping bytes (for NFKC_Casefold). (TODO)158//159// Fallbacks:160//161// missing fold -> lower162// missing title -> upper163// all missing -> original rune164//165// exceptions starts with a dummy byte to enforce that there is no zero index166// value.167const (168lengthMask = 0x07169lengthBits = 3170noChange = 0171)172173// References to generated trie.174175var trie = newCaseTrie(0)176177var sparse = sparseBlocks{178values: sparseValues[:],179offsets: sparseOffsets[:],180}181182// Sparse block lookup code.183184// valueRange is an entry in a sparse block.185type valueRange struct {186value uint16187lo, hi byte188}189190type sparseBlocks struct {191values []valueRange192offsets []uint16193}194195// lookup returns the value from values block n for byte b using binary search.196func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {197lo := s.offsets[n]198hi := s.offsets[n+1]199for lo < hi {200m := lo + (hi-lo)/2201r := s.values[m]202if r.lo <= b && b <= r.hi {203return r.value204}205if b < r.lo {206hi = m207} else {208lo = m + 1209}210}211return 0212}213214// lastRuneForTesting is the last rune used for testing. Everything after this215// is boring.216const lastRuneForTesting = rune(0x1FFFF)217218219