Path: blob/main/vendor/golang.org/x/text/encoding/simplifiedchinese/hzgb2312.go
2893 views
// Copyright 2013 The Go Authors. All rights reserved.1// Use of this source code is governed by a BSD-style2// license that can be found in the LICENSE file.34package simplifiedchinese56import (7"unicode/utf8"89"golang.org/x/text/encoding"10"golang.org/x/text/encoding/internal"11"golang.org/x/text/encoding/internal/identifier"12"golang.org/x/text/transform"13)1415// HZGB2312 is the HZ-GB2312 encoding.16var HZGB2312 encoding.Encoding = &hzGB23121718var hzGB2312 = internal.Encoding{19internal.FuncEncoding{hzGB2312NewDecoder, hzGB2312NewEncoder},20"HZ-GB2312",21identifier.HZGB2312,22}2324func hzGB2312NewDecoder() transform.Transformer {25return new(hzGB2312Decoder)26}2728func hzGB2312NewEncoder() transform.Transformer {29return new(hzGB2312Encoder)30}3132const (33asciiState = iota34gbState35)3637type hzGB2312Decoder int3839func (d *hzGB2312Decoder) Reset() {40*d = asciiState41}4243func (d *hzGB2312Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {44r, size := rune(0), 045loop:46for ; nSrc < len(src); nSrc += size {47c0 := src[nSrc]48if c0 >= utf8.RuneSelf {49r, size = utf8.RuneError, 150goto write51}5253if c0 == '~' {54if nSrc+1 >= len(src) {55if !atEOF {56err = transform.ErrShortSrc57break loop58}59r, size = utf8.RuneError, 160goto write61}62size = 263switch src[nSrc+1] {64case '{':65*d = gbState66continue67case '}':68*d = asciiState69continue70case '~':71if nDst >= len(dst) {72err = transform.ErrShortDst73break loop74}75dst[nDst] = '~'76nDst++77continue78case '\n':79continue80default:81r = utf8.RuneError82goto write83}84}8586if *d == asciiState {87r, size = rune(c0), 188} else {89if nSrc+1 >= len(src) {90if !atEOF {91err = transform.ErrShortSrc92break loop93}94r, size = utf8.RuneError, 195goto write96}97size = 298c1 := src[nSrc+1]99if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {100// error101} else if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) {102r = rune(decode[i])103if r != 0 {104goto write105}106}107if c1 > utf8.RuneSelf {108// Be consistent and always treat non-ASCII as a single error.109size = 1110}111r = utf8.RuneError112}113114write:115if nDst+utf8.RuneLen(r) > len(dst) {116err = transform.ErrShortDst117break loop118}119nDst += utf8.EncodeRune(dst[nDst:], r)120}121return nDst, nSrc, err122}123124type hzGB2312Encoder int125126func (d *hzGB2312Encoder) Reset() {127*d = asciiState128}129130func (e *hzGB2312Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {131r, size := rune(0), 0132for ; nSrc < len(src); nSrc += size {133r = rune(src[nSrc])134135// Decode a 1-byte rune.136if r < utf8.RuneSelf {137size = 1138if r == '~' {139if nDst+2 > len(dst) {140err = transform.ErrShortDst141break142}143dst[nDst+0] = '~'144dst[nDst+1] = '~'145nDst += 2146continue147} else if *e != asciiState {148if nDst+3 > len(dst) {149err = transform.ErrShortDst150break151}152*e = asciiState153dst[nDst+0] = '~'154dst[nDst+1] = '}'155nDst += 2156} else if nDst >= len(dst) {157err = transform.ErrShortDst158break159}160dst[nDst] = uint8(r)161nDst += 1162continue163164}165166// Decode a multi-byte rune.167r, size = utf8.DecodeRune(src[nSrc:])168if size == 1 {169// All valid runes of size 1 (those below utf8.RuneSelf) were170// handled above. We have invalid UTF-8 or we haven't seen the171// full character yet.172if !atEOF && !utf8.FullRune(src[nSrc:]) {173err = transform.ErrShortSrc174break175}176}177178// func init checks that the switch covers all tables.179switch {180case encode0Low <= r && r < encode0High:181if r = rune(encode0[r-encode0Low]); r != 0 {182goto writeGB183}184case encode1Low <= r && r < encode1High:185if r = rune(encode1[r-encode1Low]); r != 0 {186goto writeGB187}188case encode2Low <= r && r < encode2High:189if r = rune(encode2[r-encode2Low]); r != 0 {190goto writeGB191}192case encode3Low <= r && r < encode3High:193if r = rune(encode3[r-encode3Low]); r != 0 {194goto writeGB195}196case encode4Low <= r && r < encode4High:197if r = rune(encode4[r-encode4Low]); r != 0 {198goto writeGB199}200}201202terminateInASCIIState:203// Switch back to ASCII state in case of error so that an ASCII204// replacement character can be written in the correct state.205if *e != asciiState {206if nDst+2 > len(dst) {207err = transform.ErrShortDst208break209}210dst[nDst+0] = '~'211dst[nDst+1] = '}'212nDst += 2213}214err = internal.ErrASCIIReplacement215break216217writeGB:218c0 := uint8(r>>8) - 0x80219c1 := uint8(r) - 0x80220if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {221goto terminateInASCIIState222}223if *e == asciiState {224if nDst+4 > len(dst) {225err = transform.ErrShortDst226break227}228*e = gbState229dst[nDst+0] = '~'230dst[nDst+1] = '{'231nDst += 2232} else if nDst+2 > len(dst) {233err = transform.ErrShortDst234break235}236dst[nDst+0] = c0237dst[nDst+1] = c1238nDst += 2239continue240}241// TODO: should one always terminate in ASCII state to make it safe to242// concatenate two HZ-GB2312-encoded strings?243return nDst, nSrc, err244}245246247