Path: blob/main/vendor/golang.org/x/text/encoding/japanese/iso2022jp.go
2893 views
// Copyright 2013 The Go Authors. All rights reserved.1// Use of this source code is governed by a BSD-style2// license that can be found in the LICENSE file.34package japanese56import (7"unicode/utf8"89"golang.org/x/text/encoding"10"golang.org/x/text/encoding/internal"11"golang.org/x/text/encoding/internal/identifier"12"golang.org/x/text/transform"13)1415// ISO2022JP is the ISO-2022-JP encoding.16var ISO2022JP encoding.Encoding = &iso2022JP1718var iso2022JP = internal.Encoding{19internal.FuncEncoding{iso2022JPNewDecoder, iso2022JPNewEncoder},20"ISO-2022-JP",21identifier.ISO2022JP,22}2324func iso2022JPNewDecoder() transform.Transformer {25return new(iso2022JPDecoder)26}2728func iso2022JPNewEncoder() transform.Transformer {29return new(iso2022JPEncoder)30}3132const (33asciiState = iota34katakanaState35jis0208State36jis0212State37)3839const asciiEsc = 0x1b4041type iso2022JPDecoder int4243func (d *iso2022JPDecoder) Reset() {44*d = asciiState45}4647func (d *iso2022JPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {48r, size := rune(0), 049for ; nSrc < len(src); nSrc += size {50c0 := src[nSrc]51if c0 >= utf8.RuneSelf {52r, size = '\ufffd', 153goto write54}5556if c0 == asciiEsc {57if nSrc+2 >= len(src) {58if !atEOF {59return nDst, nSrc, transform.ErrShortSrc60}61// TODO: is it correct to only skip 1??62r, size = '\ufffd', 163goto write64}65size = 366c1 := src[nSrc+1]67c2 := src[nSrc+2]68switch {69case c1 == '$' && (c2 == '@' || c2 == 'B'): // 0x24 {0x40, 0x42}70*d = jis0208State71continue72case c1 == '$' && c2 == '(': // 0x24 0x2873if nSrc+3 >= len(src) {74if !atEOF {75return nDst, nSrc, transform.ErrShortSrc76}77r, size = '\ufffd', 178goto write79}80size = 481if src[nSrc+3] == 'D' {82*d = jis0212State83continue84}85case c1 == '(' && (c2 == 'B' || c2 == 'J'): // 0x28 {0x42, 0x4A}86*d = asciiState87continue88case c1 == '(' && c2 == 'I': // 0x28 0x4989*d = katakanaState90continue91}92r, size = '\ufffd', 193goto write94}9596switch *d {97case asciiState:98r, size = rune(c0), 199100case katakanaState:101if c0 < 0x21 || 0x60 <= c0 {102r, size = '\ufffd', 1103goto write104}105r, size = rune(c0)+(0xff61-0x21), 1106107default:108if c0 == 0x0a {109*d = asciiState110r, size = rune(c0), 1111goto write112}113if nSrc+1 >= len(src) {114if !atEOF {115return nDst, nSrc, transform.ErrShortSrc116}117r, size = '\ufffd', 1118goto write119}120size = 2121c1 := src[nSrc+1]122i := int(c0-0x21)*94 + int(c1-0x21)123if *d == jis0208State && i < len(jis0208Decode) {124r = rune(jis0208Decode[i])125} else if *d == jis0212State && i < len(jis0212Decode) {126r = rune(jis0212Decode[i])127} else {128r = '\ufffd'129goto write130}131if r == 0 {132r = '\ufffd'133}134}135136write:137if nDst+utf8.RuneLen(r) > len(dst) {138return nDst, nSrc, transform.ErrShortDst139}140nDst += utf8.EncodeRune(dst[nDst:], r)141}142return nDst, nSrc, err143}144145type iso2022JPEncoder int146147func (e *iso2022JPEncoder) Reset() {148*e = asciiState149}150151func (e *iso2022JPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {152r, size := rune(0), 0153for ; nSrc < len(src); nSrc += size {154r = rune(src[nSrc])155156// Decode a 1-byte rune.157if r < utf8.RuneSelf {158size = 1159160} else {161// Decode a multi-byte rune.162r, size = utf8.DecodeRune(src[nSrc:])163if size == 1 {164// All valid runes of size 1 (those below utf8.RuneSelf) were165// handled above. We have invalid UTF-8 or we haven't seen the166// full character yet.167if !atEOF && !utf8.FullRune(src[nSrc:]) {168err = transform.ErrShortSrc169break170}171}172173// func init checks that the switch covers all tables.174//175// http://encoding.spec.whatwg.org/#iso-2022-jp says that "the index jis0212176// is not used by the iso-2022-jp encoder due to lack of widespread support".177//178// TODO: do we have to special-case U+00A5 and U+203E, as per179// http://encoding.spec.whatwg.org/#iso-2022-jp180// Doing so would mean that "\u00a5" would not be preserved181// after an encode-decode round trip.182switch {183case encode0Low <= r && r < encode0High:184if r = rune(encode0[r-encode0Low]); r>>tableShift == jis0208 {185goto writeJIS186}187case encode1Low <= r && r < encode1High:188if r = rune(encode1[r-encode1Low]); r>>tableShift == jis0208 {189goto writeJIS190}191case encode2Low <= r && r < encode2High:192if r = rune(encode2[r-encode2Low]); r>>tableShift == jis0208 {193goto writeJIS194}195case encode3Low <= r && r < encode3High:196if r = rune(encode3[r-encode3Low]); r>>tableShift == jis0208 {197goto writeJIS198}199case encode4Low <= r && r < encode4High:200if r = rune(encode4[r-encode4Low]); r>>tableShift == jis0208 {201goto writeJIS202}203case encode5Low <= r && r < encode5High:204if 0xff61 <= r && r < 0xffa0 {205goto writeKatakana206}207if r = rune(encode5[r-encode5Low]); r>>tableShift == jis0208 {208goto writeJIS209}210}211212// Switch back to ASCII state in case of error so that an ASCII213// replacement character can be written in the correct state.214if *e != asciiState {215if nDst+3 > len(dst) {216err = transform.ErrShortDst217break218}219*e = asciiState220dst[nDst+0] = asciiEsc221dst[nDst+1] = '('222dst[nDst+2] = 'B'223nDst += 3224}225err = internal.ErrASCIIReplacement226break227}228229if *e != asciiState {230if nDst+4 > len(dst) {231err = transform.ErrShortDst232break233}234*e = asciiState235dst[nDst+0] = asciiEsc236dst[nDst+1] = '('237dst[nDst+2] = 'B'238nDst += 3239} else if nDst >= len(dst) {240err = transform.ErrShortDst241break242}243dst[nDst] = uint8(r)244nDst++245continue246247writeJIS:248if *e != jis0208State {249if nDst+5 > len(dst) {250err = transform.ErrShortDst251break252}253*e = jis0208State254dst[nDst+0] = asciiEsc255dst[nDst+1] = '$'256dst[nDst+2] = 'B'257nDst += 3258} else if nDst+2 > len(dst) {259err = transform.ErrShortDst260break261}262dst[nDst+0] = 0x21 + uint8(r>>codeShift)&codeMask263dst[nDst+1] = 0x21 + uint8(r)&codeMask264nDst += 2265continue266267writeKatakana:268if *e != katakanaState {269if nDst+4 > len(dst) {270err = transform.ErrShortDst271break272}273*e = katakanaState274dst[nDst+0] = asciiEsc275dst[nDst+1] = '('276dst[nDst+2] = 'I'277nDst += 3278} else if nDst >= len(dst) {279err = transform.ErrShortDst280break281}282dst[nDst] = uint8(r - (0xff61 - 0x21))283nDst++284continue285}286if atEOF && err == nil && *e != asciiState {287if nDst+3 > len(dst) {288err = transform.ErrShortDst289} else {290*e = asciiState291dst[nDst+0] = asciiEsc292dst[nDst+1] = '('293dst[nDst+2] = 'B'294nDst += 3295}296}297return nDst, nSrc, err298}299300301