Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
kardolus
GitHub Repository: kardolus/chatgpt-cli
Path: blob/main/vendor/golang.org/x/text/encoding/japanese/iso2022jp.go
2893 views
1
// Copyright 2013 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
package japanese
6
7
import (
8
"unicode/utf8"
9
10
"golang.org/x/text/encoding"
11
"golang.org/x/text/encoding/internal"
12
"golang.org/x/text/encoding/internal/identifier"
13
"golang.org/x/text/transform"
14
)
15
16
// ISO2022JP is the ISO-2022-JP encoding.
17
var ISO2022JP encoding.Encoding = &iso2022JP
18
19
var iso2022JP = internal.Encoding{
20
internal.FuncEncoding{iso2022JPNewDecoder, iso2022JPNewEncoder},
21
"ISO-2022-JP",
22
identifier.ISO2022JP,
23
}
24
25
// iso2022JPNewDecoder returns a fresh ISO-2022-JP decoder. The zero value of
// iso2022JPDecoder equals asciiState, so the decoder starts in the ASCII state.
func iso2022JPNewDecoder() transform.Transformer {
	var dec iso2022JPDecoder
	return &dec
}
28
29
// iso2022JPNewEncoder returns a fresh ISO-2022-JP encoder. The zero value of
// iso2022JPEncoder equals asciiState, so the encoder starts in the ASCII state.
func iso2022JPNewEncoder() transform.Transformer {
	var enc iso2022JPEncoder
	return &enc
}
32
33
// States shared by the ISO-2022-JP decoder and encoder. The state records
// which character set the most recent escape sequence selected.
const (
	// asciiState is the initial state; it is the zero value.
	asciiState = iota
	// katakanaState is selected by ESC ( I.
	katakanaState
	// jis0208State is selected by ESC $ @ or ESC $ B.
	jis0208State
	// jis0212State is selected by ESC $ ( D.
	jis0212State
)

// asciiEsc is the ESC byte that introduces every escape sequence.
const asciiEsc = 0x1b
41
42
type iso2022JPDecoder int
43
44
func (d *iso2022JPDecoder) Reset() {
45
*d = asciiState
46
}
47
48
func (d *iso2022JPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
49
r, size := rune(0), 0
50
for ; nSrc < len(src); nSrc += size {
51
c0 := src[nSrc]
52
if c0 >= utf8.RuneSelf {
53
r, size = '\ufffd', 1
54
goto write
55
}
56
57
if c0 == asciiEsc {
58
if nSrc+2 >= len(src) {
59
if !atEOF {
60
return nDst, nSrc, transform.ErrShortSrc
61
}
62
// TODO: is it correct to only skip 1??
63
r, size = '\ufffd', 1
64
goto write
65
}
66
size = 3
67
c1 := src[nSrc+1]
68
c2 := src[nSrc+2]
69
switch {
70
case c1 == '$' && (c2 == '@' || c2 == 'B'): // 0x24 {0x40, 0x42}
71
*d = jis0208State
72
continue
73
case c1 == '$' && c2 == '(': // 0x24 0x28
74
if nSrc+3 >= len(src) {
75
if !atEOF {
76
return nDst, nSrc, transform.ErrShortSrc
77
}
78
r, size = '\ufffd', 1
79
goto write
80
}
81
size = 4
82
if src[nSrc+3] == 'D' {
83
*d = jis0212State
84
continue
85
}
86
case c1 == '(' && (c2 == 'B' || c2 == 'J'): // 0x28 {0x42, 0x4A}
87
*d = asciiState
88
continue
89
case c1 == '(' && c2 == 'I': // 0x28 0x49
90
*d = katakanaState
91
continue
92
}
93
r, size = '\ufffd', 1
94
goto write
95
}
96
97
switch *d {
98
case asciiState:
99
r, size = rune(c0), 1
100
101
case katakanaState:
102
if c0 < 0x21 || 0x60 <= c0 {
103
r, size = '\ufffd', 1
104
goto write
105
}
106
r, size = rune(c0)+(0xff61-0x21), 1
107
108
default:
109
if c0 == 0x0a {
110
*d = asciiState
111
r, size = rune(c0), 1
112
goto write
113
}
114
if nSrc+1 >= len(src) {
115
if !atEOF {
116
return nDst, nSrc, transform.ErrShortSrc
117
}
118
r, size = '\ufffd', 1
119
goto write
120
}
121
size = 2
122
c1 := src[nSrc+1]
123
i := int(c0-0x21)*94 + int(c1-0x21)
124
if *d == jis0208State && i < len(jis0208Decode) {
125
r = rune(jis0208Decode[i])
126
} else if *d == jis0212State && i < len(jis0212Decode) {
127
r = rune(jis0212Decode[i])
128
} else {
129
r = '\ufffd'
130
goto write
131
}
132
if r == 0 {
133
r = '\ufffd'
134
}
135
}
136
137
write:
138
if nDst+utf8.RuneLen(r) > len(dst) {
139
return nDst, nSrc, transform.ErrShortDst
140
}
141
nDst += utf8.EncodeRune(dst[nDst:], r)
142
}
143
return nDst, nSrc, err
144
}
145
146
type iso2022JPEncoder int
147
148
func (e *iso2022JPEncoder) Reset() {
149
*e = asciiState
150
}
151
152
func (e *iso2022JPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
153
r, size := rune(0), 0
154
for ; nSrc < len(src); nSrc += size {
155
r = rune(src[nSrc])
156
157
// Decode a 1-byte rune.
158
if r < utf8.RuneSelf {
159
size = 1
160
161
} else {
162
// Decode a multi-byte rune.
163
r, size = utf8.DecodeRune(src[nSrc:])
164
if size == 1 {
165
// All valid runes of size 1 (those below utf8.RuneSelf) were
166
// handled above. We have invalid UTF-8 or we haven't seen the
167
// full character yet.
168
if !atEOF && !utf8.FullRune(src[nSrc:]) {
169
err = transform.ErrShortSrc
170
break
171
}
172
}
173
174
// func init checks that the switch covers all tables.
175
//
176
// http://encoding.spec.whatwg.org/#iso-2022-jp says that "the index jis0212
177
// is not used by the iso-2022-jp encoder due to lack of widespread support".
178
//
179
// TODO: do we have to special-case U+00A5 and U+203E, as per
180
// http://encoding.spec.whatwg.org/#iso-2022-jp
181
// Doing so would mean that "\u00a5" would not be preserved
182
// after an encode-decode round trip.
183
switch {
184
case encode0Low <= r && r < encode0High:
185
if r = rune(encode0[r-encode0Low]); r>>tableShift == jis0208 {
186
goto writeJIS
187
}
188
case encode1Low <= r && r < encode1High:
189
if r = rune(encode1[r-encode1Low]); r>>tableShift == jis0208 {
190
goto writeJIS
191
}
192
case encode2Low <= r && r < encode2High:
193
if r = rune(encode2[r-encode2Low]); r>>tableShift == jis0208 {
194
goto writeJIS
195
}
196
case encode3Low <= r && r < encode3High:
197
if r = rune(encode3[r-encode3Low]); r>>tableShift == jis0208 {
198
goto writeJIS
199
}
200
case encode4Low <= r && r < encode4High:
201
if r = rune(encode4[r-encode4Low]); r>>tableShift == jis0208 {
202
goto writeJIS
203
}
204
case encode5Low <= r && r < encode5High:
205
if 0xff61 <= r && r < 0xffa0 {
206
goto writeKatakana
207
}
208
if r = rune(encode5[r-encode5Low]); r>>tableShift == jis0208 {
209
goto writeJIS
210
}
211
}
212
213
// Switch back to ASCII state in case of error so that an ASCII
214
// replacement character can be written in the correct state.
215
if *e != asciiState {
216
if nDst+3 > len(dst) {
217
err = transform.ErrShortDst
218
break
219
}
220
*e = asciiState
221
dst[nDst+0] = asciiEsc
222
dst[nDst+1] = '('
223
dst[nDst+2] = 'B'
224
nDst += 3
225
}
226
err = internal.ErrASCIIReplacement
227
break
228
}
229
230
if *e != asciiState {
231
if nDst+4 > len(dst) {
232
err = transform.ErrShortDst
233
break
234
}
235
*e = asciiState
236
dst[nDst+0] = asciiEsc
237
dst[nDst+1] = '('
238
dst[nDst+2] = 'B'
239
nDst += 3
240
} else if nDst >= len(dst) {
241
err = transform.ErrShortDst
242
break
243
}
244
dst[nDst] = uint8(r)
245
nDst++
246
continue
247
248
writeJIS:
249
if *e != jis0208State {
250
if nDst+5 > len(dst) {
251
err = transform.ErrShortDst
252
break
253
}
254
*e = jis0208State
255
dst[nDst+0] = asciiEsc
256
dst[nDst+1] = '$'
257
dst[nDst+2] = 'B'
258
nDst += 3
259
} else if nDst+2 > len(dst) {
260
err = transform.ErrShortDst
261
break
262
}
263
dst[nDst+0] = 0x21 + uint8(r>>codeShift)&codeMask
264
dst[nDst+1] = 0x21 + uint8(r)&codeMask
265
nDst += 2
266
continue
267
268
writeKatakana:
269
if *e != katakanaState {
270
if nDst+4 > len(dst) {
271
err = transform.ErrShortDst
272
break
273
}
274
*e = katakanaState
275
dst[nDst+0] = asciiEsc
276
dst[nDst+1] = '('
277
dst[nDst+2] = 'I'
278
nDst += 3
279
} else if nDst >= len(dst) {
280
err = transform.ErrShortDst
281
break
282
}
283
dst[nDst] = uint8(r - (0xff61 - 0x21))
284
nDst++
285
continue
286
}
287
if atEOF && err == nil && *e != asciiState {
288
if nDst+3 > len(dst) {
289
err = transform.ErrShortDst
290
} else {
291
*e = asciiState
292
dst[nDst+0] = asciiEsc
293
dst[nDst+1] = '('
294
dst[nDst+2] = 'B'
295
nDst += 3
296
}
297
}
298
return nDst, nSrc, err
299
}
300
301