Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
kardolus
GitHub Repository: kardolus/chatgpt-cli
Path: blob/main/vendor/golang.org/x/text/language/parse.go
2880 views
1
// Copyright 2013 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
package language
6
7
import (
8
"errors"
9
"sort"
10
"strconv"
11
"strings"
12
13
"golang.org/x/text/internal/language"
14
)
15
16
// ValueError is the error type the parsing functions report when the input
// is syntactically well-formed but one of its subtags does not name a
// recognized value.
type ValueError interface {
	error

	// Subtag reports which subtag caused the error.
	Subtag() string
}
25
26
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
27
// failed it returns an error and any part of the tag that could be parsed.
28
// If parsing succeeded but an unknown value was found, it returns
29
// ValueError. The Tag returned in this case is just stripped of the unknown
30
// value. All other values are preserved. It accepts tags in the BCP 47 format
31
// and extensions to this standard defined in
32
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
33
// The resulting tag is canonicalized using the default canonicalization type.
34
func Parse(s string) (t Tag, err error) {
35
return Default.Parse(s)
36
}
37
38
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
39
// failed it returns an error and any part of the tag that could be parsed.
40
// If parsing succeeded but an unknown value was found, it returns
41
// ValueError. The Tag returned in this case is just stripped of the unknown
42
// value. All other values are preserved. It accepts tags in the BCP 47 format
43
// and extensions to this standard defined in
44
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
45
// The resulting tag is canonicalized using the canonicalization type c.
46
func (c CanonType) Parse(s string) (t Tag, err error) {
47
defer func() {
48
if recover() != nil {
49
t = Tag{}
50
err = language.ErrSyntax
51
}
52
}()
53
54
tt, err := language.Parse(s)
55
if err != nil {
56
return makeTag(tt), err
57
}
58
tt, changed := canonicalize(c, tt)
59
if changed {
60
tt.RemakeString()
61
}
62
return makeTag(tt), nil
63
}
64
65
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
66
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
67
// Base, Script or Region or slice of type Variant or Extension is passed more
68
// than once, the latter will overwrite the former. Variants and Extensions are
69
// accumulated, but if two extensions of the same type are passed, the latter
70
// will replace the former. For -u extensions, though, the key-type pairs are
71
// added, where later values overwrite older ones. A Tag overwrites all former
72
// values and typically only makes sense as the first argument. The resulting
73
// tag is returned after canonicalizing using the Default CanonType. If one or
74
// more errors are encountered, one of the errors is returned.
75
func Compose(part ...interface{}) (t Tag, err error) {
76
return Default.Compose(part...)
77
}
78
79
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
80
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
81
// Base, Script or Region or slice of type Variant or Extension is passed more
82
// than once, the latter will overwrite the former. Variants and Extensions are
83
// accumulated, but if two extensions of the same type are passed, the latter
84
// will replace the former. For -u extensions, though, the key-type pairs are
85
// added, where later values overwrite older ones. A Tag overwrites all former
86
// values and typically only makes sense as the first argument. The resulting
87
// tag is returned after canonicalizing using CanonType c. If one or more errors
88
// are encountered, one of the errors is returned.
89
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
90
defer func() {
91
if recover() != nil {
92
t = Tag{}
93
err = language.ErrSyntax
94
}
95
}()
96
97
var b language.Builder
98
if err = update(&b, part...); err != nil {
99
return und, err
100
}
101
b.Tag, _ = canonicalize(c, b.Tag)
102
return makeTag(b.Make()), err
103
}
104
105
var errInvalidArgument = errors.New("invalid Extension or Variant")
106
107
func update(b *language.Builder, part ...interface{}) (err error) {
108
for _, x := range part {
109
switch v := x.(type) {
110
case Tag:
111
b.SetTag(v.tag())
112
case Base:
113
b.Tag.LangID = v.langID
114
case Script:
115
b.Tag.ScriptID = v.scriptID
116
case Region:
117
b.Tag.RegionID = v.regionID
118
case Variant:
119
if v.variant == "" {
120
err = errInvalidArgument
121
break
122
}
123
b.AddVariant(v.variant)
124
case Extension:
125
if v.s == "" {
126
err = errInvalidArgument
127
break
128
}
129
b.SetExt(v.s)
130
case []Variant:
131
b.ClearVariants()
132
for _, v := range v {
133
b.AddVariant(v.variant)
134
}
135
case []Extension:
136
b.ClearExtensions()
137
for _, e := range v {
138
b.SetExt(e.s)
139
}
140
// TODO: support parsing of raw strings based on morphology or just extensions?
141
case error:
142
if v != nil {
143
err = v
144
}
145
}
146
}
147
return
148
}
149
150
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
151
var errTagListTooLarge = errors.New("tag list exceeds max length")
152
153
// ParseAcceptLanguage parses the contents of an Accept-Language header as
154
// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
155
// a list of corresponding quality weights. It is more permissive than RFC 2616
156
// and may return non-nil slices even if the input is not valid.
157
// The Tags will be sorted by highest weight first and then by first occurrence.
158
// Tags with a weight of zero will be dropped. An error will be returned if the
159
// input could not be parsed.
160
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
161
defer func() {
162
if recover() != nil {
163
tag = nil
164
q = nil
165
err = language.ErrSyntax
166
}
167
}()
168
169
if strings.Count(s, "-") > 1000 {
170
return nil, nil, errTagListTooLarge
171
}
172
173
var entry string
174
for s != "" {
175
if entry, s = split(s, ','); entry == "" {
176
continue
177
}
178
179
entry, weight := split(entry, ';')
180
181
// Scan the language.
182
t, err := Parse(entry)
183
if err != nil {
184
id, ok := acceptFallback[entry]
185
if !ok {
186
return nil, nil, err
187
}
188
t = makeTag(language.Tag{LangID: id})
189
}
190
191
// Scan the optional weight.
192
w := 1.0
193
if weight != "" {
194
weight = consume(weight, 'q')
195
weight = consume(weight, '=')
196
// consume returns the empty string when a token could not be
197
// consumed, resulting in an error for ParseFloat.
198
if w, err = strconv.ParseFloat(weight, 32); err != nil {
199
return nil, nil, errInvalidWeight
200
}
201
// Drop tags with a quality weight of 0.
202
if w <= 0 {
203
continue
204
}
205
}
206
207
tag = append(tag, t)
208
q = append(q, float32(w))
209
}
210
sort.Stable(&tagSort{tag, q})
211
return tag, q, nil
212
}
213
214
// consume strips the leading token c from s and returns the remainder with
// surrounding white space trimmed. If s does not start with c (which
// includes the case where s is empty) it returns the empty string.
func consume(s string, c byte) string {
	if len(s) > 0 && s[0] == c {
		return strings.TrimSpace(s[1:])
	}
	return ""
}
222
223
// split cuts s at the first occurrence of c. head is the text before c and
// tail the text after it, both with surrounding white space trimmed. If c
// does not occur in s, head is the trimmed s and tail is empty.
func split(s string, c byte) (head, tail string) {
	idx := strings.IndexByte(s, c)
	if idx < 0 {
		return strings.TrimSpace(s), ""
	}
	head = strings.TrimSpace(s[:idx])
	tail = strings.TrimSpace(s[idx+1:])
	return head, tail
}
229
230
// Add hack mapping to deal with a small number of cases that occur
231
// in Accept-Language (with reasonable frequency).
232
var acceptFallback = map[string]language.Language{
233
"english": _en,
234
"deutsch": _de,
235
"italian": _it,
236
"french": _fr,
237
"*": _mul, // defined in the spec to match all languages.
238
}
239
240
type tagSort struct {
241
tag []Tag
242
q []float32
243
}
244
245
func (s *tagSort) Len() int {
246
return len(s.q)
247
}
248
249
func (s *tagSort) Less(i, j int) bool {
250
return s.q[i] > s.q[j]
251
}
252
253
func (s *tagSort) Swap(i, j int) {
254
s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
255
s.q[i], s.q[j] = s.q[j], s.q[i]
256
}
257
258