Path: blob/main/vendor/golang.org/x/text/language/parse.go
2880 views
// Copyright 2013 The Go Authors. All rights reserved.1// Use of this source code is governed by a BSD-style2// license that can be found in the LICENSE file.34package language56import (7"errors"8"sort"9"strconv"10"strings"1112"golang.org/x/text/internal/language"13)1415// ValueError is returned by any of the parsing functions when the16// input is well-formed but the respective subtag is not recognized17// as a valid value.18type ValueError interface {19error2021// Subtag returns the subtag for which the error occurred.22Subtag() string23}2425// Parse parses the given BCP 47 string and returns a valid Tag. If parsing26// failed it returns an error and any part of the tag that could be parsed.27// If parsing succeeded but an unknown value was found, it returns28// ValueError. The Tag returned in this case is just stripped of the unknown29// value. All other values are preserved. It accepts tags in the BCP 47 format30// and extensions to this standard defined in31// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.32// The resulting tag is canonicalized using the default canonicalization type.33func Parse(s string) (t Tag, err error) {34return Default.Parse(s)35}3637// Parse parses the given BCP 47 string and returns a valid Tag. If parsing38// failed it returns an error and any part of the tag that could be parsed.39// If parsing succeeded but an unknown value was found, it returns40// ValueError. The Tag returned in this case is just stripped of the unknown41// value. All other values are preserved. It accepts tags in the BCP 47 format42// and extensions to this standard defined in43// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.44// The resulting tag is canonicalized using the canonicalization type c.45func (c CanonType) Parse(s string) (t Tag, err error) {46defer func() {47if recover() != nil {48t = Tag{}49err = language.ErrSyntax50}51}()5253tt, err := language.Parse(s)54if err != nil {55return makeTag(tt), err56}57tt, changed := canonicalize(c, tt)58if changed {59tt.RemakeString()60}61return makeTag(tt), nil62}6364// Compose creates a Tag from individual parts, which may be of type Tag, Base,65// Script, Region, Variant, []Variant, Extension, []Extension or error. If a66// Base, Script or Region or slice of type Variant or Extension is passed more67// than once, the latter will overwrite the former. Variants and Extensions are68// accumulated, but if two extensions of the same type are passed, the latter69// will replace the former. For -u extensions, though, the key-type pairs are70// added, where later values overwrite older ones. A Tag overwrites all former71// values and typically only makes sense as the first argument. The resulting72// tag is returned after canonicalizing using the Default CanonType. If one or73// more errors are encountered, one of the errors is returned.74func Compose(part ...interface{}) (t Tag, err error) {75return Default.Compose(part...)76}7778// Compose creates a Tag from individual parts, which may be of type Tag, Base,79// Script, Region, Variant, []Variant, Extension, []Extension or error. If a80// Base, Script or Region or slice of type Variant or Extension is passed more81// than once, the latter will overwrite the former. Variants and Extensions are82// accumulated, but if two extensions of the same type are passed, the latter83// will replace the former. For -u extensions, though, the key-type pairs are84// added, where later values overwrite older ones. A Tag overwrites all former85// values and typically only makes sense as the first argument. The resulting86// tag is returned after canonicalizing using CanonType c. If one or more errors87// are encountered, one of the errors is returned.88func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {89defer func() {90if recover() != nil {91t = Tag{}92err = language.ErrSyntax93}94}()9596var b language.Builder97if err = update(&b, part...); err != nil {98return und, err99}100b.Tag, _ = canonicalize(c, b.Tag)101return makeTag(b.Make()), err102}103104var errInvalidArgument = errors.New("invalid Extension or Variant")105106func update(b *language.Builder, part ...interface{}) (err error) {107for _, x := range part {108switch v := x.(type) {109case Tag:110b.SetTag(v.tag())111case Base:112b.Tag.LangID = v.langID113case Script:114b.Tag.ScriptID = v.scriptID115case Region:116b.Tag.RegionID = v.regionID117case Variant:118if v.variant == "" {119err = errInvalidArgument120break121}122b.AddVariant(v.variant)123case Extension:124if v.s == "" {125err = errInvalidArgument126break127}128b.SetExt(v.s)129case []Variant:130b.ClearVariants()131for _, v := range v {132b.AddVariant(v.variant)133}134case []Extension:135b.ClearExtensions()136for _, e := range v {137b.SetExt(e.s)138}139// TODO: support parsing of raw strings based on morphology or just extensions?140case error:141if v != nil {142err = v143}144}145}146return147}148149var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")150var errTagListTooLarge = errors.New("tag list exceeds max length")151152// ParseAcceptLanguage parses the contents of an Accept-Language header as153// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and154// a list of corresponding quality weights. It is more permissive than RFC 2616155// and may return non-nil slices even if the input is not valid.156// The Tags will be sorted by highest weight first and then by first occurrence.157// Tags with a weight of zero will be dropped. An error will be returned if the158// input could not be parsed.159func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {160defer func() {161if recover() != nil {162tag = nil163q = nil164err = language.ErrSyntax165}166}()167168if strings.Count(s, "-") > 1000 {169return nil, nil, errTagListTooLarge170}171172var entry string173for s != "" {174if entry, s = split(s, ','); entry == "" {175continue176}177178entry, weight := split(entry, ';')179180// Scan the language.181t, err := Parse(entry)182if err != nil {183id, ok := acceptFallback[entry]184if !ok {185return nil, nil, err186}187t = makeTag(language.Tag{LangID: id})188}189190// Scan the optional weight.191w := 1.0192if weight != "" {193weight = consume(weight, 'q')194weight = consume(weight, '=')195// consume returns the empty string when a token could not be196// consumed, resulting in an error for ParseFloat.197if w, err = strconv.ParseFloat(weight, 32); err != nil {198return nil, nil, errInvalidWeight199}200// Drop tags with a quality weight of 0.201if w <= 0 {202continue203}204}205206tag = append(tag, t)207q = append(q, float32(w))208}209sort.Stable(&tagSort{tag, q})210return tag, q, nil211}212213// consume removes a leading token c from s and returns the result or the empty214// string if there is no such token.215func consume(s string, c byte) string {216if s == "" || s[0] != c {217return ""218}219return strings.TrimSpace(s[1:])220}221222func split(s string, c byte) (head, tail string) {223if i := strings.IndexByte(s, c); i >= 0 {224return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])225}226return strings.TrimSpace(s), ""227}228229// Add hack mapping to deal with a small number of cases that occur230// in Accept-Language (with reasonable frequency).231var acceptFallback = map[string]language.Language{232"english": _en,233"deutsch": _de,234"italian": _it,235"french": _fr,236"*": _mul, // defined in the spec to match all languages.237}238239type tagSort struct {240tag []Tag241q []float32242}243244func (s *tagSort) Len() int {245return len(s.q)246}247248func (s *tagSort) Less(i, j int) bool {249return s.q[i] > s.q[j]250}251252func (s *tagSort) Swap(i, j int) {253s.tag[i], s.tag[j] = s.tag[j], s.tag[i]254s.q[i], s.q[j] = s.q[j], s.q[i]255}256257258