Path: blob/main/vendor/golang.org/x/text/encoding/unicode/override.go
2893 views
// Copyright 2015 The Go Authors. All rights reserved.1// Use of this source code is governed by a BSD-style2// license that can be found in the LICENSE file.34package unicode56import (7"golang.org/x/text/transform"8)910// BOMOverride returns a new decoder transformer that is identical to fallback,11// except that the presence of a Byte Order Mark at the start of the input12// causes it to switch to the corresponding Unicode decoding. It will only13// consider BOMs for UTF-8, UTF-16BE, and UTF-16LE.14//15// This differs from using ExpectBOM by allowing a BOM to switch to UTF-8, not16// just UTF-16 variants, and allowing falling back to any encoding scheme.17//18// This technique is recommended by the W3C for use in HTML 5: "For19// compatibility with deployed content, the byte order mark (also known as BOM)20// is considered more authoritative than anything else."21// http://www.w3.org/TR/encoding/#specification-hooks22//23// Using BOMOverride is mostly intended for use cases where the first characters24// of a fallback encoding are known to not be a BOM, for example, for valid HTML25// and most encodings.26func BOMOverride(fallback transform.Transformer) transform.Transformer {27// TODO: possibly allow a variadic argument of unicode encodings to allow28// specifying details of which fallbacks are supported as well as29// specifying the details of the implementations. This would also allow for30// support for UTF-32, which should not be supported by default.31return &bomOverride{fallback: fallback}32}3334type bomOverride struct {35fallback transform.Transformer36current transform.Transformer37}3839func (d *bomOverride) Reset() {40d.current = nil41d.fallback.Reset()42}4344var (45// TODO: we could use decode functions here, instead of allocating a new46// decoder on every NewDecoder as IgnoreBOM decoders can be stateless.47utf16le = UTF16(LittleEndian, IgnoreBOM)48utf16be = UTF16(BigEndian, IgnoreBOM)49)5051const utf8BOM = "\ufeff"5253func (d *bomOverride) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {54if d.current != nil {55return d.current.Transform(dst, src, atEOF)56}57if len(src) < 3 && !atEOF {58return 0, 0, transform.ErrShortSrc59}60d.current = d.fallback61bomSize := 062if len(src) >= 2 {63if src[0] == 0xFF && src[1] == 0xFE {64d.current = utf16le.NewDecoder()65bomSize = 266} else if src[0] == 0xFE && src[1] == 0xFF {67d.current = utf16be.NewDecoder()68bomSize = 269} else if len(src) >= 3 &&70src[0] == utf8BOM[0] &&71src[1] == utf8BOM[1] &&72src[2] == utf8BOM[2] {73d.current = transform.Nop74bomSize = 375}76}77if bomSize < len(src) {78nDst, nSrc, err = d.current.Transform(dst, src[bomSize:], atEOF)79}80return nDst, nSrc + bomSize, err81}828384