Path: blob/main/vendor/github.com/google/go-cmp/cmp/report_slices.go
2880 views
// Copyright 2019, The Go Authors. All rights reserved.1// Use of this source code is governed by a BSD-style2// license that can be found in the LICENSE file.34package cmp56import (7"bytes"8"fmt"9"math"10"reflect"11"strconv"12"strings"13"unicode"14"unicode/utf8"1516"github.com/google/go-cmp/cmp/internal/diff"17)1819// CanFormatDiffSlice reports whether we support custom formatting for nodes20// that are slices of primitive kinds or strings.21func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {22switch {23case opts.DiffMode != diffUnknown:24return false // Must be formatting in diff mode25case v.NumDiff == 0:26return false // No differences detected27case !v.ValueX.IsValid() || !v.ValueY.IsValid():28return false // Both values must be valid29case v.NumIgnored > 0:30return false // Some ignore option was used31case v.NumTransformed > 0:32return false // Some transform option was used33case v.NumCompared > 1:34return false // More than one comparison was used35case v.NumCompared == 1 && v.Type.Name() != "":36// The need for cmp to check applicability of options on every element37// in a slice is a significant performance detriment for large []byte.38// The workaround is to specify Comparer(bytes.Equal),39// which enables cmp to compare []byte more efficiently.40// If they differ, we still want to provide batched diffing.41// The logic disallows named types since they tend to have their own42// String method, with nicer formatting than what this provides.43return false44}4546// Check whether this is an interface with the same concrete types.47t := v.Type48vx, vy := v.ValueX, v.ValueY49if t.Kind() == reflect.Interface && !vx.IsNil() && !vy.IsNil() && vx.Elem().Type() == vy.Elem().Type() {50vx, vy = vx.Elem(), vy.Elem()51t = vx.Type()52}5354// Check whether we provide specialized diffing for this type.55switch t.Kind() {56case reflect.String:57case reflect.Array, reflect.Slice:58// Only slices of primitive types have specialized handling.59switch t.Elem().Kind() {60case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,61reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,62reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:63default:64return false65}6667// Both slice values have to be non-empty.68if t.Kind() == reflect.Slice && (vx.Len() == 0 || vy.Len() == 0) {69return false70}7172// If a sufficient number of elements already differ,73// use specialized formatting even if length requirement is not met.74if v.NumDiff > v.NumSame {75return true76}77default:78return false79}8081// Use specialized string diffing for longer slices or strings.82const minLength = 3283return vx.Len() >= minLength && vy.Len() >= minLength84}8586// FormatDiffSlice prints a diff for the slices (or strings) represented by v.87// This provides custom-tailored logic to make printing of differences in88// textual strings and slices of primitive kinds more readable.89func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {90assert(opts.DiffMode == diffUnknown)91t, vx, vy := v.Type, v.ValueX, v.ValueY92if t.Kind() == reflect.Interface {93vx, vy = vx.Elem(), vy.Elem()94t = vx.Type()95opts = opts.WithTypeMode(emitType)96}9798// Auto-detect the type of the data.99var sx, sy string100var ssx, ssy []string101var isString, isMostlyText, isPureLinedText, isBinary bool102switch {103case t.Kind() == reflect.String:104sx, sy = vx.String(), vy.String()105isString = true106case t.Kind() == reflect.Slice && t.Elem() == byteType:107sx, sy = string(vx.Bytes()), string(vy.Bytes())108isString = true109case t.Kind() == reflect.Array:110// Arrays need to be addressable for slice operations to work.111vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()112vx2.Set(vx)113vy2.Set(vy)114vx, vy = vx2, vy2115}116if isString {117var numTotalRunes, numValidRunes, numLines, lastLineIdx, maxLineLen int118for i, r := range sx + sy {119numTotalRunes++120if (unicode.IsPrint(r) || unicode.IsSpace(r)) && r != utf8.RuneError {121numValidRunes++122}123if r == '\n' {124if maxLineLen < i-lastLineIdx {125maxLineLen = i - lastLineIdx126}127lastLineIdx = i + 1128numLines++129}130}131isPureText := numValidRunes == numTotalRunes132isMostlyText = float64(numValidRunes) > math.Floor(0.90*float64(numTotalRunes))133isPureLinedText = isPureText && numLines >= 4 && maxLineLen <= 1024134isBinary = !isMostlyText135136// Avoid diffing by lines if it produces a significantly more complex137// edit script than diffing by bytes.138if isPureLinedText {139ssx = strings.Split(sx, "\n")140ssy = strings.Split(sy, "\n")141esLines := diff.Difference(len(ssx), len(ssy), func(ix, iy int) diff.Result {142return diff.BoolResult(ssx[ix] == ssy[iy])143})144esBytes := diff.Difference(len(sx), len(sy), func(ix, iy int) diff.Result {145return diff.BoolResult(sx[ix] == sy[iy])146})147efficiencyLines := float64(esLines.Dist()) / float64(len(esLines))148efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes))149quotedLength := len(strconv.Quote(sx + sy))150unquotedLength := len(sx) + len(sy)151escapeExpansionRatio := float64(quotedLength) / float64(unquotedLength)152isPureLinedText = efficiencyLines < 4*efficiencyBytes || escapeExpansionRatio > 1.1153}154}155156// Format the string into printable records.157var list textList158var delim string159switch {160// If the text appears to be multi-lined text,161// then perform differencing across individual lines.162case isPureLinedText:163list = opts.formatDiffSlice(164reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",165func(v reflect.Value, d diffMode) textRecord {166s := formatString(v.Index(0).String())167return textRecord{Diff: d, Value: textLine(s)}168},169)170delim = "\n"171172// If possible, use a custom triple-quote (""") syntax for printing173// differences in a string literal. This format is more readable,174// but has edge-cases where differences are visually indistinguishable.175// This format is avoided under the following conditions:176// - A line starts with `"""`177// - A line starts with "..."178// - A line contains non-printable characters179// - Adjacent different lines differ only by whitespace180//181// For example:182//183// """184// ... // 3 identical lines185// foo186// bar187// - baz188// + BAZ189// """190isTripleQuoted := true191prevRemoveLines := map[string]bool{}192prevInsertLines := map[string]bool{}193var list2 textList194list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})195for _, r := range list {196if !r.Value.Equal(textEllipsis) {197line, _ := strconv.Unquote(string(r.Value.(textLine)))198line = strings.TrimPrefix(strings.TrimSuffix(line, "\r"), "\r") // trim leading/trailing carriage returns for legacy Windows endline support199normLine := strings.Map(func(r rune) rune {200if unicode.IsSpace(r) {201return -1 // drop whitespace to avoid visually indistinguishable output202}203return r204}, line)205isPrintable := func(r rune) bool {206return unicode.IsPrint(r) || r == '\t' // specially treat tab as printable207}208isTripleQuoted = !strings.HasPrefix(line, `"""`) && !strings.HasPrefix(line, "...") && strings.TrimFunc(line, isPrintable) == ""209switch r.Diff {210case diffRemoved:211isTripleQuoted = isTripleQuoted && !prevInsertLines[normLine]212prevRemoveLines[normLine] = true213case diffInserted:214isTripleQuoted = isTripleQuoted && !prevRemoveLines[normLine]215prevInsertLines[normLine] = true216}217if !isTripleQuoted {218break219}220r.Value = textLine(line)221r.ElideComma = true222}223if !(r.Diff == diffRemoved || r.Diff == diffInserted) { // start a new non-adjacent difference group224prevRemoveLines = map[string]bool{}225prevInsertLines = map[string]bool{}226}227list2 = append(list2, r)228}229if r := list2[len(list2)-1]; r.Diff == diffIdentical && len(r.Value.(textLine)) == 0 {230list2 = list2[:len(list2)-1] // elide single empty line at the end231}232list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})233if isTripleQuoted {234var out textNode = &textWrap{Prefix: "(", Value: list2, Suffix: ")"}235switch t.Kind() {236case reflect.String:237if t != stringType {238out = opts.FormatType(t, out)239}240case reflect.Slice:241// Always emit type for slices since the triple-quote syntax242// looks like a string (not a slice).243opts = opts.WithTypeMode(emitType)244out = opts.FormatType(t, out)245}246return out247}248249// If the text appears to be single-lined text,250// then perform differencing in approximately fixed-sized chunks.251// The output is printed as quoted strings.252case isMostlyText:253list = opts.formatDiffSlice(254reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",255func(v reflect.Value, d diffMode) textRecord {256s := formatString(v.String())257return textRecord{Diff: d, Value: textLine(s)}258},259)260261// If the text appears to be binary data,262// then perform differencing in approximately fixed-sized chunks.263// The output is inspired by hexdump.264case isBinary:265list = opts.formatDiffSlice(266reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte",267func(v reflect.Value, d diffMode) textRecord {268var ss []string269for i := 0; i < v.Len(); i++ {270ss = append(ss, formatHex(v.Index(i).Uint()))271}272s := strings.Join(ss, ", ")273comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String())))274return textRecord{Diff: d, Value: textLine(s), Comment: comment}275},276)277278// For all other slices of primitive types,279// then perform differencing in approximately fixed-sized chunks.280// The size of each chunk depends on the width of the element kind.281default:282var chunkSize int283if t.Elem().Kind() == reflect.Bool {284chunkSize = 16285} else {286switch t.Elem().Bits() {287case 8:288chunkSize = 16289case 16:290chunkSize = 12291case 32:292chunkSize = 8293default:294chunkSize = 8295}296}297list = opts.formatDiffSlice(298vx, vy, chunkSize, t.Elem().Kind().String(),299func(v reflect.Value, d diffMode) textRecord {300var ss []string301for i := 0; i < v.Len(); i++ {302switch t.Elem().Kind() {303case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:304ss = append(ss, fmt.Sprint(v.Index(i).Int()))305case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64:306ss = append(ss, fmt.Sprint(v.Index(i).Uint()))307case reflect.Uint8, reflect.Uintptr:308ss = append(ss, formatHex(v.Index(i).Uint()))309case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:310ss = append(ss, fmt.Sprint(v.Index(i).Interface()))311}312}313s := strings.Join(ss, ", ")314return textRecord{Diff: d, Value: textLine(s)}315},316)317}318319// Wrap the output with appropriate type information.320var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"}321if !isMostlyText {322// The "{...}" byte-sequence literal is not valid Go syntax for strings.323// Emit the type for extra clarity (e.g. "string{...}").324if t.Kind() == reflect.String {325opts = opts.WithTypeMode(emitType)326}327return opts.FormatType(t, out)328}329switch t.Kind() {330case reflect.String:331out = &textWrap{Prefix: "strings.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}332if t != stringType {333out = opts.FormatType(t, out)334}335case reflect.Slice:336out = &textWrap{Prefix: "bytes.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}337if t != bytesType {338out = opts.FormatType(t, out)339}340}341return out342}343344// formatASCII formats s as an ASCII string.345// This is useful for printing binary strings in a semi-legible way.346func formatASCII(s string) string {347b := bytes.Repeat([]byte{'.'}, len(s))348for i := 0; i < len(s); i++ {349if ' ' <= s[i] && s[i] <= '~' {350b[i] = s[i]351}352}353return string(b)354}355356func (opts formatOptions) formatDiffSlice(357vx, vy reflect.Value, chunkSize int, name string,358makeRec func(reflect.Value, diffMode) textRecord,359) (list textList) {360eq := func(ix, iy int) bool {361return vx.Index(ix).Interface() == vy.Index(iy).Interface()362}363es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {364return diff.BoolResult(eq(ix, iy))365})366367appendChunks := func(v reflect.Value, d diffMode) int {368n0 := v.Len()369for v.Len() > 0 {370n := chunkSize371if n > v.Len() {372n = v.Len()373}374list = append(list, makeRec(v.Slice(0, n), d))375v = v.Slice(n, v.Len())376}377return n0 - v.Len()378}379380var numDiffs int381maxLen := -1382if opts.LimitVerbosity {383maxLen = (1 << opts.verbosity()) << 2 // 4, 8, 16, 32, 64, etc...384opts.VerbosityLevel--385}386387groups := coalesceAdjacentEdits(name, es)388groups = coalesceInterveningIdentical(groups, chunkSize/4)389groups = cleanupSurroundingIdentical(groups, eq)390maxGroup := diffStats{Name: name}391for i, ds := range groups {392if maxLen >= 0 && numDiffs >= maxLen {393maxGroup = maxGroup.Append(ds)394continue395}396397// Print equal.398if ds.NumDiff() == 0 {399// Compute the number of leading and trailing equal bytes to print.400var numLo, numHi int401numEqual := ds.NumIgnored + ds.NumIdentical402for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 {403numLo++404}405for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {406numHi++407}408if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 {409numHi = numEqual - numLo // Avoid pointless coalescing of single equal row410}411412// Print the equal bytes.413appendChunks(vx.Slice(0, numLo), diffIdentical)414if numEqual > numLo+numHi {415ds.NumIdentical -= numLo + numHi416list.AppendEllipsis(ds)417}418appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical)419vx = vx.Slice(numEqual, vx.Len())420vy = vy.Slice(numEqual, vy.Len())421continue422}423424// Print unequal.425len0 := len(list)426nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved)427vx = vx.Slice(nx, vx.Len())428ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted)429vy = vy.Slice(ny, vy.Len())430numDiffs += len(list) - len0431}432if maxGroup.IsZero() {433assert(vx.Len() == 0 && vy.Len() == 0)434} else {435list.AppendEllipsis(maxGroup)436}437return list438}439440// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent441// equal or unequal counts.442//443// Example:444//445// Input: "..XXY...Y"446// Output: [447// {NumIdentical: 2},448// {NumRemoved: 2, NumInserted 1},449// {NumIdentical: 3},450// {NumInserted: 1},451// ]452func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {453var prevMode byte454lastStats := func(mode byte) *diffStats {455if prevMode != mode {456groups = append(groups, diffStats{Name: name})457prevMode = mode458}459return &groups[len(groups)-1]460}461for _, e := range es {462switch e {463case diff.Identity:464lastStats('=').NumIdentical++465case diff.UniqueX:466lastStats('!').NumRemoved++467case diff.UniqueY:468lastStats('!').NumInserted++469case diff.Modified:470lastStats('!').NumModified++471}472}473return groups474}475476// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize)477// equal groups into adjacent unequal groups that currently result in a478// dual inserted/removed printout. This acts as a high-pass filter to smooth479// out high-frequency changes within the windowSize.480//481// Example:482//483// WindowSize: 16,484// Input: [485// {NumIdentical: 61}, // group 0486// {NumRemoved: 3, NumInserted: 1}, // group 1487// {NumIdentical: 6}, // ├── coalesce488// {NumInserted: 2}, // ├── coalesce489// {NumIdentical: 1}, // ├── coalesce490// {NumRemoved: 9}, // └── coalesce491// {NumIdentical: 64}, // group 2492// {NumRemoved: 3, NumInserted: 1}, // group 3493// {NumIdentical: 6}, // ├── coalesce494// {NumInserted: 2}, // ├── coalesce495// {NumIdentical: 1}, // ├── coalesce496// {NumRemoved: 7}, // ├── coalesce497// {NumIdentical: 1}, // ├── coalesce498// {NumRemoved: 2}, // └── coalesce499// {NumIdentical: 63}, // group 4500// ]501// Output: [502// {NumIdentical: 61},503// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3},504// {NumIdentical: 64},505// {NumIdentical: 8, NumRemoved: 12, NumInserted: 3},506// {NumIdentical: 63},507// ]508func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {509groups, groupsOrig := groups[:0], groups510for i, ds := range groupsOrig {511if len(groups) >= 2 && ds.NumDiff() > 0 {512prev := &groups[len(groups)-2] // Unequal group513curr := &groups[len(groups)-1] // Equal group514next := &groupsOrig[i] // Unequal group515hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0516hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0517if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize {518*prev = prev.Append(*curr).Append(*next)519groups = groups[:len(groups)-1] // Truncate off equal group520continue521}522}523groups = append(groups, ds)524}525return groups526}527528// cleanupSurroundingIdentical scans through all unequal groups, and529// moves any leading sequence of equal elements to the preceding equal group and530// moves and trailing sequence of equal elements to the succeeding equal group.531//532// This is necessary since coalesceInterveningIdentical may coalesce edit groups533// together such that leading/trailing spans of equal elements becomes possible.534// Note that this can occur even with an optimal diffing algorithm.535//536// Example:537//538// Input: [539// {NumIdentical: 61},540// {NumIdentical: 1 , NumRemoved: 11, NumInserted: 2}, // assume 3 leading identical elements541// {NumIdentical: 67},542// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3}, // assume 10 trailing identical elements543// {NumIdentical: 54},544// ]545// Output: [546// {NumIdentical: 64}, // incremented by 3547// {NumRemoved: 9},548// {NumIdentical: 67},549// {NumRemoved: 9},550// {NumIdentical: 64}, // incremented by 10551// ]552func cleanupSurroundingIdentical(groups []diffStats, eq func(i, j int) bool) []diffStats {553var ix, iy int // indexes into sequence x and y554for i, ds := range groups {555// Handle equal group.556if ds.NumDiff() == 0 {557ix += ds.NumIdentical558iy += ds.NumIdentical559continue560}561562// Handle unequal group.563nx := ds.NumIdentical + ds.NumRemoved + ds.NumModified564ny := ds.NumIdentical + ds.NumInserted + ds.NumModified565var numLeadingIdentical, numTrailingIdentical int566for j := 0; j < nx && j < ny && eq(ix+j, iy+j); j++ {567numLeadingIdentical++568}569for j := 0; j < nx && j < ny && eq(ix+nx-1-j, iy+ny-1-j); j++ {570numTrailingIdentical++571}572if numIdentical := numLeadingIdentical + numTrailingIdentical; numIdentical > 0 {573if numLeadingIdentical > 0 {574// Remove leading identical span from this group and575// insert it into the preceding group.576if i-1 >= 0 {577groups[i-1].NumIdentical += numLeadingIdentical578} else {579// No preceding group exists, so prepend a new group,580// but do so after we finish iterating over all groups.581defer func() {582groups = append([]diffStats{{Name: groups[0].Name, NumIdentical: numLeadingIdentical}}, groups...)583}()584}585// Increment indexes since the preceding group would have handled this.586ix += numLeadingIdentical587iy += numLeadingIdentical588}589if numTrailingIdentical > 0 {590// Remove trailing identical span from this group and591// insert it into the succeeding group.592if i+1 < len(groups) {593groups[i+1].NumIdentical += numTrailingIdentical594} else {595// No succeeding group exists, so append a new group,596// but do so after we finish iterating over all groups.597defer func() {598groups = append(groups, diffStats{Name: groups[len(groups)-1].Name, NumIdentical: numTrailingIdentical})599}()600}601// Do not increment indexes since the succeeding group will handle this.602}603604// Update this group since some identical elements were removed.605nx -= numIdentical606ny -= numIdentical607groups[i] = diffStats{Name: ds.Name, NumRemoved: nx, NumInserted: ny}608}609ix += nx610iy += ny611}612return groups613}614615616