• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2019, The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package cmp
6
7import (
8	"bytes"
9	"fmt"
10	"math"
11	"reflect"
12	"strconv"
13	"strings"
14	"unicode"
15	"unicode/utf8"
16
17	"github.com/google/go-cmp/cmp/internal/diff"
18)
19
20// CanFormatDiffSlice reports whether we support custom formatting for nodes
21// that are slices of primitive kinds or strings.
22func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
23	switch {
24	case opts.DiffMode != diffUnknown:
25		return false // Must be formatting in diff mode
26	case v.NumDiff == 0:
27		return false // No differences detected
28	case !v.ValueX.IsValid() || !v.ValueY.IsValid():
29		return false // Both values must be valid
30	case v.NumIgnored > 0:
31		return false // Some ignore option was used
32	case v.NumTransformed > 0:
33		return false // Some transform option was used
34	case v.NumCompared > 1:
35		return false // More than one comparison was used
36	case v.NumCompared == 1 && v.Type.Name() != "":
37		// The need for cmp to check applicability of options on every element
38		// in a slice is a significant performance detriment for large []byte.
39		// The workaround is to specify Comparer(bytes.Equal),
40		// which enables cmp to compare []byte more efficiently.
41		// If they differ, we still want to provide batched diffing.
42		// The logic disallows named types since they tend to have their own
43		// String method, with nicer formatting than what this provides.
44		return false
45	}
46
47	// Check whether this is an interface with the same concrete types.
48	t := v.Type
49	vx, vy := v.ValueX, v.ValueY
50	if t.Kind() == reflect.Interface && !vx.IsNil() && !vy.IsNil() && vx.Elem().Type() == vy.Elem().Type() {
51		vx, vy = vx.Elem(), vy.Elem()
52		t = vx.Type()
53	}
54
55	// Check whether we provide specialized diffing for this type.
56	switch t.Kind() {
57	case reflect.String:
58	case reflect.Array, reflect.Slice:
59		// Only slices of primitive types have specialized handling.
60		switch t.Elem().Kind() {
61		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
62			reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
63			reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
64		default:
65			return false
66		}
67
68		// Both slice values have to be non-empty.
69		if t.Kind() == reflect.Slice && (vx.Len() == 0 || vy.Len() == 0) {
70			return false
71		}
72
73		// If a sufficient number of elements already differ,
74		// use specialized formatting even if length requirement is not met.
75		if v.NumDiff > v.NumSame {
76			return true
77		}
78	default:
79		return false
80	}
81
82	// Use specialized string diffing for longer slices or strings.
83	const minLength = 32
84	return vx.Len() >= minLength && vy.Len() >= minLength
85}
86
87// FormatDiffSlice prints a diff for the slices (or strings) represented by v.
88// This provides custom-tailored logic to make printing of differences in
89// textual strings and slices of primitive kinds more readable.
90func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
91	assert(opts.DiffMode == diffUnknown)
92	t, vx, vy := v.Type, v.ValueX, v.ValueY
93	if t.Kind() == reflect.Interface {
94		vx, vy = vx.Elem(), vy.Elem()
95		t = vx.Type()
96		opts = opts.WithTypeMode(emitType)
97	}
98
99	// Auto-detect the type of the data.
100	var sx, sy string
101	var ssx, ssy []string
102	var isString, isMostlyText, isPureLinedText, isBinary bool
103	switch {
104	case t.Kind() == reflect.String:
105		sx, sy = vx.String(), vy.String()
106		isString = true
107	case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)):
108		sx, sy = string(vx.Bytes()), string(vy.Bytes())
109		isString = true
110	case t.Kind() == reflect.Array:
111		// Arrays need to be addressable for slice operations to work.
112		vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
113		vx2.Set(vx)
114		vy2.Set(vy)
115		vx, vy = vx2, vy2
116	}
117	if isString {
118		var numTotalRunes, numValidRunes, numLines, lastLineIdx, maxLineLen int
119		for i, r := range sx + sy {
120			numTotalRunes++
121			if (unicode.IsPrint(r) || unicode.IsSpace(r)) && r != utf8.RuneError {
122				numValidRunes++
123			}
124			if r == '\n' {
125				if maxLineLen < i-lastLineIdx {
126					maxLineLen = i - lastLineIdx
127				}
128				lastLineIdx = i + 1
129				numLines++
130			}
131		}
132		isPureText := numValidRunes == numTotalRunes
133		isMostlyText = float64(numValidRunes) > math.Floor(0.90*float64(numTotalRunes))
134		isPureLinedText = isPureText && numLines >= 4 && maxLineLen <= 1024
135		isBinary = !isMostlyText
136
137		// Avoid diffing by lines if it produces a significantly more complex
138		// edit script than diffing by bytes.
139		if isPureLinedText {
140			ssx = strings.Split(sx, "\n")
141			ssy = strings.Split(sy, "\n")
142			esLines := diff.Difference(len(ssx), len(ssy), func(ix, iy int) diff.Result {
143				return diff.BoolResult(ssx[ix] == ssy[iy])
144			})
145			esBytes := diff.Difference(len(sx), len(sy), func(ix, iy int) diff.Result {
146				return diff.BoolResult(sx[ix] == sy[iy])
147			})
148			efficiencyLines := float64(esLines.Dist()) / float64(len(esLines))
149			efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes))
150			isPureLinedText = efficiencyLines < 4*efficiencyBytes
151		}
152	}
153
154	// Format the string into printable records.
155	var list textList
156	var delim string
157	switch {
158	// If the text appears to be multi-lined text,
159	// then perform differencing across individual lines.
160	case isPureLinedText:
161		list = opts.formatDiffSlice(
162			reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
163			func(v reflect.Value, d diffMode) textRecord {
164				s := formatString(v.Index(0).String())
165				return textRecord{Diff: d, Value: textLine(s)}
166			},
167		)
168		delim = "\n"
169
170		// If possible, use a custom triple-quote (""") syntax for printing
171		// differences in a string literal. This format is more readable,
172		// but has edge-cases where differences are visually indistinguishable.
173		// This format is avoided under the following conditions:
174		//	• A line starts with `"""`
175		//	• A line starts with "..."
176		//	• A line contains non-printable characters
177		//	• Adjacent different lines differ only by whitespace
178		//
179		// For example:
180		//		"""
181		//		... // 3 identical lines
182		//		foo
183		//		bar
184		//	-	baz
185		//	+	BAZ
186		//		"""
187		isTripleQuoted := true
188		prevRemoveLines := map[string]bool{}
189		prevInsertLines := map[string]bool{}
190		var list2 textList
191		list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
192		for _, r := range list {
193			if !r.Value.Equal(textEllipsis) {
194				line, _ := strconv.Unquote(string(r.Value.(textLine)))
195				line = strings.TrimPrefix(strings.TrimSuffix(line, "\r"), "\r") // trim leading/trailing carriage returns for legacy Windows endline support
196				normLine := strings.Map(func(r rune) rune {
197					if unicode.IsSpace(r) {
198						return -1 // drop whitespace to avoid visually indistinguishable output
199					}
200					return r
201				}, line)
202				isPrintable := func(r rune) bool {
203					return unicode.IsPrint(r) || r == '\t' // specially treat tab as printable
204				}
205				isTripleQuoted = !strings.HasPrefix(line, `"""`) && !strings.HasPrefix(line, "...") && strings.TrimFunc(line, isPrintable) == ""
206				switch r.Diff {
207				case diffRemoved:
208					isTripleQuoted = isTripleQuoted && !prevInsertLines[normLine]
209					prevRemoveLines[normLine] = true
210				case diffInserted:
211					isTripleQuoted = isTripleQuoted && !prevRemoveLines[normLine]
212					prevInsertLines[normLine] = true
213				}
214				if !isTripleQuoted {
215					break
216				}
217				r.Value = textLine(line)
218				r.ElideComma = true
219			}
220			if !(r.Diff == diffRemoved || r.Diff == diffInserted) { // start a new non-adjacent difference group
221				prevRemoveLines = map[string]bool{}
222				prevInsertLines = map[string]bool{}
223			}
224			list2 = append(list2, r)
225		}
226		if r := list2[len(list2)-1]; r.Diff == diffIdentical && len(r.Value.(textLine)) == 0 {
227			list2 = list2[:len(list2)-1] // elide single empty line at the end
228		}
229		list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
230		if isTripleQuoted {
231			var out textNode = &textWrap{Prefix: "(", Value: list2, Suffix: ")"}
232			switch t.Kind() {
233			case reflect.String:
234				if t != reflect.TypeOf(string("")) {
235					out = opts.FormatType(t, out)
236				}
237			case reflect.Slice:
238				// Always emit type for slices since the triple-quote syntax
239				// looks like a string (not a slice).
240				opts = opts.WithTypeMode(emitType)
241				out = opts.FormatType(t, out)
242			}
243			return out
244		}
245
246	// If the text appears to be single-lined text,
247	// then perform differencing in approximately fixed-sized chunks.
248	// The output is printed as quoted strings.
249	case isMostlyText:
250		list = opts.formatDiffSlice(
251			reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
252			func(v reflect.Value, d diffMode) textRecord {
253				s := formatString(v.String())
254				return textRecord{Diff: d, Value: textLine(s)}
255			},
256		)
257
258	// If the text appears to be binary data,
259	// then perform differencing in approximately fixed-sized chunks.
260	// The output is inspired by hexdump.
261	case isBinary:
262		list = opts.formatDiffSlice(
263			reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte",
264			func(v reflect.Value, d diffMode) textRecord {
265				var ss []string
266				for i := 0; i < v.Len(); i++ {
267					ss = append(ss, formatHex(v.Index(i).Uint()))
268				}
269				s := strings.Join(ss, ", ")
270				comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String())))
271				return textRecord{Diff: d, Value: textLine(s), Comment: comment}
272			},
273		)
274
275	// For all other slices of primitive types,
276	// then perform differencing in approximately fixed-sized chunks.
277	// The size of each chunk depends on the width of the element kind.
278	default:
279		var chunkSize int
280		if t.Elem().Kind() == reflect.Bool {
281			chunkSize = 16
282		} else {
283			switch t.Elem().Bits() {
284			case 8:
285				chunkSize = 16
286			case 16:
287				chunkSize = 12
288			case 32:
289				chunkSize = 8
290			default:
291				chunkSize = 8
292			}
293		}
294		list = opts.formatDiffSlice(
295			vx, vy, chunkSize, t.Elem().Kind().String(),
296			func(v reflect.Value, d diffMode) textRecord {
297				var ss []string
298				for i := 0; i < v.Len(); i++ {
299					switch t.Elem().Kind() {
300					case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
301						ss = append(ss, fmt.Sprint(v.Index(i).Int()))
302					case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64:
303						ss = append(ss, fmt.Sprint(v.Index(i).Uint()))
304					case reflect.Uint8, reflect.Uintptr:
305						ss = append(ss, formatHex(v.Index(i).Uint()))
306					case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
307						ss = append(ss, fmt.Sprint(v.Index(i).Interface()))
308					}
309				}
310				s := strings.Join(ss, ", ")
311				return textRecord{Diff: d, Value: textLine(s)}
312			},
313		)
314	}
315
316	// Wrap the output with appropriate type information.
317	var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"}
318	if !isMostlyText {
319		// The "{...}" byte-sequence literal is not valid Go syntax for strings.
320		// Emit the type for extra clarity (e.g. "string{...}").
321		if t.Kind() == reflect.String {
322			opts = opts.WithTypeMode(emitType)
323		}
324		return opts.FormatType(t, out)
325	}
326	switch t.Kind() {
327	case reflect.String:
328		out = &textWrap{Prefix: "strings.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
329		if t != reflect.TypeOf(string("")) {
330			out = opts.FormatType(t, out)
331		}
332	case reflect.Slice:
333		out = &textWrap{Prefix: "bytes.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
334		if t != reflect.TypeOf([]byte(nil)) {
335			out = opts.FormatType(t, out)
336		}
337	}
338	return out
339}
340
341// formatASCII formats s as an ASCII string.
342// This is useful for printing binary strings in a semi-legible way.
343func formatASCII(s string) string {
344	b := bytes.Repeat([]byte{'.'}, len(s))
345	for i := 0; i < len(s); i++ {
346		if ' ' <= s[i] && s[i] <= '~' {
347			b[i] = s[i]
348		}
349	}
350	return string(b)
351}
352
353func (opts formatOptions) formatDiffSlice(
354	vx, vy reflect.Value, chunkSize int, name string,
355	makeRec func(reflect.Value, diffMode) textRecord,
356) (list textList) {
357	eq := func(ix, iy int) bool {
358		return vx.Index(ix).Interface() == vy.Index(iy).Interface()
359	}
360	es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {
361		return diff.BoolResult(eq(ix, iy))
362	})
363
364	appendChunks := func(v reflect.Value, d diffMode) int {
365		n0 := v.Len()
366		for v.Len() > 0 {
367			n := chunkSize
368			if n > v.Len() {
369				n = v.Len()
370			}
371			list = append(list, makeRec(v.Slice(0, n), d))
372			v = v.Slice(n, v.Len())
373		}
374		return n0 - v.Len()
375	}
376
377	var numDiffs int
378	maxLen := -1
379	if opts.LimitVerbosity {
380		maxLen = (1 << opts.verbosity()) << 2 // 4, 8, 16, 32, 64, etc...
381		opts.VerbosityLevel--
382	}
383
384	groups := coalesceAdjacentEdits(name, es)
385	groups = coalesceInterveningIdentical(groups, chunkSize/4)
386	groups = cleanupSurroundingIdentical(groups, eq)
387	maxGroup := diffStats{Name: name}
388	for i, ds := range groups {
389		if maxLen >= 0 && numDiffs >= maxLen {
390			maxGroup = maxGroup.Append(ds)
391			continue
392		}
393
394		// Print equal.
395		if ds.NumDiff() == 0 {
396			// Compute the number of leading and trailing equal bytes to print.
397			var numLo, numHi int
398			numEqual := ds.NumIgnored + ds.NumIdentical
399			for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 {
400				numLo++
401			}
402			for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
403				numHi++
404			}
405			if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 {
406				numHi = numEqual - numLo // Avoid pointless coalescing of single equal row
407			}
408
409			// Print the equal bytes.
410			appendChunks(vx.Slice(0, numLo), diffIdentical)
411			if numEqual > numLo+numHi {
412				ds.NumIdentical -= numLo + numHi
413				list.AppendEllipsis(ds)
414			}
415			appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical)
416			vx = vx.Slice(numEqual, vx.Len())
417			vy = vy.Slice(numEqual, vy.Len())
418			continue
419		}
420
421		// Print unequal.
422		len0 := len(list)
423		nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved)
424		vx = vx.Slice(nx, vx.Len())
425		ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted)
426		vy = vy.Slice(ny, vy.Len())
427		numDiffs += len(list) - len0
428	}
429	if maxGroup.IsZero() {
430		assert(vx.Len() == 0 && vy.Len() == 0)
431	} else {
432		list.AppendEllipsis(maxGroup)
433	}
434	return list
435}
436
437// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
438// equal or unequal counts.
439//
440// Example:
441//
442//	Input:  "..XXY...Y"
443//	Output: [
444//		{NumIdentical: 2},
445//		{NumRemoved: 2, NumInserted 1},
446//		{NumIdentical: 3},
447//		{NumInserted: 1},
448//	]
449//
450func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
451	var prevMode byte
452	lastStats := func(mode byte) *diffStats {
453		if prevMode != mode {
454			groups = append(groups, diffStats{Name: name})
455			prevMode = mode
456		}
457		return &groups[len(groups)-1]
458	}
459	for _, e := range es {
460		switch e {
461		case diff.Identity:
462			lastStats('=').NumIdentical++
463		case diff.UniqueX:
464			lastStats('!').NumRemoved++
465		case diff.UniqueY:
466			lastStats('!').NumInserted++
467		case diff.Modified:
468			lastStats('!').NumModified++
469		}
470	}
471	return groups
472}
473
474// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize)
475// equal groups into adjacent unequal groups that currently result in a
476// dual inserted/removed printout. This acts as a high-pass filter to smooth
477// out high-frequency changes within the windowSize.
478//
479// Example:
480//
481//	WindowSize: 16,
482//	Input: [
483//		{NumIdentical: 61},              // group 0
484//		{NumRemoved: 3, NumInserted: 1}, // group 1
485//		{NumIdentical: 6},               // ├── coalesce
486//		{NumInserted: 2},                // ├── coalesce
487//		{NumIdentical: 1},               // ├── coalesce
488//		{NumRemoved: 9},                 // └── coalesce
489//		{NumIdentical: 64},              // group 2
490//		{NumRemoved: 3, NumInserted: 1}, // group 3
491//		{NumIdentical: 6},               // ├── coalesce
492//		{NumInserted: 2},                // ├── coalesce
493//		{NumIdentical: 1},               // ├── coalesce
494//		{NumRemoved: 7},                 // ├── coalesce
495//		{NumIdentical: 1},               // ├── coalesce
496//		{NumRemoved: 2},                 // └── coalesce
497//		{NumIdentical: 63},              // group 4
498//	]
499//	Output: [
500//		{NumIdentical: 61},
501//		{NumIdentical: 7, NumRemoved: 12, NumInserted: 3},
502//		{NumIdentical: 64},
503//		{NumIdentical: 8, NumRemoved: 12, NumInserted: 3},
504//		{NumIdentical: 63},
505//	]
506//
507func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
508	groups, groupsOrig := groups[:0], groups
509	for i, ds := range groupsOrig {
510		if len(groups) >= 2 && ds.NumDiff() > 0 {
511			prev := &groups[len(groups)-2] // Unequal group
512			curr := &groups[len(groups)-1] // Equal group
513			next := &groupsOrig[i]         // Unequal group
514			hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0
515			hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0
516			if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize {
517				*prev = prev.Append(*curr).Append(*next)
518				groups = groups[:len(groups)-1] // Truncate off equal group
519				continue
520			}
521		}
522		groups = append(groups, ds)
523	}
524	return groups
525}
526
527// cleanupSurroundingIdentical scans through all unequal groups, and
528// moves any leading sequence of equal elements to the preceding equal group and
529// moves and trailing sequence of equal elements to the succeeding equal group.
530//
531// This is necessary since coalesceInterveningIdentical may coalesce edit groups
532// together such that leading/trailing spans of equal elements becomes possible.
533// Note that this can occur even with an optimal diffing algorithm.
534//
535// Example:
536//
537//	Input: [
538//		{NumIdentical: 61},
539//		{NumIdentical: 1 , NumRemoved: 11, NumInserted: 2}, // assume 3 leading identical elements
540//		{NumIdentical: 67},
541//		{NumIdentical: 7, NumRemoved: 12, NumInserted: 3},  // assume 10 trailing identical elements
542//		{NumIdentical: 54},
543//	]
544//	Output: [
545//		{NumIdentical: 64}, // incremented by 3
546//		{NumRemoved: 9},
547//		{NumIdentical: 67},
548//		{NumRemoved: 9},
549//		{NumIdentical: 64}, // incremented by 10
550//	]
551//
552func cleanupSurroundingIdentical(groups []diffStats, eq func(i, j int) bool) []diffStats {
553	var ix, iy int // indexes into sequence x and y
554	for i, ds := range groups {
555		// Handle equal group.
556		if ds.NumDiff() == 0 {
557			ix += ds.NumIdentical
558			iy += ds.NumIdentical
559			continue
560		}
561
562		// Handle unequal group.
563		nx := ds.NumIdentical + ds.NumRemoved + ds.NumModified
564		ny := ds.NumIdentical + ds.NumInserted + ds.NumModified
565		var numLeadingIdentical, numTrailingIdentical int
566		for j := 0; j < nx && j < ny && eq(ix+j, iy+j); j++ {
567			numLeadingIdentical++
568		}
569		for j := 0; j < nx && j < ny && eq(ix+nx-1-j, iy+ny-1-j); j++ {
570			numTrailingIdentical++
571		}
572		if numIdentical := numLeadingIdentical + numTrailingIdentical; numIdentical > 0 {
573			if numLeadingIdentical > 0 {
574				// Remove leading identical span from this group and
575				// insert it into the preceding group.
576				if i-1 >= 0 {
577					groups[i-1].NumIdentical += numLeadingIdentical
578				} else {
579					// No preceding group exists, so prepend a new group,
580					// but do so after we finish iterating over all groups.
581					defer func() {
582						groups = append([]diffStats{{Name: groups[0].Name, NumIdentical: numLeadingIdentical}}, groups...)
583					}()
584				}
585				// Increment indexes since the preceding group would have handled this.
586				ix += numLeadingIdentical
587				iy += numLeadingIdentical
588			}
589			if numTrailingIdentical > 0 {
590				// Remove trailing identical span from this group and
591				// insert it into the succeeding group.
592				if i+1 < len(groups) {
593					groups[i+1].NumIdentical += numTrailingIdentical
594				} else {
595					// No succeeding group exists, so append a new group,
596					// but do so after we finish iterating over all groups.
597					defer func() {
598						groups = append(groups, diffStats{Name: groups[len(groups)-1].Name, NumIdentical: numTrailingIdentical})
599					}()
600				}
601				// Do not increment indexes since the succeeding group will handle this.
602			}
603
604			// Update this group since some identical elements were removed.
605			nx -= numIdentical
606			ny -= numIdentical
607			groups[i] = diffStats{Name: ds.Name, NumRemoved: nx, NumInserted: ny}
608		}
609		ix += nx
610		iy += ny
611	}
612	return groups
613}
614