1// Copyright 2019, The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package cmp 6 7import ( 8 "bytes" 9 "fmt" 10 "math" 11 "reflect" 12 "strconv" 13 "strings" 14 "unicode" 15 "unicode/utf8" 16 17 "github.com/google/go-cmp/cmp/internal/diff" 18) 19 20// CanFormatDiffSlice reports whether we support custom formatting for nodes 21// that are slices of primitive kinds or strings. 22func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool { 23 switch { 24 case opts.DiffMode != diffUnknown: 25 return false // Must be formatting in diff mode 26 case v.NumDiff == 0: 27 return false // No differences detected 28 case !v.ValueX.IsValid() || !v.ValueY.IsValid(): 29 return false // Both values must be valid 30 case v.NumIgnored > 0: 31 return false // Some ignore option was used 32 case v.NumTransformed > 0: 33 return false // Some transform option was used 34 case v.NumCompared > 1: 35 return false // More than one comparison was used 36 case v.NumCompared == 1 && v.Type.Name() != "": 37 // The need for cmp to check applicability of options on every element 38 // in a slice is a significant performance detriment for large []byte. 39 // The workaround is to specify Comparer(bytes.Equal), 40 // which enables cmp to compare []byte more efficiently. 41 // If they differ, we still want to provide batched diffing. 42 // The logic disallows named types since they tend to have their own 43 // String method, with nicer formatting than what this provides. 44 return false 45 } 46 47 // Check whether this is an interface with the same concrete types. 48 t := v.Type 49 vx, vy := v.ValueX, v.ValueY 50 if t.Kind() == reflect.Interface && !vx.IsNil() && !vy.IsNil() && vx.Elem().Type() == vy.Elem().Type() { 51 vx, vy = vx.Elem(), vy.Elem() 52 t = vx.Type() 53 } 54 55 // Check whether we provide specialized diffing for this type. 56 switch t.Kind() { 57 case reflect.String: 58 case reflect.Array, reflect.Slice: 59 // Only slices of primitive types have specialized handling. 60 switch t.Elem().Kind() { 61 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, 62 reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, 63 reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: 64 default: 65 return false 66 } 67 68 // Both slice values have to be non-empty. 69 if t.Kind() == reflect.Slice && (vx.Len() == 0 || vy.Len() == 0) { 70 return false 71 } 72 73 // If a sufficient number of elements already differ, 74 // use specialized formatting even if length requirement is not met. 75 if v.NumDiff > v.NumSame { 76 return true 77 } 78 default: 79 return false 80 } 81 82 // Use specialized string diffing for longer slices or strings. 83 const minLength = 32 84 return vx.Len() >= minLength && vy.Len() >= minLength 85} 86 87// FormatDiffSlice prints a diff for the slices (or strings) represented by v. 88// This provides custom-tailored logic to make printing of differences in 89// textual strings and slices of primitive kinds more readable. 90func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { 91 assert(opts.DiffMode == diffUnknown) 92 t, vx, vy := v.Type, v.ValueX, v.ValueY 93 if t.Kind() == reflect.Interface { 94 vx, vy = vx.Elem(), vy.Elem() 95 t = vx.Type() 96 opts = opts.WithTypeMode(emitType) 97 } 98 99 // Auto-detect the type of the data. 100 var sx, sy string 101 var ssx, ssy []string 102 var isString, isMostlyText, isPureLinedText, isBinary bool 103 switch { 104 case t.Kind() == reflect.String: 105 sx, sy = vx.String(), vy.String() 106 isString = true 107 case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)): 108 sx, sy = string(vx.Bytes()), string(vy.Bytes()) 109 isString = true 110 case t.Kind() == reflect.Array: 111 // Arrays need to be addressable for slice operations to work. 112 vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem() 113 vx2.Set(vx) 114 vy2.Set(vy) 115 vx, vy = vx2, vy2 116 } 117 if isString { 118 var numTotalRunes, numValidRunes, numLines, lastLineIdx, maxLineLen int 119 for i, r := range sx + sy { 120 numTotalRunes++ 121 if (unicode.IsPrint(r) || unicode.IsSpace(r)) && r != utf8.RuneError { 122 numValidRunes++ 123 } 124 if r == '\n' { 125 if maxLineLen < i-lastLineIdx { 126 maxLineLen = i - lastLineIdx 127 } 128 lastLineIdx = i + 1 129 numLines++ 130 } 131 } 132 isPureText := numValidRunes == numTotalRunes 133 isMostlyText = float64(numValidRunes) > math.Floor(0.90*float64(numTotalRunes)) 134 isPureLinedText = isPureText && numLines >= 4 && maxLineLen <= 1024 135 isBinary = !isMostlyText 136 137 // Avoid diffing by lines if it produces a significantly more complex 138 // edit script than diffing by bytes. 139 if isPureLinedText { 140 ssx = strings.Split(sx, "\n") 141 ssy = strings.Split(sy, "\n") 142 esLines := diff.Difference(len(ssx), len(ssy), func(ix, iy int) diff.Result { 143 return diff.BoolResult(ssx[ix] == ssy[iy]) 144 }) 145 esBytes := diff.Difference(len(sx), len(sy), func(ix, iy int) diff.Result { 146 return diff.BoolResult(sx[ix] == sy[iy]) 147 }) 148 efficiencyLines := float64(esLines.Dist()) / float64(len(esLines)) 149 efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes)) 150 isPureLinedText = efficiencyLines < 4*efficiencyBytes 151 } 152 } 153 154 // Format the string into printable records. 155 var list textList 156 var delim string 157 switch { 158 // If the text appears to be multi-lined text, 159 // then perform differencing across individual lines. 160 case isPureLinedText: 161 list = opts.formatDiffSlice( 162 reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line", 163 func(v reflect.Value, d diffMode) textRecord { 164 s := formatString(v.Index(0).String()) 165 return textRecord{Diff: d, Value: textLine(s)} 166 }, 167 ) 168 delim = "\n" 169 170 // If possible, use a custom triple-quote (""") syntax for printing 171 // differences in a string literal. This format is more readable, 172 // but has edge-cases where differences are visually indistinguishable. 173 // This format is avoided under the following conditions: 174 // • A line starts with `"""` 175 // • A line starts with "..." 176 // • A line contains non-printable characters 177 // • Adjacent different lines differ only by whitespace 178 // 179 // For example: 180 // """ 181 // ... // 3 identical lines 182 // foo 183 // bar 184 // - baz 185 // + BAZ 186 // """ 187 isTripleQuoted := true 188 prevRemoveLines := map[string]bool{} 189 prevInsertLines := map[string]bool{} 190 var list2 textList 191 list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true}) 192 for _, r := range list { 193 if !r.Value.Equal(textEllipsis) { 194 line, _ := strconv.Unquote(string(r.Value.(textLine))) 195 line = strings.TrimPrefix(strings.TrimSuffix(line, "\r"), "\r") // trim leading/trailing carriage returns for legacy Windows endline support 196 normLine := strings.Map(func(r rune) rune { 197 if unicode.IsSpace(r) { 198 return -1 // drop whitespace to avoid visually indistinguishable output 199 } 200 return r 201 }, line) 202 isPrintable := func(r rune) bool { 203 return unicode.IsPrint(r) || r == '\t' // specially treat tab as printable 204 } 205 isTripleQuoted = !strings.HasPrefix(line, `"""`) && !strings.HasPrefix(line, "...") && strings.TrimFunc(line, isPrintable) == "" 206 switch r.Diff { 207 case diffRemoved: 208 isTripleQuoted = isTripleQuoted && !prevInsertLines[normLine] 209 prevRemoveLines[normLine] = true 210 case diffInserted: 211 isTripleQuoted = isTripleQuoted && !prevRemoveLines[normLine] 212 prevInsertLines[normLine] = true 213 } 214 if !isTripleQuoted { 215 break 216 } 217 r.Value = textLine(line) 218 r.ElideComma = true 219 } 220 if !(r.Diff == diffRemoved || r.Diff == diffInserted) { // start a new non-adjacent difference group 221 prevRemoveLines = map[string]bool{} 222 prevInsertLines = map[string]bool{} 223 } 224 list2 = append(list2, r) 225 } 226 if r := list2[len(list2)-1]; r.Diff == diffIdentical && len(r.Value.(textLine)) == 0 { 227 list2 = list2[:len(list2)-1] // elide single empty line at the end 228 } 229 list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true}) 230 if isTripleQuoted { 231 var out textNode = &textWrap{Prefix: "(", Value: list2, Suffix: ")"} 232 switch t.Kind() { 233 case reflect.String: 234 if t != reflect.TypeOf(string("")) { 235 out = opts.FormatType(t, out) 236 } 237 case reflect.Slice: 238 // Always emit type for slices since the triple-quote syntax 239 // looks like a string (not a slice). 240 opts = opts.WithTypeMode(emitType) 241 out = opts.FormatType(t, out) 242 } 243 return out 244 } 245 246 // If the text appears to be single-lined text, 247 // then perform differencing in approximately fixed-sized chunks. 248 // The output is printed as quoted strings. 249 case isMostlyText: 250 list = opts.formatDiffSlice( 251 reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte", 252 func(v reflect.Value, d diffMode) textRecord { 253 s := formatString(v.String()) 254 return textRecord{Diff: d, Value: textLine(s)} 255 }, 256 ) 257 258 // If the text appears to be binary data, 259 // then perform differencing in approximately fixed-sized chunks. 260 // The output is inspired by hexdump. 261 case isBinary: 262 list = opts.formatDiffSlice( 263 reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte", 264 func(v reflect.Value, d diffMode) textRecord { 265 var ss []string 266 for i := 0; i < v.Len(); i++ { 267 ss = append(ss, formatHex(v.Index(i).Uint())) 268 } 269 s := strings.Join(ss, ", ") 270 comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String()))) 271 return textRecord{Diff: d, Value: textLine(s), Comment: comment} 272 }, 273 ) 274 275 // For all other slices of primitive types, 276 // then perform differencing in approximately fixed-sized chunks. 277 // The size of each chunk depends on the width of the element kind. 278 default: 279 var chunkSize int 280 if t.Elem().Kind() == reflect.Bool { 281 chunkSize = 16 282 } else { 283 switch t.Elem().Bits() { 284 case 8: 285 chunkSize = 16 286 case 16: 287 chunkSize = 12 288 case 32: 289 chunkSize = 8 290 default: 291 chunkSize = 8 292 } 293 } 294 list = opts.formatDiffSlice( 295 vx, vy, chunkSize, t.Elem().Kind().String(), 296 func(v reflect.Value, d diffMode) textRecord { 297 var ss []string 298 for i := 0; i < v.Len(); i++ { 299 switch t.Elem().Kind() { 300 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 301 ss = append(ss, fmt.Sprint(v.Index(i).Int())) 302 case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64: 303 ss = append(ss, fmt.Sprint(v.Index(i).Uint())) 304 case reflect.Uint8, reflect.Uintptr: 305 ss = append(ss, formatHex(v.Index(i).Uint())) 306 case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: 307 ss = append(ss, fmt.Sprint(v.Index(i).Interface())) 308 } 309 } 310 s := strings.Join(ss, ", ") 311 return textRecord{Diff: d, Value: textLine(s)} 312 }, 313 ) 314 } 315 316 // Wrap the output with appropriate type information. 317 var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"} 318 if !isMostlyText { 319 // The "{...}" byte-sequence literal is not valid Go syntax for strings. 320 // Emit the type for extra clarity (e.g. "string{...}"). 321 if t.Kind() == reflect.String { 322 opts = opts.WithTypeMode(emitType) 323 } 324 return opts.FormatType(t, out) 325 } 326 switch t.Kind() { 327 case reflect.String: 328 out = &textWrap{Prefix: "strings.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)} 329 if t != reflect.TypeOf(string("")) { 330 out = opts.FormatType(t, out) 331 } 332 case reflect.Slice: 333 out = &textWrap{Prefix: "bytes.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)} 334 if t != reflect.TypeOf([]byte(nil)) { 335 out = opts.FormatType(t, out) 336 } 337 } 338 return out 339} 340 341// formatASCII formats s as an ASCII string. 342// This is useful for printing binary strings in a semi-legible way. 343func formatASCII(s string) string { 344 b := bytes.Repeat([]byte{'.'}, len(s)) 345 for i := 0; i < len(s); i++ { 346 if ' ' <= s[i] && s[i] <= '~' { 347 b[i] = s[i] 348 } 349 } 350 return string(b) 351} 352 353func (opts formatOptions) formatDiffSlice( 354 vx, vy reflect.Value, chunkSize int, name string, 355 makeRec func(reflect.Value, diffMode) textRecord, 356) (list textList) { 357 eq := func(ix, iy int) bool { 358 return vx.Index(ix).Interface() == vy.Index(iy).Interface() 359 } 360 es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result { 361 return diff.BoolResult(eq(ix, iy)) 362 }) 363 364 appendChunks := func(v reflect.Value, d diffMode) int { 365 n0 := v.Len() 366 for v.Len() > 0 { 367 n := chunkSize 368 if n > v.Len() { 369 n = v.Len() 370 } 371 list = append(list, makeRec(v.Slice(0, n), d)) 372 v = v.Slice(n, v.Len()) 373 } 374 return n0 - v.Len() 375 } 376 377 var numDiffs int 378 maxLen := -1 379 if opts.LimitVerbosity { 380 maxLen = (1 << opts.verbosity()) << 2 // 4, 8, 16, 32, 64, etc... 381 opts.VerbosityLevel-- 382 } 383 384 groups := coalesceAdjacentEdits(name, es) 385 groups = coalesceInterveningIdentical(groups, chunkSize/4) 386 groups = cleanupSurroundingIdentical(groups, eq) 387 maxGroup := diffStats{Name: name} 388 for i, ds := range groups { 389 if maxLen >= 0 && numDiffs >= maxLen { 390 maxGroup = maxGroup.Append(ds) 391 continue 392 } 393 394 // Print equal. 395 if ds.NumDiff() == 0 { 396 // Compute the number of leading and trailing equal bytes to print. 397 var numLo, numHi int 398 numEqual := ds.NumIgnored + ds.NumIdentical 399 for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 { 400 numLo++ 401 } 402 for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 { 403 numHi++ 404 } 405 if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 { 406 numHi = numEqual - numLo // Avoid pointless coalescing of single equal row 407 } 408 409 // Print the equal bytes. 410 appendChunks(vx.Slice(0, numLo), diffIdentical) 411 if numEqual > numLo+numHi { 412 ds.NumIdentical -= numLo + numHi 413 list.AppendEllipsis(ds) 414 } 415 appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical) 416 vx = vx.Slice(numEqual, vx.Len()) 417 vy = vy.Slice(numEqual, vy.Len()) 418 continue 419 } 420 421 // Print unequal. 422 len0 := len(list) 423 nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved) 424 vx = vx.Slice(nx, vx.Len()) 425 ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted) 426 vy = vy.Slice(ny, vy.Len()) 427 numDiffs += len(list) - len0 428 } 429 if maxGroup.IsZero() { 430 assert(vx.Len() == 0 && vy.Len() == 0) 431 } else { 432 list.AppendEllipsis(maxGroup) 433 } 434 return list 435} 436 437// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent 438// equal or unequal counts. 439// 440// Example: 441// 442// Input: "..XXY...Y" 443// Output: [ 444// {NumIdentical: 2}, 445// {NumRemoved: 2, NumInserted 1}, 446// {NumIdentical: 3}, 447// {NumInserted: 1}, 448// ] 449// 450func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) { 451 var prevMode byte 452 lastStats := func(mode byte) *diffStats { 453 if prevMode != mode { 454 groups = append(groups, diffStats{Name: name}) 455 prevMode = mode 456 } 457 return &groups[len(groups)-1] 458 } 459 for _, e := range es { 460 switch e { 461 case diff.Identity: 462 lastStats('=').NumIdentical++ 463 case diff.UniqueX: 464 lastStats('!').NumRemoved++ 465 case diff.UniqueY: 466 lastStats('!').NumInserted++ 467 case diff.Modified: 468 lastStats('!').NumModified++ 469 } 470 } 471 return groups 472} 473 474// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize) 475// equal groups into adjacent unequal groups that currently result in a 476// dual inserted/removed printout. This acts as a high-pass filter to smooth 477// out high-frequency changes within the windowSize. 478// 479// Example: 480// 481// WindowSize: 16, 482// Input: [ 483// {NumIdentical: 61}, // group 0 484// {NumRemoved: 3, NumInserted: 1}, // group 1 485// {NumIdentical: 6}, // ├── coalesce 486// {NumInserted: 2}, // ├── coalesce 487// {NumIdentical: 1}, // ├── coalesce 488// {NumRemoved: 9}, // └── coalesce 489// {NumIdentical: 64}, // group 2 490// {NumRemoved: 3, NumInserted: 1}, // group 3 491// {NumIdentical: 6}, // ├── coalesce 492// {NumInserted: 2}, // ├── coalesce 493// {NumIdentical: 1}, // ├── coalesce 494// {NumRemoved: 7}, // ├── coalesce 495// {NumIdentical: 1}, // ├── coalesce 496// {NumRemoved: 2}, // └── coalesce 497// {NumIdentical: 63}, // group 4 498// ] 499// Output: [ 500// {NumIdentical: 61}, 501// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3}, 502// {NumIdentical: 64}, 503// {NumIdentical: 8, NumRemoved: 12, NumInserted: 3}, 504// {NumIdentical: 63}, 505// ] 506// 507func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats { 508 groups, groupsOrig := groups[:0], groups 509 for i, ds := range groupsOrig { 510 if len(groups) >= 2 && ds.NumDiff() > 0 { 511 prev := &groups[len(groups)-2] // Unequal group 512 curr := &groups[len(groups)-1] // Equal group 513 next := &groupsOrig[i] // Unequal group 514 hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0 515 hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0 516 if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize { 517 *prev = prev.Append(*curr).Append(*next) 518 groups = groups[:len(groups)-1] // Truncate off equal group 519 continue 520 } 521 } 522 groups = append(groups, ds) 523 } 524 return groups 525} 526 527// cleanupSurroundingIdentical scans through all unequal groups, and 528// moves any leading sequence of equal elements to the preceding equal group and 529// moves and trailing sequence of equal elements to the succeeding equal group. 530// 531// This is necessary since coalesceInterveningIdentical may coalesce edit groups 532// together such that leading/trailing spans of equal elements becomes possible. 533// Note that this can occur even with an optimal diffing algorithm. 534// 535// Example: 536// 537// Input: [ 538// {NumIdentical: 61}, 539// {NumIdentical: 1 , NumRemoved: 11, NumInserted: 2}, // assume 3 leading identical elements 540// {NumIdentical: 67}, 541// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3}, // assume 10 trailing identical elements 542// {NumIdentical: 54}, 543// ] 544// Output: [ 545// {NumIdentical: 64}, // incremented by 3 546// {NumRemoved: 9}, 547// {NumIdentical: 67}, 548// {NumRemoved: 9}, 549// {NumIdentical: 64}, // incremented by 10 550// ] 551// 552func cleanupSurroundingIdentical(groups []diffStats, eq func(i, j int) bool) []diffStats { 553 var ix, iy int // indexes into sequence x and y 554 for i, ds := range groups { 555 // Handle equal group. 556 if ds.NumDiff() == 0 { 557 ix += ds.NumIdentical 558 iy += ds.NumIdentical 559 continue 560 } 561 562 // Handle unequal group. 563 nx := ds.NumIdentical + ds.NumRemoved + ds.NumModified 564 ny := ds.NumIdentical + ds.NumInserted + ds.NumModified 565 var numLeadingIdentical, numTrailingIdentical int 566 for j := 0; j < nx && j < ny && eq(ix+j, iy+j); j++ { 567 numLeadingIdentical++ 568 } 569 for j := 0; j < nx && j < ny && eq(ix+nx-1-j, iy+ny-1-j); j++ { 570 numTrailingIdentical++ 571 } 572 if numIdentical := numLeadingIdentical + numTrailingIdentical; numIdentical > 0 { 573 if numLeadingIdentical > 0 { 574 // Remove leading identical span from this group and 575 // insert it into the preceding group. 576 if i-1 >= 0 { 577 groups[i-1].NumIdentical += numLeadingIdentical 578 } else { 579 // No preceding group exists, so prepend a new group, 580 // but do so after we finish iterating over all groups. 581 defer func() { 582 groups = append([]diffStats{{Name: groups[0].Name, NumIdentical: numLeadingIdentical}}, groups...) 583 }() 584 } 585 // Increment indexes since the preceding group would have handled this. 586 ix += numLeadingIdentical 587 iy += numLeadingIdentical 588 } 589 if numTrailingIdentical > 0 { 590 // Remove trailing identical span from this group and 591 // insert it into the succeeding group. 592 if i+1 < len(groups) { 593 groups[i+1].NumIdentical += numTrailingIdentical 594 } else { 595 // No succeeding group exists, so append a new group, 596 // but do so after we finish iterating over all groups. 597 defer func() { 598 groups = append(groups, diffStats{Name: groups[len(groups)-1].Name, NumIdentical: numTrailingIdentical}) 599 }() 600 } 601 // Do not increment indexes since the succeeding group will handle this. 602 } 603 604 // Update this group since some identical elements were removed. 605 nx -= numIdentical 606 ny -= numIdentical 607 groups[i] = diffStats{Name: ds.Name, NumRemoved: nx, NumInserted: ny} 608 } 609 ix += nx 610 iy += ny 611 } 612 return groups 613} 614