Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cmp/compare_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1320,6 +1320,11 @@ using the AllowUnexported option.`, "\n"),
x: "d5c14bdf6bac81c27afc5429500ed750\n25483503b557c606dad4f144d27ae10b\n90bdbcdbb6ea7156068e3dcfb7459244\n978f480a6e3cced51e297fbff9a506b7\n",
y: "Xd5c14bdf6bac81c27afc5429500ed750\nX25483503b557c606dad4f144d27ae10b\nX90bdbcdbb6ea7156068e3dcfb7459244\nX978f480a6e3cced51e297fbff9a506b7\n",
reason: "all lines are different, so diffing based on lines is pointless",
}, {
label: label + "/JapaneseUTF8",
x: "プライベート ブランド ジャケット",
y: "プライベート ブランド シャツ",
reason: "multi-byte UTF-8 characters should be readable in diff output (issue #314)",
}, {
label: label + "/StringifiedBytes",
x: struct{ X []byte }{[]byte("hello, world!")},
Expand Down
36 changes: 28 additions & 8 deletions cmp/report_slices.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
// Auto-detect the type of the data.
var sx, sy string
var ssx, ssy []string
var isString, isMostlyText, isPureLinedText, isBinary bool
var isString, isMostlyText, isPureLinedText, isBinary, isValidUTF8 bool
switch {
case t.Kind() == reflect.String:
sx, sy = vx.String(), vy.String()
Expand Down Expand Up @@ -133,6 +133,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
isMostlyText = float64(numValidRunes) > math.Floor(0.90*float64(numTotalRunes))
isPureLinedText = isPureText && numLines >= 4 && maxLineLen <= 1024
isBinary = !isMostlyText
isValidUTF8 = utf8.ValidString(sx) && utf8.ValidString(sy)

// Avoid diffing by lines if it produces a significantly more complex
// edit script than diffing by bytes.
Expand Down Expand Up @@ -251,13 +252,32 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
// then perform differencing in approximately fixed-sized chunks.
// The output is printed as quoted strings.
case isMostlyText:
list = opts.formatDiffSlice(
reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
func(v reflect.Value, d diffMode) textRecord {
s := formatString(v.String())
return textRecord{Diff: d, Value: textLine(s)}
},
)
if isValidUTF8 {
// Both inputs are valid UTF-8: diff rune-by-rune so that no chunk
// boundary falls in the middle of a multi-byte character.
rx := []rune(sx)
ry := []rune(sy)
list = opts.formatDiffSlice(
reflect.ValueOf(rx), reflect.ValueOf(ry), 64, "rune",
func(v reflect.Value, d diffMode) textRecord {
// Convert []rune chunk back to string for display
runes := make([]rune, v.Len())
for i := 0; i < v.Len(); i++ {
runes[i] = rune(v.Index(i).Int())
}
s := formatString(string(runes))
return textRecord{Diff: d, Value: textLine(s)}
},
)
} else {
// At least one input is not valid UTF-8: fall back to byte-based diffing.
list = opts.formatDiffSlice(
reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
func(v reflect.Value, d diffMode) textRecord {
s := formatString(v.String())
return textRecord{Diff: d, Value: textLine(s)}
},
)
}

// If the text appears to be binary data,
// then perform differencing in approximately fixed-sized chunks.
Expand Down
13 changes: 10 additions & 3 deletions cmp/testdata/diffs
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@
+ "21 2nd Street",
`","city":"New York","state":"NY","postalCode":"10021-3100"},"pho`,
`neNumbers":[{"type":"home","number":"212 555-1234"},{"type":"off`,
... // 101 identical bytes
... // 101 identical runes
}, ""),
BytesB: nil,
BytesC: nil,
Expand Down Expand Up @@ -1019,7 +1019,7 @@
<<< TestDiff/Reporter/LargeStringInInterface
struct{ X any }{
X: strings.Join({
... // 485 identical bytes
... // 485 identical runes
"s mus. Pellentesque mi lorem, consectetur id porttitor id, solli",
"citudin sit amet enim. Duis eu dolor magna. Nunc ut augue turpis",
- ".",
Expand All @@ -1030,7 +1030,7 @@
<<< TestDiff/Reporter/LargeBytesInInterface
struct{ X any }{
X: bytes.Join({
... // 485 identical bytes
... // 485 identical runes
"s mus. Pellentesque mi lorem, consectetur id porttitor id, solli",
"citudin sit amet enim. Duis eu dolor magna. Nunc ut augue turpis",
- ".",
Expand Down Expand Up @@ -1098,6 +1098,13 @@
"978f480a6e3cced51e297fbff9a506b7\n",
}, "")
>>> TestDiff/Reporter/AllLinesDiffer
<<< TestDiff/Reporter/JapaneseUTF8
strings.Join({
"プライベート ブランド ",
- "ジャケット",
+ "シャツ",
}, "")
>>> TestDiff/Reporter/JapaneseUTF8
<<< TestDiff/Reporter/StringifiedBytes
struct{ X []uint8 }{
- X: []uint8("hello, world!"),
Expand Down