sourcegraph · vslashg · Sep 20, 2024
diff --git a/diff/reverse.go b/diff/reverse.go
@@ -0,0 +1,151 @@
+package diff
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"regexp"
+)
+
+// ReverseFileDiff returns a new FileDiff that undoes fd: the orig and new
+// names and timestamps trade places and every hunk is reversed. An error
+// from reversing any hunk is returned unchanged, with a nil FileDiff.
+func ReverseFileDiff(fd *FileDiff) (*FileDiff, error) {
+	var hunks []*Hunk
+	for i := range fd.Hunks {
+		rev, err := reverseHunk(fd.Hunks[i])
+		if err != nil {
+			return nil, err
+		}
+		hunks = append(hunks, rev)
+	}
+	// NOTE(review): Extended is copied through untouched; confirm that is
+	// right for extended headers that describe a direction (e.g. renames).
+	inverse := &FileDiff{Extended: fd.Extended, Hunks: hunks}
+	inverse.OrigName, inverse.NewName = fd.NewName, fd.OrigName
+	inverse.OrigTime, inverse.NewTime = fd.NewTime, fd.OrigTime
+	return inverse, nil
+}
+
+// ReverseMultiFileDiff reverses every FileDiff in fds, stopping at the first error.
+func ReverseMultiFileDiff(fds []*FileDiff) ([]*FileDiff, error) {
+	var out []*FileDiff
+	for i := 0; i < len(fds); i++ {
+		inv, err := ReverseFileDiff(fds[i])
+		if err != nil {
+			return nil, err
+		}
+		out = append(out, inv)
+	}
+	return out, nil
+}
+
+// A subhunk represents a portion of a Hunk.Body, split into three sections:
+// zero or more context lines, followed by zero or more orig lines, and then
+// zero or more new lines.
+//
+// Each line is stored WITHOUT its leading prefix character but WITH its
+// trailing newline. The final entry in a section may lack that newline.
+//
+// A missing newline in orig, which a Hunk records via OrigNoNewlineAt, is
+// represented here simply as an orig line with no trailing newline.
+type subhunk struct {
+	context [][]byte // lines taken from ' '-prefixed body lines
+	orig    [][]byte // lines taken from '-'-prefixed body lines
+	new     [][]byte // lines taken from '+'-prefixed body lines
+}
+
+// reverseHunk returns the Hunk that undoes forward.
+//
+// Start lines and line counts trade places between the orig and new sides,
+// Section and StartPosition are copied, and Body is rebuilt subhunk by
+// subhunk with the forward '+' lines emitted as '-' lines and vice versa.
+// The only error it returns is one reported by toSubhunks.
+func reverseHunk(forward *Hunk) (*Hunk, error) {
+	subs, err := toSubhunks(forward)
+	if err != nil {
+		return nil, err
+	}
+	inv := &Hunk{
+		OrigStartLine: forward.NewStartLine,
+		OrigLines:     forward.NewLines,
+		NewStartLine:  forward.OrigStartLine,
+		NewLines:      forward.OrigLines,
+		Section:       forward.Section,
+		StartPosition: forward.StartPosition,
+	}
+	// emit appends every line to the reversed body, each one preceded by
+	// the given prefix byte.
+	emit := func(prefix byte, lines [][]byte) {
+		for _, l := range lines {
+			inv.Body = append(append(inv.Body, prefix), l...)
+		}
+	}
+	for _, sub := range subs {
+		emit(' ', sub.context)
+		// The forward hunk's additions are what the reverse removes.
+		emit('-', sub.new)
+		if n := len(inv.Body); len(sub.new) > 0 && inv.Body[n-1] != '\n' {
+			// The last removed line has no trailing newline. A Hunk still
+			// stores a newline in Body for it and records the offset just
+			// past that newline in OrigNoNewlineAt.
+			inv.Body = append(inv.Body, '\n')
+			inv.OrigNoNewlineAt = int32(len(inv.Body))
+		}
+		// The forward hunk's removals are what the reverse adds back.
+		emit('+', sub.orig)
+	}
+	return inv, nil
+}
+
+var subhunkLineRe = regexp.MustCompile(`^.[^\n]*(\n|$)`) // one line: any first char, then the rest up to a newline or end of input
+
+// extractLinesStartingWith pops *from's leading startingWith lines, minus that prefix.
+func extractLinesStartingWith(from *[]byte, startingWith byte) [][]byte {
+	var out [][]byte
+	for rest := *from; len(rest) > 0 && rest[0] == startingWith; *from = rest {
+		m := subhunkLineRe.Find(rest) // full line: prefix byte through newline (or end of input)
+		out, rest = append(out, m[1:]), rest[len(m):]
+	}
+	return out
+}
+
+// toSubhunks splits hunk.Body into consecutive subhunks: runs of context,
+// then orig, then new lines, each stored without its prefix character.
+//
+// If hunk.OrigNoNewlineAt is set, the trailing newline is also trimmed from
+// the last orig line of the final subhunk. An empty Body yields nil, nil.
+func toSubhunks(hunk *Hunk) ([]subhunk, error) {
+	rest := hunk.Body
+	if len(rest) == 0 {
+		return nil, nil
+	}
+	var groups []subhunk
+	for len(rest) > 0 {
+		var g subhunk
+		g.context = extractLinesStartingWith(&rest, ' ')
+		g.orig = extractLinesStartingWith(&rest, '-')
+		g.new = extractLinesStartingWith(&rest, '+')
+		if len(g.context)+len(g.orig)+len(g.new) == 0 {
+			// Nothing was consumed: the next line has an unknown prefix.
+			return nil, fmt.Errorf("unexpected character %q at start of line", rest[0])
+		}
+		groups = append(groups, g)
+	}
+	if hunk.OrigNoNewlineAt <= 0 {
+		return groups, nil
+	}
+	// A Hunk flags a missing newline on an orig line via OrigNoNewlineAt;
+	// a subhunk represents it by actually dropping the newline.
+	tail := &groups[len(groups)-1]
+	if len(tail.orig) == 0 {
+		return nil, errors.New("inconsistent OrigNoNewlineAt in input")
+	}
+	last := &tail.orig[len(tail.orig)-1]
+	trimmed, ok := bytes.CutSuffix(*last, []byte("\n"))
+	if !ok {
+		return nil, errors.New("missing newline in input")
+	}
+	*last = trimmed
+	return groups, nil
+}
diff --git a/diff/reverse_test.go b/diff/reverse_test.go
@@ -0,0 +1,146 @@
+package diff
+
+import (
+	"bytes"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+)
+
+// TestReverseHunks checks reverseHunk against golden files. Each testdata
+// input is parsed into hunks, every hunk is reversed, and the printed
+// result must equal the matching .reversed file byte for byte.
+//
+// inputFile and wantFile are names relative to the testdata directory.
+func TestReverseHunks(t *testing.T) {
+	tests := []struct {
+		inputFile string
+		wantFile  string
+	}{
+		{inputFile: "sample_hunks.diff", wantFile: "sample_hunks.reversed"},
+		{inputFile: "no_newline_new.diff", wantFile: "no_newline_new.reversed"},
+		{inputFile: "no_newline_orig.diff", wantFile: "no_newline_orig.reversed"},
+		{inputFile: "no_newline_both.diff", wantFile: "no_newline_both.reversed"},
+	}
+	for _, test := range tests {
+		// One subtest per fixture, so a fatal failure in one case does not
+		// prevent the remaining cases from running.
+		t.Run(test.inputFile, func(t *testing.T) {
+			inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile))
+			if err != nil {
+				t.Fatal(err)
+			}
+			wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile))
+			if err != nil {
+				t.Fatal(err)
+			}
+			input, err := ParseHunks(inputData)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			var reversed []*Hunk
+			for _, in := range input {
+				out, err := reverseHunk(in)
+				if err != nil {
+					// This should only fail if the Hunk data structure is
+					// inconsistent. out is nil here, so stop rather than
+					// feed a nil hunk to PrintHunks.
+					t.Fatalf("%s: Unexpected reverseHunk() error: %s", test.inputFile, err)
+				}
+				reversed = append(reversed, out)
+			}
+			gotData, err := PrintHunks(reversed)
+			if err != nil {
+				t.Fatalf("%s: PrintHunks of reversed data: %s", test.inputFile, err)
+			}
+			// The comparison is exact; cmp.Diff only formats the report.
+			if !bytes.Equal(wantData, gotData) {
+				t.Errorf("%s: Reversed hunk does not match expected.\nWant vs got:\n%s",
+					test.inputFile, cmp.Diff(wantData, gotData))
+			}
+		})
+	}
+}
+
+// TestReverseFileDiff reverses a parsed single-file diff and compares it to a golden file.
+func TestReverseFileDiff(t *testing.T) {
+	tests := []struct {
+		inputFile string
+		wantFile  string
+	}{
+		{inputFile: "sample_file.diff", wantFile: "sample_file.reversed"},
+	}
+	for _, test := range tests {
+		t.Run(test.inputFile, func(t *testing.T) {
+			inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile))
+			if err != nil {
+				t.Fatal(err)
+			}
+			wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile))
+			if err != nil {
+				t.Fatal(err)
+			}
+			input, err := ParseFileDiff(inputData)
+			if err != nil {
+				t.Fatal(err)
+			}
+			reversed, err := ReverseFileDiff(input)
+			if err != nil {
+				// reversed is nil on error; stop instead of trying to print it.
+				t.Fatalf("%s: ReverseFileDiff: %s", test.inputFile, err)
+			}
+			gotData, err := PrintFileDiff(reversed)
+			if err != nil {
+				t.Fatalf("%s: PrintFileDiff of reversed data: %s", test.inputFile, err)
+			}
+			if !bytes.Equal(wantData, gotData) {
+				t.Errorf("%s: Reversed diff does not match expected.\nWant vs got:\n%s", test.inputFile, cmp.Diff(wantData, gotData))
+			}
+		})
+	}
+}
+
+// TestReverseMultiFileDiff parses each fixture as a multi-file diff, reverses
+// it with ReverseMultiFileDiff, and compares the printed result to a golden file.
+func TestReverseMultiFileDiff(t *testing.T) {
+	tests := []struct {
+		inputFile string
+		wantFile  string
+	}{
+		{inputFile: "sample_file.diff", wantFile: "sample_file.reversed"},
+		{inputFile: "sample_multi_file.diff", wantFile: "sample_multi_file.reversed"},
+	}
+	for _, test := range tests {
+		// One subtest per fixture keeps a fatal failure from skipping the rest.
+		t.Run(test.inputFile, func(t *testing.T) {
+			inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile))
+			if err != nil {
+				t.Fatal(err)
+			}
+			wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile))
+			if err != nil {
+				t.Fatal(err)
+			}
+			input, err := ParseMultiFileDiff(inputData)
+			if err != nil {
+				t.Fatal(err)
+			}
+			reversed, err := ReverseMultiFileDiff(input)
+			if err != nil {
+				// reversed is nil on error; stop instead of trying to print it.
+				t.Fatalf("%s: ReverseMultiFileDiff: %s", test.inputFile, err)
+			}
+			gotData, err := PrintMultiFileDiff(reversed)
+			if err != nil {
+				t.Fatalf("%s: PrintMultiFileDiff of reversed data: %s", test.inputFile, err)
+			}
+			if !bytes.Equal(wantData, gotData) {
+				t.Errorf("%s: Reversed diff does not match expected.\nWant vs got:\n%s",
+					test.inputFile, cmp.Diff(wantData, gotData))
+			}
+		})
+	}
+}
diff --git a/diff/testdata/no_newline_both.reversed b/diff/testdata/no_newline_both.reversed
@@ -0,0 +1,5 @@
+@@ -1,1 +1,1 @@
+-b
+\ No newline at end of file
++a
+\ No newline at end of file
diff --git a/diff/testdata/no_newline_new.reversed b/diff/testdata/no_newline_new.reversed
@@ -0,0 +1,6 @@
+@@ -1,2 +1,3 @@
+ a
+-a
+\ No newline at end of file
++a
++a
diff --git a/diff/testdata/no_newline_orig.reversed b/diff/testdata/no_newline_orig.reversed
@@ -0,0 +1,4 @@
+@@ -1,1 +1,1 @@
+-b
++a
+\ No newline at end of file
diff --git a/diff/testdata/sample_file.reversed b/diff/testdata/sample_file.reversed
@@ -0,0 +1,31 @@
+--- newname	2009-10-11 15:12:30.000000000 +0000
++++ oldname	2009-10-11 15:12:20.000000000 +0000
+@@ -1,9 +1,3 @@
+-This is an important
+-notice! It should
+-therefore be located at
+-the beginning of this
+-document!
+-
+ This part of the
+ document has stayed the
+ same from version to
+@@ -11,10 +5,16 @@
+ be shown if it doesn't
+ change.  Otherwise, that
+ would not be helping to
+-compress anything.
++compress the size of the
++changes.
++
++This paragraph contains
++text that is outdated.
++It will be deleted in the
++near future.
+
+ It is important to spell
+-check this document. On
++check this dokument. On
+ the other hand, a
+ misspelled word isn't
+ the end of the world.
diff --git a/diff/testdata/sample_hunks.reversed b/diff/testdata/sample_hunks.reversed
@@ -0,0 +1,37 @@
+@@ -1,9 +1,3 @@ Section Header
+-This is an important
+-notice! It should
+-therefore be located at
+-the beginning of this
+-document!
+-
+ This part of the
+ document has stayed the
+ same from version to
+@@ -11,10 +5,16 @@
+ be shown if it doesn't
+ change.  Otherwise, that
+ would not be helping to
+-compress anything.
++compress the size of the
++changes.
++
++This paragraph contains
++text that is outdated.
++It will be deleted in the
++near future.
+
+ It is important to spell
+-check this document. On
++check this dokument. On
+ the other hand, a
+ misspelled word isn't
+ the end of the world.
+@@ -22,7 +22,3 @@
+ this paragraph needs to
+ be changed. Things can
+ be added after it.
+-
+-This paragraph contains
+-important new additions
+-to this document.