Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new functionality for reversing diffs. #72

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 151 additions & 0 deletions diff/reverse.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package diff

import (
"bytes"
"errors"
"fmt"
"regexp"
)

// ReverseFileDiff takes a diff.FileDiff, and returns the reverse operation.
// This is a FileDiff that undoes the edit of the original.
//
// The original and new names and timestamps are swapped, and every hunk is
// reversed in order. The Extended header lines are carried over as-is (the
// slice is shared with fd, not copied); they are NOT rewritten to describe
// the reverse operation.
//
// fd itself is not modified. An error is returned if fd is nil, if any entry
// of fd.Hunks is nil, or if reversing a hunk fails; hunk errors are wrapped
// with the index of the offending hunk in fd.Hunks.
func ReverseFileDiff(fd *FileDiff) (*FileDiff, error) {
	if fd == nil {
		return nil, errors.New("cannot reverse a nil FileDiff")
	}
	reverse := FileDiff{
		OrigName: fd.NewName,
		OrigTime: fd.NewTime,
		NewName:  fd.OrigName,
		NewTime:  fd.OrigTime,
		Extended: fd.Extended,
	}
	// Only allocate when there is something to store, so that a diff without
	// hunks still yields a nil Hunks slice.
	if n := len(fd.Hunks); n > 0 {
		reverse.Hunks = make([]*Hunk, 0, n)
	}
	for i, hunk := range fd.Hunks {
		if hunk == nil {
			// Guard here rather than letting reverseHunk dereference nil.
			return nil, fmt.Errorf("hunk %d is nil", i)
		}
		invHunk, err := reverseHunk(hunk)
		if err != nil {
			return nil, fmt.Errorf("reversing hunk %d: %w", i, err)
		}
		reverse.Hunks = append(reverse.Hunks, invHunk)
	}
	return &reverse, nil
}

// ReverseMultiFileDiff reverses a series of FileDiffs.
//
// Each element of fds is passed to ReverseFileDiff and the results are
// returned in the same order. The first error encountered aborts the whole
// operation and is returned unchanged together with a nil slice.
func ReverseMultiFileDiff(fds []*FileDiff) ([]*FileDiff, error) {
	var reversed []*FileDiff
	for i := 0; i < len(fds); i++ {
		inverse, err := ReverseFileDiff(fds[i])
		if err != nil {
			return nil, err
		}
		reversed = append(reversed, inverse)
	}
	return reversed, nil
}

// A subhunk represents a portion of a Hunk.Body, split into three sections.
// It consists of zero or more context lines, followed by zero or more orig
// lines and then zero or more new lines.
//
// Each line is stored WITHOUT its starting character (' ', '-' or '+'), but
// with its trailing newline included. The final entry in a section may be
// missing a trailing newline.
//
// A missing newline at the end of an orig line is represented in a Hunk by
// OrigNoNewlineAt, but is represented here as an actual missing newline.
type subhunk struct {
	context [][]byte // lines that had a ' ' prefix in the hunk body
	orig    [][]byte // lines that had a '-' prefix in the hunk body
	new     [][]byte // lines that had a '+' prefix in the hunk body
}

// reverseHunk converts a Hunk into its reverse operation.
//
// The line ranges of the original and new sides are swapped, and the body is
// rebuilt subhunk by subhunk: context lines are kept, the forward hunk's new
// lines are emitted as '-' lines and its orig lines as '+' lines. Section and
// StartPosition are copied from the forward hunk.
//
// Any error from toSubhunks is returned unchanged.
func reverseHunk(forward *Hunk) (*Hunk, error) {
	pieces, err := toSubhunks(forward)
	if err != nil {
		return nil, err
	}

	var body []byte
	var noNewlineAt int32 // stays 0 unless a reversed '-' line lacks its newline

	// emit appends each line to body, re-attaching the given prefix character.
	emit := func(prefix byte, lines [][]byte) {
		for _, l := range lines {
			body = append(body, prefix)
			body = append(body, l...)
		}
	}

	for i := range pieces {
		// What the forward hunk added is what the reverse hunk removes, and
		// vice versa.
		removed, added := pieces[i].new, pieces[i].orig

		emit(' ', pieces[i].context)
		emit('-', removed)
		if len(removed) != 0 && body[len(body)-1] != '\n' {
			// The last removed line has no newline. A Hunk encodes that by
			// keeping the newline in Body and recording the offset just
			// past it.
			body = append(body, '\n')
			noNewlineAt = int32(len(body))
		}
		emit('+', added)
	}

	return &Hunk{
		OrigStartLine:   forward.NewStartLine,
		OrigLines:       forward.NewLines,
		OrigNoNewlineAt: noNewlineAt,
		NewStartLine:    forward.OrigStartLine,
		NewLines:        forward.OrigLines,
		Section:         forward.Section,
		StartPosition:   forward.StartPosition,
		Body:            body,
	}, nil
}

// subhunkLineRe matches one body line from the start of the input: a single
// prefix character, the rest of the line, and either the terminating newline
// or the end of the input.
var subhunkLineRe = regexp.MustCompile(`^.[^\n]*(\n|$)`)

// extractLinesStartingWith consumes the leading run of lines in *from whose
// first byte is startingWith.
//
// Each consumed line is returned without its first byte but with its trailing
// newline, if it has one. *from is advanced past everything that was
// consumed. If the first line does not start with startingWith (or *from is
// empty), nil is returned and *from is left untouched. The returned lines are
// sub-slices of the input, not copies.
func extractLinesStartingWith(from *[]byte, startingWith byte) [][]byte {
	var extracted [][]byte
	rest := *from
	for {
		if len(rest) == 0 || rest[0] != startingWith {
			break
		}
		match := subhunkLineRe.Find(rest)
		extracted = append(extracted, match[1:])
		rest = rest[len(match):]
	}
	*from = rest
	return extracted
}

// toSubhunks extracts the subhunks from a diff.Hunk.
//
// This groups a Hunk's buffer into one or more subhunks, matching the conditions
// of `subhunk` above. This function groups, strips prefix characters, and strips
// a newline for `OrigNoNewlineAt` if necessary.
//
// An empty body with OrigNoNewlineAt unset yields (nil, nil). An error is
// returned when:
//   - a body line starts with a character other than ' ', '-' or '+';
//   - OrigNoNewlineAt is set but the body is empty or the last subhunk has no
//     orig lines to attach the missing newline to;
//   - OrigNoNewlineAt is set but the last orig line has no newline to strip.
func toSubhunks(hunk *Hunk) ([]subhunk, error) {
	body := hunk.Body
	var subhunks []subhunk
	for len(body) > 0 {
		// Each call consumes its run of lines from the front of body; the
		// calls are evaluated in order: context, then orig, then new.
		sh := subhunk{
			context: extractLinesStartingWith(&body, ' '),
			orig:    extractLinesStartingWith(&body, '-'),
			new:     extractLinesStartingWith(&body, '+'),
		}
		if len(sh.context) == 0 && len(sh.orig) == 0 && len(sh.new) == 0 {
			// Nothing was consumed: the next line didn't start with any
			// expected prefix, and looping again would never terminate.
			return nil, fmt.Errorf("unexpected character %q at start of line", body[0])
		}
		subhunks = append(subhunks, sh)
	}

	if hunk.OrigNoNewlineAt <= 0 {
		return subhunks, nil
	}

	// The Hunk represents a missing newline at the end of an "orig" line with a
	// OrigNoNewlineAt index. We represent it here as an actual missing newline.
	if len(subhunks) == 0 {
		// An empty body cannot contain the line OrigNoNewlineAt refers to.
		return nil, errors.New("inconsistent OrigNoNewlineAt in input")
	}
	last := &subhunks[len(subhunks)-1]
	n := len(last.orig)
	if n == 0 {
		return nil, errors.New("inconsistent OrigNoNewlineAt in input")
	}
	trimmed, cut := bytes.CutSuffix(last.orig[n-1], []byte("\n"))
	if !cut {
		return nil, errors.New("missing newline in input")
	}
	last.orig[n-1] = trimmed
	return subhunks, nil
}
146 changes: 146 additions & 0 deletions diff/reverse_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
package diff

import (
"bytes"
"os"
"path/filepath"
"testing"

"github.com/google/go-cmp/cmp"
)

// TestReverseHunks parses the hunks in each testdata input file, reverses
// every hunk with reverseHunk, prints the result and compares it byte-for-byte
// with the matching ".reversed" file.
//
// Each case runs as its own subtest so that a fatal failure in one input does
// not prevent the others from running.
func TestReverseHunks(t *testing.T) {
	tests := []struct {
		inputFile string
		wantFile  string
	}{
		{
			inputFile: "sample_hunks.diff",
			wantFile:  "sample_hunks.reversed",
		},
		{
			inputFile: "no_newline_new.diff",
			wantFile:  "no_newline_new.reversed",
		},
		{
			inputFile: "no_newline_orig.diff",
			wantFile:  "no_newline_orig.reversed",
		},
		{
			inputFile: "no_newline_both.diff",
			wantFile:  "no_newline_both.reversed",
		},
	}
	for _, test := range tests {
		t.Run(test.inputFile, func(t *testing.T) {
			inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile))
			if err != nil {
				t.Fatal(err)
			}
			wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile))
			if err != nil {
				t.Fatal(err)
			}
			input, err := ParseHunks(inputData)
			if err != nil {
				t.Fatal(err)
			}

			reversed := make([]*Hunk, 0, len(input))
			for _, in := range input {
				out, err := reverseHunk(in)
				if err != nil {
					// This should only fail if the Hunk data structure is
					// inconsistent. Stop here: out is nil and must not be
					// handed to PrintHunks.
					t.Fatalf("Unexpected reverseHunk() error: %s", err)
				}
				reversed = append(reversed, out)
			}
			gotData, err := PrintHunks(reversed)
			if err != nil {
				t.Fatalf("PrintHunks of reversed data: %s", err)
			}
			if !bytes.Equal(wantData, gotData) {
				// Diff as strings so the report shows text lines, not bytes.
				t.Errorf("Reversed hunk does not match expected.\nWant vs got:\n%s",
					cmp.Diff(string(wantData), string(gotData)))
			}
		})
	}
}

// TestReverseFileDiff parses each testdata input as a single-file diff,
// reverses it with ReverseFileDiff, prints the result and compares it
// byte-for-byte with the matching ".reversed" file.
//
// Each case runs as its own subtest so that a fatal failure in one input does
// not prevent the others from running.
func TestReverseFileDiff(t *testing.T) {
	tests := []struct {
		inputFile string
		wantFile  string
	}{
		{
			inputFile: "sample_file.diff",
			wantFile:  "sample_file.reversed",
		},
	}
	for _, test := range tests {
		t.Run(test.inputFile, func(t *testing.T) {
			inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile))
			if err != nil {
				t.Fatal(err)
			}
			wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile))
			if err != nil {
				t.Fatal(err)
			}
			input, err := ParseFileDiff(inputData)
			if err != nil {
				t.Fatal(err)
			}
			reversed, err := ReverseFileDiff(input)
			if err != nil {
				// reversed is nil on error and must not be printed.
				t.Fatalf("ReverseFileDiff: %s", err)
			}
			gotData, err := PrintFileDiff(reversed)
			if err != nil {
				t.Fatalf("PrintFileDiff of reversed data: %s", err)
			}
			if !bytes.Equal(wantData, gotData) {
				// Diff as strings so the report shows text lines, not bytes.
				t.Errorf("Reversed diff does not match expected.\nWant vs got:\n%s",
					cmp.Diff(string(wantData), string(gotData)))
			}
		})
	}
}

// TestReverseMultiFileDiff parses each testdata input as a multi-file diff,
// reverses it with ReverseMultiFileDiff, prints the result and compares it
// byte-for-byte with the matching ".reversed" file.
//
// Each case runs as its own subtest so that a fatal failure in one input does
// not prevent the others from running.
func TestReverseMultiFileDiff(t *testing.T) {
	tests := []struct {
		inputFile string
		wantFile  string
	}{
		{
			inputFile: "sample_file.diff",
			wantFile:  "sample_file.reversed",
		},
		{
			inputFile: "sample_multi_file.diff",
			wantFile:  "sample_multi_file.reversed",
		},
	}
	for _, test := range tests {
		t.Run(test.inputFile, func(t *testing.T) {
			inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile))
			if err != nil {
				t.Fatal(err)
			}
			wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile))
			if err != nil {
				t.Fatal(err)
			}
			input, err := ParseMultiFileDiff(inputData)
			if err != nil {
				t.Fatal(err)
			}
			reversed, err := ReverseMultiFileDiff(input)
			if err != nil {
				// Nothing useful to print or compare after a failed reversal.
				t.Fatalf("ReverseMultiFileDiff: %s", err)
			}
			gotData, err := PrintMultiFileDiff(reversed)
			if err != nil {
				t.Fatalf("PrintMultiFileDiff of reversed data: %s", err)
			}
			if !bytes.Equal(wantData, gotData) {
				// Diff as strings so the report shows text lines, not bytes.
				t.Errorf("Reversed diff does not match expected.\nWant vs got:\n%s",
					cmp.Diff(string(wantData), string(gotData)))
			}
		})
	}
}
5 changes: 5 additions & 0 deletions diff/testdata/no_newline_both.reversed
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@@ -1,1 +1,1 @@
-b
\ No newline at end of file
+a
\ No newline at end of file
6 changes: 6 additions & 0 deletions diff/testdata/no_newline_new.reversed
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@@ -1,2 +1,3 @@
a
-a
\ No newline at end of file
+a
+a
4 changes: 4 additions & 0 deletions diff/testdata/no_newline_orig.reversed
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@@ -1,1 +1,1 @@
-b
+a
\ No newline at end of file
31 changes: 31 additions & 0 deletions diff/testdata/sample_file.reversed
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
--- newname 2009-10-11 15:12:30.000000000 +0000
+++ oldname 2009-10-11 15:12:20.000000000 +0000
@@ -1,9 +1,3 @@
-This is an important
-notice! It should
-therefore be located at
-the beginning of this
-document!
-
This part of the
document has stayed the
same from version to
@@ -11,10 +5,16 @@
be shown if it doesn't
change. Otherwise, that
would not be helping to
-compress anything.
+compress the size of the
+changes.
+
+This paragraph contains
+text that is outdated.
+It will be deleted in the
+near future.

It is important to spell
-check this document. On
+check this dokument. On
the other hand, a
misspelled word isn't
the end of the world.
37 changes: 37 additions & 0 deletions diff/testdata/sample_hunks.reversed
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
@@ -1,9 +1,3 @@ Section Header
-This is an important
-notice! It should
-therefore be located at
-the beginning of this
-document!
-
This part of the
document has stayed the
same from version to
@@ -11,10 +5,16 @@
be shown if it doesn't
change. Otherwise, that
would not be helping to
-compress anything.
+compress the size of the
+changes.
+
+This paragraph contains
+text that is outdated.
+It will be deleted in the
+near future.

It is important to spell
-check this document. On
+check this dokument. On
the other hand, a
misspelled word isn't
the end of the world.
@@ -22,7 +22,3 @@
this paragraph needs to
be changed. Things can
be added after it.
-
-This paragraph contains
-important new additions
-to this document.
Loading