gitroast/models/git_diff.go

535 lines
13 KiB
Go
Raw Normal View History

2014-04-13 01:35:36 +00:00
// Copyright 2014 The Gogs Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package models
import (
"bufio"
2014-12-09 07:18:25 +00:00
"bytes"
2014-06-19 05:08:03 +00:00
"fmt"
2016-01-09 06:51:17 +00:00
"html"
"html/template"
2014-04-13 01:35:36 +00:00
"io"
2015-12-14 14:38:21 +00:00
"io/ioutil"
2014-04-13 01:35:36 +00:00
"os"
"os/exec"
"strings"
2016-11-11 12:11:45 +00:00
"code.gitea.io/git"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/template/highlight"
2016-11-11 12:11:45 +00:00
"github.com/Unknwon/com"
2016-11-05 16:56:35 +00:00
"github.com/sergi/go-diff/diffmatchpatch"
"golang.org/x/net/html/charset"
"golang.org/x/text/transform"
2014-04-13 01:35:36 +00:00
)
2016-11-24 08:30:08 +00:00
// DiffLineType identifies what kind of line a DiffLine holds.
type DiffLineType uint8

// Possible DiffLineType values. Numbering starts at 1, so the zero value of
// DiffLineType does not correspond to any of them.
const (
	DiffLinePlain   DiffLineType = 1
	DiffLineAdd     DiffLineType = 2
	DiffLineDel     DiffLineType = 3
	DiffLineSection DiffLineType = 4
)
2016-11-24 08:30:08 +00:00
// DiffFileType identifies what kind of change a DiffFile describes.
type DiffFileType uint8

// Possible DiffFileType values. Numbering starts at 1, so the zero value of
// DiffFileType does not correspond to any of them.
const (
	DiffFileAdd    DiffFileType = 1
	DiffFileChange DiffFileType = 2
	DiffFileDel    DiffFileType = 3
	DiffFileRename DiffFileType = 4
)
2016-11-24 08:30:08 +00:00
// DiffLine represents a line difference in a DiffSection.
2014-04-13 01:35:36 +00:00
type DiffLine struct {
LeftIdx int
RightIdx int
Type DiffLineType
Content string
2014-04-13 01:35:36 +00:00
}
2016-11-24 08:30:08 +00:00
// GetType returns the type of a DiffLine.
func (d *DiffLine) GetType() int {
return int(d.Type)
2014-04-13 01:35:36 +00:00
}
2016-11-24 08:30:08 +00:00
// DiffSection represents a section of a DiffFile.
2014-04-13 01:35:36 +00:00
type DiffSection struct {
Name string
Lines []*DiffLine
}
2016-01-09 06:51:17 +00:00
// HTML fragments that diffToHTML wraps around the inserted and deleted parts
// of a line.
var (
	addedCodePrefix   = []byte(`<span class="added-code">`)
	removedCodePrefix = []byte(`<span class="removed-code">`)
	codeTagSuffix     = []byte(`</span>`)
)
func diffToHTML(diffs []diffmatchpatch.Diff, lineType DiffLineType) template.HTML {
buf := bytes.NewBuffer(nil)
// Reproduce signs which are cutted for inline diff before.
switch lineType {
2016-11-07 16:24:59 +00:00
case DiffLineAdd:
buf.WriteByte('+')
2016-11-07 16:24:59 +00:00
case DiffLineDel:
buf.WriteByte('-')
}
2016-01-09 06:51:17 +00:00
for i := range diffs {
switch {
2016-11-07 16:24:59 +00:00
case diffs[i].Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
2016-01-09 06:51:17 +00:00
buf.Write(addedCodePrefix)
buf.WriteString(html.EscapeString(diffs[i].Text))
buf.Write(codeTagSuffix)
2016-11-07 16:24:59 +00:00
case diffs[i].Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
2016-01-09 06:51:17 +00:00
buf.Write(removedCodePrefix)
buf.WriteString(html.EscapeString(diffs[i].Text))
buf.Write(codeTagSuffix)
case diffs[i].Type == diffmatchpatch.DiffEqual:
2016-01-09 06:51:17 +00:00
buf.WriteString(html.EscapeString(diffs[i].Text))
}
}
2016-01-09 06:51:17 +00:00
return template.HTML(buf.Bytes())
}
// GetLine gets a specific line by type (add or del) and file line number
func (diffSection *DiffSection) GetLine(lineType DiffLineType, idx int) *DiffLine {
var (
difference = 0
addCount = 0
delCount = 0
matchDiffLine *DiffLine
)
LOOP:
for _, diffLine := range diffSection.Lines {
switch diffLine.Type {
2016-11-07 16:24:59 +00:00
case DiffLineAdd:
addCount++
2016-11-07 16:24:59 +00:00
case DiffLineDel:
delCount++
default:
if matchDiffLine != nil {
break LOOP
}
difference = diffLine.RightIdx - diffLine.LeftIdx
addCount = 0
delCount = 0
}
switch lineType {
2016-11-07 16:24:59 +00:00
case DiffLineDel:
2016-01-09 06:51:17 +00:00
if diffLine.RightIdx == 0 && diffLine.LeftIdx == idx-difference {
matchDiffLine = diffLine
}
2016-11-07 16:24:59 +00:00
case DiffLineAdd:
2016-01-09 06:51:17 +00:00
if diffLine.LeftIdx == 0 && diffLine.RightIdx == idx+difference {
matchDiffLine = diffLine
}
}
}
if addCount == delCount {
return matchDiffLine
}
return nil
}
// diffMatchPatch is the shared differ used for inline diffs; its
// DiffEditCost is set to 100.
var diffMatchPatch = func() *diffmatchpatch.DiffMatchPatch {
	dmp := diffmatchpatch.New()
	dmp.DiffEditCost = 100
	return dmp
}()
// GetComputedInlineDiffFor computes inline diff for the given line.
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) template.HTML {
if setting.Git.DisableDiffHighlight {
return template.HTML(html.EscapeString(diffLine.Content[1:]))
}
var (
compareDiffLine *DiffLine
diff1 string
diff2 string
)
// try to find equivalent diff line. ignore, otherwise
switch diffLine.Type {
2016-11-07 16:24:59 +00:00
case DiffLineAdd:
compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx)
if compareDiffLine == nil {
return template.HTML(html.EscapeString(diffLine.Content))
}
diff1 = compareDiffLine.Content
diff2 = diffLine.Content
2016-11-07 16:24:59 +00:00
case DiffLineDel:
compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx)
if compareDiffLine == nil {
return template.HTML(html.EscapeString(diffLine.Content))
}
diff1 = diffLine.Content
diff2 = compareDiffLine.Content
default:
return template.HTML(html.EscapeString(diffLine.Content))
}
diffRecord := diffMatchPatch.DiffMain(diff1[1:], diff2[1:], true)
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
return diffToHTML(diffRecord, diffLine.Type)
}
2016-11-24 08:30:08 +00:00
// DiffFile represents a file diff.
2014-04-13 01:35:36 +00:00
type DiffFile struct {
Name string
2015-11-03 00:55:24 +00:00
OldName string
2014-05-13 16:40:32 +00:00
Index int
2014-04-13 01:35:36 +00:00
Addition, Deletion int
Type DiffFileType
IsCreated bool
IsDeleted bool
2014-04-16 00:01:20 +00:00
IsBin bool
2015-11-03 00:55:24 +00:00
IsRenamed bool
IsSubmodule bool
2014-04-13 01:35:36 +00:00
Sections []*DiffSection
IsIncomplete bool
2014-04-13 01:35:36 +00:00
}
// GetType reports the file's DiffFileType converted to a plain int.
func (diffFile *DiffFile) GetType() int {
	fileType := diffFile.Type
	return int(fileType)
}
2016-11-24 08:30:08 +00:00
// GetHighlightClass returns the highlight class that
// highlight.FileNameToHighlightClass yields for the file's Name.
func (diffFile *DiffFile) GetHighlightClass() string {
	fileName := diffFile.Name
	return highlight.FileNameToHighlightClass(fileName)
}
2016-11-24 08:30:08 +00:00
// Diff represents a difference between two git trees.
2014-04-13 01:35:36 +00:00
type Diff struct {
TotalAddition, TotalDeletion int
Files []*DiffFile
IsIncomplete bool
2014-04-13 01:35:36 +00:00
}
2016-11-24 08:30:08 +00:00
// NumFiles returns number of files changes in a diff.
2014-04-13 01:35:36 +00:00
func (diff *Diff) NumFiles() int {
return len(diff.Files)
}
// cmdDiffHead is the prefix of the header line that ParsePatch treats as the
// start of a new file's diff.
const cmdDiffHead = "diff --git "
2014-04-13 01:35:36 +00:00
2016-11-24 08:30:08 +00:00
// ParsePatch builds a Diff object from a io.Reader and some
// parameters.
// TODO: move this function to gogits/git-module
func ParsePatch(maxLines, maxLineCharacteres, maxFiles int, reader io.Reader) (*Diff, error) {
2014-04-13 01:35:36 +00:00
var (
diff = &Diff{Files: make([]*DiffFile, 0)}
2014-04-13 01:35:36 +00:00
curFile *DiffFile
curSection = &DiffSection{
Lines: make([]*DiffLine, 0, 10),
}
leftLine, rightLine int
lineCount int
curFileLinesCount int
2014-04-13 01:35:36 +00:00
)
input := bufio.NewReader(reader)
isEOF := false
for !isEOF {
line, err := input.ReadString('\n')
if err != nil {
if err == io.EOF {
isEOF = true
} else {
return nil, fmt.Errorf("ReadString: %v", err)
}
}
if len(line) > 0 && line[len(line)-1] == '\n' {
// Remove line break.
line = line[:len(line)-1]
2014-04-13 01:35:36 +00:00
}
if strings.HasPrefix(line, "+++ ") || strings.HasPrefix(line, "--- ") || len(line) == 0 {
continue
}
curFileLinesCount++
lineCount++
2014-04-13 01:35:36 +00:00
// Diff data too large, we only show the first about maxlines lines
if curFileLinesCount >= maxLines || len(line) >= maxLineCharacteres {
curFile.IsIncomplete = true
2014-04-13 01:35:36 +00:00
}
2014-04-16 00:01:20 +00:00
switch {
case line[0] == ' ':
2016-11-07 16:24:59 +00:00
diffLine := &DiffLine{Type: DiffLinePlain, Content: line, LeftIdx: leftLine, RightIdx: rightLine}
2014-04-13 01:35:36 +00:00
leftLine++
rightLine++
curSection.Lines = append(curSection.Lines, diffLine)
continue
2014-04-16 00:01:20 +00:00
case line[0] == '@':
2014-04-13 01:35:36 +00:00
curSection = &DiffSection{}
curFile.Sections = append(curFile.Sections, curSection)
ss := strings.Split(line, "@@")
2016-11-07 16:33:03 +00:00
diffLine := &DiffLine{Type: DiffLineSection, Content: line}
2014-04-13 01:35:36 +00:00
curSection.Lines = append(curSection.Lines, diffLine)
// Parse line number.
2015-07-29 14:55:01 +00:00
ranges := strings.Split(ss[1][1:], " ")
2014-07-26 04:24:27 +00:00
leftLine, _ = com.StrTo(strings.Split(ranges[0], ",")[0][1:]).Int()
2015-07-29 14:55:01 +00:00
if len(ranges) > 1 {
rightLine, _ = com.StrTo(strings.Split(ranges[1], ",")[0]).Int()
} else {
log.Warn("Parse line number failed: %v", line)
rightLine = leftLine
}
2014-04-13 01:35:36 +00:00
continue
2014-04-16 00:01:20 +00:00
case line[0] == '+':
2014-04-13 01:35:36 +00:00
curFile.Addition++
diff.TotalAddition++
2016-11-07 16:24:59 +00:00
diffLine := &DiffLine{Type: DiffLineAdd, Content: line, RightIdx: rightLine}
2014-04-13 01:35:36 +00:00
rightLine++
curSection.Lines = append(curSection.Lines, diffLine)
continue
2014-04-16 00:01:20 +00:00
case line[0] == '-':
2014-04-13 01:35:36 +00:00
curFile.Deletion++
diff.TotalDeletion++
2016-11-07 16:24:59 +00:00
diffLine := &DiffLine{Type: DiffLineDel, Content: line, LeftIdx: leftLine}
2014-04-13 01:35:36 +00:00
if leftLine > 0 {
leftLine++
}
curSection.Lines = append(curSection.Lines, diffLine)
2014-04-16 00:01:20 +00:00
case strings.HasPrefix(line, "Binary"):
curFile.IsBin = true
2014-04-13 01:35:36 +00:00
continue
}
// Get new file.
if strings.HasPrefix(line, cmdDiffHead) {
2015-11-03 00:55:24 +00:00
middle := -1
2015-11-20 06:18:50 +00:00
// Note: In case file name is surrounded by double quotes (it happens only in git-shell).
// e.g. diff --git "a/xxx" "b/xxx"
hasQuote := line[len(cmdDiffHead)] == '"'
2015-11-03 00:55:24 +00:00
if hasQuote {
middle = strings.Index(line, ` "b/`)
} else {
middle = strings.Index(line, " b/")
}
2014-04-13 01:35:36 +00:00
beg := len(cmdDiffHead)
2015-11-03 00:55:24 +00:00
a := line[beg+2 : middle]
b := line[middle+3:]
if hasQuote {
2015-11-20 06:18:50 +00:00
a = string(git.UnescapeChars([]byte(a[1 : len(a)-1])))
b = string(git.UnescapeChars([]byte(b[1 : len(b)-1])))
2015-08-20 08:08:26 +00:00
}
2014-04-13 01:35:36 +00:00
curFile = &DiffFile{
2015-11-03 00:55:24 +00:00
Name: a,
2014-05-13 16:40:32 +00:00
Index: len(diff.Files) + 1,
2016-11-07 16:24:59 +00:00
Type: DiffFileChange,
2014-04-13 01:35:36 +00:00
Sections: make([]*DiffSection, 0, 10),
}
diff.Files = append(diff.Files, curFile)
if len(diff.Files) >= maxFiles {
diff.IsIncomplete = true
io.Copy(ioutil.Discard, reader)
break
}
curFileLinesCount = 0
2014-04-13 01:35:36 +00:00
// Check file diff type and is submodule.
for {
line, err := input.ReadString('\n')
if err != nil {
if err == io.EOF {
isEOF = true
} else {
return nil, fmt.Errorf("ReadString: %v", err)
}
}
2014-04-13 01:35:36 +00:00
switch {
case strings.HasPrefix(line, "new file"):
2016-11-07 16:24:59 +00:00
curFile.Type = DiffFileAdd
curFile.IsCreated = true
case strings.HasPrefix(line, "deleted"):
2016-11-07 16:24:59 +00:00
curFile.Type = DiffFileDel
curFile.IsDeleted = true
case strings.HasPrefix(line, "index"):
2016-11-07 16:24:59 +00:00
curFile.Type = DiffFileChange
case strings.HasPrefix(line, "similarity index 100%"):
2016-11-07 16:33:03 +00:00
curFile.Type = DiffFileRename
2015-11-03 00:55:24 +00:00
curFile.IsRenamed = true
curFile.OldName = curFile.Name
curFile.Name = b
2014-04-13 01:35:36 +00:00
}
if curFile.Type > 0 {
if strings.HasSuffix(line, " 160000\n") {
curFile.IsSubmodule = true
}
2014-04-13 01:35:36 +00:00
break
}
}
}
}
// FIXME: detect encoding while parsing.
var buf bytes.Buffer
2015-07-29 14:55:01 +00:00
for _, f := range diff.Files {
buf.Reset()
for _, sec := range f.Sections {
for _, l := range sec.Lines {
buf.WriteString(l.Content)
buf.WriteString("\n")
}
}
charsetLabel, err := base.DetectEncoding(buf.Bytes())
if charsetLabel != "UTF-8" && err == nil {
2015-07-29 14:55:01 +00:00
encoding, _ := charset.Lookup(charsetLabel)
if encoding != nil {
d := encoding.NewDecoder()
2014-12-09 07:18:25 +00:00
for _, sec := range f.Sections {
for _, l := range sec.Lines {
if c, _, err := transform.String(d, l.Content); err == nil {
l.Content = c
}
2014-12-09 07:18:25 +00:00
}
}
}
}
}
2014-04-13 01:35:36 +00:00
return diff, nil
}
2016-11-24 08:30:08 +00:00
// GetDiffRange builds a Diff between two commits of a repository.
// passing the empty string as beforeCommitID returns a diff from the
// parent commit.
func GetDiffRange(repoPath, beforeCommitID, afterCommitID string, maxLines, maxLineCharacteres, maxFiles int) (*Diff, error) {
gitRepo, err := git.OpenRepository(repoPath)
2014-04-13 01:35:36 +00:00
if err != nil {
return nil, err
}
commit, err := gitRepo.GetCommit(afterCommitID)
2014-04-13 01:35:36 +00:00
if err != nil {
return nil, err
}
2014-05-29 02:15:15 +00:00
var cmd *exec.Cmd
2014-08-26 12:20:18 +00:00
// if "after" commit given
if len(beforeCommitID) == 0 {
2014-08-26 12:20:18 +00:00
// First commit of repository.
if commit.ParentCount() == 0 {
cmd = exec.Command("git", "show", afterCommitID)
2014-08-26 12:20:18 +00:00
} else {
c, _ := commit.Parent(0)
cmd = exec.Command("git", "diff", "-M", c.ID.String(), afterCommitID)
2014-08-26 12:20:18 +00:00
}
2014-05-29 02:15:15 +00:00
} else {
cmd = exec.Command("git", "diff", "-M", beforeCommitID, afterCommitID)
2014-04-13 01:35:36 +00:00
}
2014-05-29 02:15:15 +00:00
cmd.Dir = repoPath
cmd.Stderr = os.Stderr
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, fmt.Errorf("StdoutPipe: %v", err)
}
if err = cmd.Start(); err != nil {
return nil, fmt.Errorf("Start: %v", err)
}
pid := process.Add(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath), cmd)
defer process.Remove(pid)
diff, err := ParsePatch(maxLines, maxLineCharacteres, maxFiles, stdout)
if err != nil {
return nil, fmt.Errorf("ParsePatch: %v", err)
}
if err = cmd.Wait(); err != nil {
return nil, fmt.Errorf("Wait: %v", err)
}
return diff, nil
2014-04-13 01:35:36 +00:00
}
2014-08-26 12:20:18 +00:00
2016-11-24 08:30:08 +00:00
// RawDiffType selects the output format of GetRawDiff.
type RawDiffType string

// Possible RawDiffType values.
const (
	RawDiffNormal = RawDiffType("diff")
	RawDiffPatch  = RawDiffType("patch")
)
// GetRawDiff dumps diff results of repository in given commit ID to io.Writer.
// TODO: move this function to gogits/git-module
func GetRawDiff(repoPath, commitID string, diffType RawDiffType, writer io.Writer) error {
repo, err := git.OpenRepository(repoPath)
if err != nil {
return fmt.Errorf("OpenRepository: %v", err)
}
commit, err := repo.GetCommit(commitID)
if err != nil {
return fmt.Errorf("GetCommit: %v", err)
}
var cmd *exec.Cmd
switch diffType {
2016-11-07 16:24:59 +00:00
case RawDiffNormal:
if commit.ParentCount() == 0 {
cmd = exec.Command("git", "show", commitID)
} else {
c, _ := commit.Parent(0)
cmd = exec.Command("git", "diff", "-M", c.ID.String(), commitID)
}
2016-11-07 16:24:59 +00:00
case RawDiffPatch:
if commit.ParentCount() == 0 {
cmd = exec.Command("git", "format-patch", "--no-signature", "--stdout", "--root", commitID)
} else {
c, _ := commit.Parent(0)
query := fmt.Sprintf("%s...%s", commitID, c.ID.String())
cmd = exec.Command("git", "format-patch", "--no-signature", "--stdout", query)
}
default:
return fmt.Errorf("invalid diffType: %s", diffType)
}
stderr := new(bytes.Buffer)
cmd.Dir = repoPath
cmd.Stdout = writer
cmd.Stderr = stderr
if err = cmd.Run(); err != nil {
return fmt.Errorf("Run: %v - %s", err, stderr)
}
return nil
}
2016-11-24 08:30:08 +00:00
// GetDiffCommit builds a Diff representing the given commitID.
func GetDiffCommit(repoPath, commitID string, maxLines, maxLineCharacteres, maxFiles int) (*Diff, error) {
return GetDiffRange(repoPath, "", commitID, maxLines, maxLineCharacteres, maxFiles)
2014-08-26 12:20:18 +00:00
}