Skip to content
This repository has been archived by the owner on Sep 9, 2020. It is now read-only.

Allow using various git diff options for comparisons #126

Closed
wants to merge 8 commits into from
33 changes: 25 additions & 8 deletions gitlint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@

Usage:
git-lint [-f | --force] [--json] [--last-commit] [FILENAME ...]
git-lint [-t | --tracked] [-f | --force] [--json] [--last-commit]
git-lint [-t | --tracked] [-f | --force] [--json]
[(--last-commit | --diff DIFF)]
git-lint -h | --version

Options:
Expand All @@ -35,6 +36,9 @@
conjunction with other tools.
--last-commit Checks the last checked-out commit. This is mostly useful
when used as: git checkout <revid>; git lint --last-commit.
--diff=<diff> Checks everything captured by the git diff option. Not
compatible with --last-diff To get commits on the current
branch but not master; git lint --diff 'HEAD ^master'
"""

from __future__ import unicode_literals
Expand Down Expand Up @@ -165,7 +169,7 @@ def get_vcs_root():
return (None, None)


def process_file(vcs, commit, force, gitlint_config, file_data):
def process_file(vcs, commits, force, gitlint_config, file_data):
"""Lint the file

Returns:
Expand All @@ -177,7 +181,7 @@ def process_file(vcs, commit, force, gitlint_config, file_data):
modified_lines = None
else:
modified_lines = vcs.modified_lines(
filename, extra_data, commit=commit)
filename, extra_data, commits=commits)
result = linters.lint(filename, modified_lines, gitlint_config)
result = result[filename]

Expand Down Expand Up @@ -206,9 +210,20 @@ def main(argv, stdout=sys.stdout, stderr=sys.stderr):
stderr.write('fatal: Not a git repository' + linesep)
return 128

commit = None
commits = None
last_commit = None
if arguments['--last-commit']:
commit = vcs.last_commit()
last_commit = vcs.last_commit()
commits = [last_commit]

if arguments['--diff'] and vcs.__name__ != 'gitlint.git':
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you could do vcs != gitlint.git

stderr.write('fatal: --diff is only valid with git repositories' +
linesep)
return 128

diff = arguments['--diff']
if diff:
commits = vcs.commit_diff(diff)

if arguments['FILENAME']:
invalid_filenames = find_invalid_filenames(arguments['FILENAME'],
Expand All @@ -222,7 +237,8 @@ def main(argv, stdout=sys.stdout, stderr=sys.stderr):
changed_files = vcs.modified_files(
repository_root,
tracked_only=arguments['--tracked'],
commit=commit)
commit=last_commit,
diff=diff)
modified_files = {}
for filename in arguments['FILENAME']:
normalized_filename = os.path.abspath(filename)
Expand All @@ -232,7 +248,8 @@ def main(argv, stdout=sys.stdout, stderr=sys.stderr):
modified_files = vcs.modified_files(
repository_root,
tracked_only=arguments['--tracked'],
commit=commit)
commit=last_commit,
diff=diff)

linter_not_found = False
files_with_problems = 0
Expand All @@ -241,7 +258,7 @@ def main(argv, stdout=sys.stdout, stderr=sys.stderr):

with futures.ThreadPoolExecutor(max_workers=multiprocessing.cpu_count())\
as executor:
processfile = functools.partial(process_file, vcs, commit,
processfile = functools.partial(process_file, vcs, commits,
arguments['--force'], gitlint_config)
for filename, result in executor.map(
processfile, [(filename, modified_files[filename])
Expand Down
45 changes: 33 additions & 12 deletions gitlint/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,15 @@ def _remove_filename_quotes(filename):
return filename


def modified_files(root, tracked_only=False, commit=None):
def modified_files(root, tracked_only=False, commit=None, diff=None):
"""Returns a list of files that has been modified since the last commit.

Args:
root: the root of the repository, it has to be an absolute path.
tracked_only: exclude untracked files when True.
commit: SHA1 of the commit. If None, it will get the modified files in the
working copy.
diff: A git style diff. Like dev..master, 1b2ec4..123bade, dev ^master

Returns: a dictionary with the modified files as keys, and additional
information as value. In this case it adds the status returned by
Expand All @@ -68,6 +69,9 @@ def modified_files(root, tracked_only=False, commit=None):
if commit:
return _modified_files_with_commit(root, commit)

if diff:
return _modified_files_with_commit(root, diff)

# Convert to unicode and split
status_lines = subprocess.check_output([
'git', 'status', '--porcelain', '--untracked-files=all',
Expand All @@ -88,12 +92,14 @@ def modified_files(root, tracked_only=False, commit=None):
for filename, mode in modified_file_status)


def _modified_files_with_commit(root, commit):
def _modified_files_with_commit(root, diff_target):
# Convert to unicode and split
status_lines = subprocess.check_output([
'git', 'diff-tree', '-r', '--root', '--no-commit-id', '--name-status',
commit
]).decode('utf-8').split(os.linesep)
command = [
'git', 'diff-tree', '-r', '--root', '--no-commit-id', '--name-status'
]
command.extend(diff_target.split(' '))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In which cases is the argument going to have spaces, could you give some examples.

status_lines = subprocess.check_output(command).decode('utf-8').split(
os.linesep)

modified_file_status = utils.filter_lines(
status_lines,
Expand All @@ -106,14 +112,14 @@ def _modified_files_with_commit(root, commit):
mode + ' ') for filename, mode in modified_file_status)


def modified_lines(filename, extra_data, commit=None):
def modified_lines(filename, extra_data, commits=None):
"""Returns the lines that have been modifed for this file.

Args:
filename: the file to check.
extra_data: is the extra_data returned by modified_files. Additionally, a
value of None means that the file was not modified.
commit: the complete sha1 (40 chars) of the commit. Note that specifying
commits: the complete sha1 (40 chars) of the commit. Note that specifying
this value will only work (100%) when commit == last_commit (with
respect to the currently checked out revision), otherwise, we could miss
some lines.
Expand All @@ -126,15 +132,30 @@ def modified_lines(filename, extra_data, commit=None):
if extra_data not in ('M ', ' M', 'MM'):
return None

if commit is None:
commit = '0' * 40
commit = commit.encode('utf-8')
if commits is None:
commits = ['0' * 40]
# commit = commit.encode('utf-8')

# Split as bytes, as the output may have some non unicode characters.
blame_lines = subprocess.check_output(
['git', 'blame', '--porcelain', filename]).split(
os.linesep.encode('utf-8'))
regex_pattern = r'^({}) (?P<line>\d+) (\d+)'.format(
'|'.join(commits)).encode('utf-8')
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For safety I'd prefer to escape the commits.

modified_line_numbers = utils.filter_lines(
blame_lines, commit + br' (?P<line>\d+) (\d+)', groups=('line', ))
blame_lines, regex_pattern, groups=('line', ))

return list(map(int, modified_line_numbers))


def commit_diff(diff):
"""Returns a list of commits corresponding to the provided diff

Args:
diff: the diff like branch-a..branch-b or HEAD ^master

Returns: A list of commit shas associated with the provided diff
"""
command = ['git', 'rev-list']
command.extend(diff.split(' '))
return subprocess.check_output(command).decode('utf-8').split(os.linesep)
18 changes: 13 additions & 5 deletions gitlint/hg.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,16 @@ def last_commit():
return None


def modified_files(root, tracked_only=False, commit=None):
# pylint: disable=unused-argument
def modified_files(root, tracked_only=False, commit=None, diff=None):
"""Returns a list of files that has been modified since the last commit.

Args:
root: the root of the repository, it has to be an absolute path.
tracked_only: exclude untracked files when True.
commit: SHA1 of the commit. If None, it will get the modified files in the
working copy.
diff: Unused. Necessary for vcs compatibility

Returns: a dictionary with the modified files as keys, and additional
information as value. In this case it adds the status returned by
Expand Down Expand Up @@ -79,14 +81,17 @@ def modified_files(root, tracked_only=False, commit=None):
for filename, mode in modified_file_status)


def modified_lines(filename, extra_data, commit=None):
# pylint: enable=unused-argument


def modified_lines(filename, extra_data, commits=None):
"""Returns the lines that have been modifed for this file.

Args:
filename: the file to check.
extra_data: is the extra_data returned by modified_files. Additionally, a
value of None means that the file was not modified.
commit: the complete sha1 (40 chars) of the commit. Note that specifying
commits: the complete sha1 (40 chars) of the commit. Note that specifying
this value will only work (100%) when commit == last_commit (with
respect to the currently checked out revision), otherwise, we could miss
some lines.
Expand All @@ -100,8 +105,11 @@ def modified_lines(filename, extra_data, commit=None):
return None

command = ['hg', 'diff', '-U', '0']
if commit:
command.append('--change=%s' % commit)
if commits:
assert len(commits) == 1, \
"git-lint does not support multiple commits for mercurial yet"

command.append('--change=%s' % commits[0])
command.append(filename)

# Split as bytes, as the output may have some non unicode characters.
Expand Down
44 changes: 41 additions & 3 deletions test/unittest/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,26 @@ def test_modified_files_with_commit(self, check_output):
'--name-status', commit
])

@mock.patch('subprocess.check_output')
def test_modified_files_with_diff(self, check_output):
check_output.return_value = os.linesep.join([
'M\ttest/e2etest/data/bash/error.sh',
'D\ttest/e2etest/data/ruby/error.rb',
'A\ttest/e2etest/data/rubylint/error.rb',
'',
]).encode('utf-8')
commit = 'branch-a ^branch-b'

self.assertEqual(
{
'/home/user/repo/test/e2etest/data/bash/error.sh': 'M ',
'/home/user/repo/test/e2etest/data/rubylint/error.rb': 'A ',
}, git.modified_files('/home/user/repo', commit=commit))
check_output.assert_called_once_with([
'git', 'diff-tree', '-r', '--root', '--no-commit-id',
'--name-status', 'branch-a', '^branch-b'
])

def test_modified_files_non_absolute_root(self):
with self.assertRaises(AssertionError):
git.modified_files('foo/bar')
Expand Down Expand Up @@ -154,21 +174,21 @@ def test_modified_lines_with_commit(self, check_output):
git.modified_lines(
'/home/user/repo/foo/bar.txt',
' M',
commit='0123456789abcdef31410123456789abcdef3141')))
commits=['0123456789abcdef31410123456789abcdef3141'])))
self.assertEqual(
[2, 5],
list(
git.modified_lines(
'/home/user/repo/foo/bar.txt',
'M ',
commit='0123456789abcdef31410123456789abcdef3141')))
commits=['0123456789abcdef31410123456789abcdef3141'])))
self.assertEqual(
[2, 5],
list(
git.modified_lines(
'/home/user/repo/foo/bar.txt',
'MM',
commit='0123456789abcdef31410123456789abcdef3141')))
commits=['0123456789abcdef31410123456789abcdef3141'])))
expected_calls = [
mock.call(
['git', 'blame', '--porcelain', '/home/user/repo/foo/bar.txt'])
Expand Down Expand Up @@ -199,3 +219,21 @@ def test_last_commit(self, check_output):
def test_last_commit_not_in_repo(self, check_output):
check_output.side_effect = subprocess.CalledProcessError(255, '', '')
self.assertEqual(None, git.last_commit())

@mock.patch('subprocess.check_output')
def test_commit_diff_simple_diff(self, check_output):
check_output.return_value = os.linesep.join(['abc123',
'123abc']).encode('utf-8')
resp = git.commit_diff('branch..other-branch')
check_output.assert_called_once_with(
['git', 'rev-list', 'branch..other-branch'])
self.assertEqual(resp, ['abc123', '123abc'])

@mock.patch('subprocess.check_output')
def test_commit_diff_complicated_diff(self, check_output):
check_output.return_value = os.linesep.join(['abc123',
'123abc']).encode('utf-8')
resp = git.commit_diff('branch ^other-branch')
check_output.assert_called_once_with(
['git', 'rev-list', 'branch', '^other-branch'])
self.assertEqual(resp, ['abc123', '123abc'])
Loading