diff --git a/gitlint/__init__.py b/gitlint/__init__.py index 788c1dc..83b3cab 100644 --- a/gitlint/__init__.py +++ b/gitlint/__init__.py @@ -23,7 +23,8 @@ Usage: git-lint [-f | --force] [--json] [--last-commit] [FILENAME ...] - git-lint [-t | --tracked] [-f | --force] [--json] [--last-commit] + git-lint [-t | --tracked] [-f | --force] [--json] + [(--last-commit | --diff DIFF)] git-lint -h | --version Options: @@ -33,8 +34,11 @@ -t --tracked Lints only tracked files. --json Prints the result as a json string. Useful to use it in conjunction with other tools. - --last-commit Checks the last checked-out commit. This is mostly useful - when used as: git checkout ; git lint --last-commit. + --last-commit Checks the last checked-out commit. Equivalent to + "--diff HEAD^" This is mostly useful when used as: + git checkout ; git lint --last-commit. + --diff= Gets the difference between the current HEAD and the commit + or branch specified """ from __future__ import unicode_literals @@ -165,7 +169,7 @@ def get_vcs_root(): return (None, None) -def process_file(vcs, commit, force, gitlint_config, file_data): +def process_file(vcs, target, force, gitlint_config, file_data): """Lint the file Returns: @@ -177,7 +181,7 @@ def process_file(vcs, commit, force, gitlint_config, file_data): modified_lines = None else: modified_lines = vcs.modified_lines( - filename, extra_data, commit=commit) + filename, extra_data, target=target) result = linters.lint(filename, modified_lines, gitlint_config) result = result[filename] @@ -206,9 +210,15 @@ def main(argv, stdout=sys.stdout, stderr=sys.stderr): stderr.write('fatal: Not a git repository' + linesep) return 128 - commit = None - if arguments['--last-commit']: - commit = vcs.last_commit() + diff = arguments['--diff'] + if diff and vcs is not git: + stderr.write('fatal: --diff is only valid with git repositories' + + linesep) + return 128 + + target = None + if arguments['--last-commit'] or diff: + target = vcs.diff_target(diff=diff) if arguments['FILENAME']: invalid_filenames = find_invalid_filenames(arguments['FILENAME'], @@ -222,7 +232,7 @@ def main(argv, stdout=sys.stdout, stderr=sys.stderr): changed_files = vcs.modified_files( repository_root, tracked_only=arguments['--tracked'], - commit=commit) + target=target) modified_files = {} for filename in arguments['FILENAME']: normalized_filename = os.path.abspath(filename) @@ -232,7 +242,7 @@ def main(argv, stdout=sys.stdout, stderr=sys.stderr): modified_files = vcs.modified_files( repository_root, tracked_only=arguments['--tracked'], - commit=commit) + target=target) linter_not_found = False files_with_problems = 0 @@ -241,7 +251,7 @@ def main(argv, stdout=sys.stdout, stderr=sys.stderr): with futures.ThreadPoolExecutor(max_workers=multiprocessing.cpu_count())\ as executor: - processfile = functools.partial(process_file, vcs, commit, + processfile = functools.partial(process_file, vcs, target, arguments['--force'], gitlint_config) for filename, result in executor.map( processfile, [(filename, modified_files[filename]) diff --git a/gitlint/git.py b/gitlint/git.py index 485668d..f98e230 100644 --- a/gitlint/git.py +++ b/gitlint/git.py @@ -14,6 +14,7 @@ """Functions to get information from git.""" import os.path +import re import subprocess import gitlint.utils as utils @@ -31,17 +32,6 @@ def repository_root(): return None -def last_commit(): - """Returns the SHA1 of the last commit.""" - try: - root = subprocess.check_output( - ['git', 'rev-parse', 'HEAD'], stderr=subprocess.STDOUT).strip() - # Convert to unicode first - return root.decode('utf-8') - except subprocess.CalledProcessError: - return None - - def _remove_filename_quotes(filename): """Removes the quotes from a filename returned by git status.""" if filename.startswith('"') and filename.endswith('"'): @@ -50,14 +40,13 @@ def _remove_filename_quotes(filename): return filename -def modified_files(root, tracked_only=False, commit=None): +def modified_files(root, tracked_only=False, target=None): """Returns a list of files that has been modified since the last commit. Args: root: the root of the repository, it has to be an absolute path. tracked_only: exclude untracked files when True. - commit: SHA1 of the commit. If None, it will get the modified files in the - working copy. + target: The changes to target, e.g. 'master..HEAD' Returns: a dictionary with the modified files as keys, and additional information as value. In this case it adds the status returned by @@ -65,8 +54,8 @@ def modified_files(root, tracked_only=False, commit=None): """ assert os.path.isabs(root), "Root has to be absolute, got: %s" % root - if commit: - return _modified_files_with_commit(root, commit) + if target: + return _modified_files_with_target(root, target) # Convert to unicode and split status_lines = subprocess.check_output([ @@ -88,11 +77,11 @@ def modified_files(root, tracked_only=False, commit=None): for filename, mode in modified_file_status) -def _modified_files_with_commit(root, commit): +def _modified_files_with_target(root, target): # Convert to unicode and split status_lines = subprocess.check_output([ 'git', 'diff-tree', '-r', '--root', '--no-commit-id', '--name-status', - commit + target ]).decode('utf-8').split(os.linesep) modified_file_status = utils.filter_lines( @@ -106,17 +95,16 @@ def _modified_files_with_commit(root, commit): mode + ' ') for filename, mode in modified_file_status) -def modified_lines(filename, extra_data, commit=None): +def modified_lines(filename, extra_data, target=None): """Returns the lines that have been modifed for this file. Args: filename: the file to check. extra_data: is the extra_data returned by modified_files. Additionally, a value of None means that the file was not modified. - commit: the complete sha1 (40 chars) of the commit. Note that specifying - this value will only work (100%) when commit == last_commit (with - respect to the currently checked out revision), otherwise, we could miss - some lines. + target: The diff target to check for modified lines. For example, + diff='master..HEAD' will check for all modified lines from master + to the current HEAD Returns: a list of lines that were modified, or None in case all lines are new. @@ -126,15 +114,36 @@ def modified_lines(filename, extra_data, commit=None): if extra_data not in ('M ', ' M', 'MM'): return None - if commit is None: - commit = '0' * 40 - commit = commit.encode('utf-8') + commits = ['0' * 40] + if target: + commits = _target_commits(target) # Split as bytes, as the output may have some non unicode characters. blame_lines = subprocess.check_output( ['git', 'blame', '--porcelain', filename]).split( os.linesep.encode('utf-8')) + commit_or_regex = '|'.join(re.escape(commit) for commit in commits) + regex_pattern = r'^({}) (?P\d+) (\d+)'.format( + commit_or_regex).encode('utf-8') modified_line_numbers = utils.filter_lines( - blame_lines, commit + br' (?P\d+) (\d+)', groups=('line', )) + blame_lines, regex_pattern, groups=('line', )) return list(map(int, modified_line_numbers)) + + +def _target_commits(target): + return subprocess.check_output(['git', 'rev-list', + target]).decode('utf-8').split(os.linesep) + + +def diff_target(diff): + """Build the git difference string + + Args: + diff: The commit or branch target to compare against + defaults to "HEAD^" + + Returns: A string representing the git difference. + """ + diff = diff or 'HEAD^' + return '{}..HEAD'.format(diff) diff --git a/gitlint/hg.py b/gitlint/hg.py index c780113..aca6468 100644 --- a/gitlint/hg.py +++ b/gitlint/hg.py @@ -30,7 +30,7 @@ def repository_root(): return None -def last_commit(): +def diff_target(): """Returns the SHA1 of the last commit.""" try: root = subprocess.check_output( @@ -42,14 +42,16 @@ def last_commit(): return None -def modified_files(root, tracked_only=False, commit=None): +# pylint: disable=unused-argument +def modified_files(root, tracked_only=False, target=None): """Returns a list of files that has been modified since the last commit. Args: root: the root of the repository, it has to be an absolute path. tracked_only: exclude untracked files when True. - commit: SHA1 of the commit. If None, it will get the modified files in the + target: SHA1 of the commit. If None, it will get the modified files in the working copy. + diff: Unused. Necessary for vcs compatibility Returns: a dictionary with the modified files as keys, and additional information as value. In this case it adds the status returned by @@ -58,8 +60,8 @@ def modified_files(root, tracked_only=False, commit=None): assert os.path.isabs(root), "Root has to be absolute, got: %s" % root command = ['hg', 'status'] - if commit: - command.append('--change=%s' % commit) + if target: + command.append('--change=%s' % target) # Convert to unicode and split status_lines = subprocess.check_output(command).decode('utf-8').split( @@ -79,14 +81,17 @@ def modified_files(root, tracked_only=False, commit=None): for filename, mode in modified_file_status) -def modified_lines(filename, extra_data, commit=None): +# pylint: enable=unused-argument + + +def modified_lines(filename, extra_data, target=None): """Returns the lines that have been modifed for this file. Args: filename: the file to check. extra_data: is the extra_data returned by modified_files. Additionally, a value of None means that the file was not modified. - commit: the complete sha1 (40 chars) of the commit. Note that specifying + target: the complete sha1 (40 chars) of the commit. Note that specifying this value will only work (100%) when commit == last_commit (with respect to the currently checked out revision), otherwise, we could miss some lines. @@ -100,8 +105,8 @@ def modified_lines(filename, extra_data, commit=None): return None command = ['hg', 'diff', '-U', '0'] - if commit: - command.append('--change=%s' % commit) + if target: + command.append('--change=%s' % target) command.append(filename) # Split as bytes, as the output may have some non unicode characters. diff --git a/test/e2etest/test_e2e.py b/test/e2etest/test_e2e.py index d79efc1..7485821 100644 --- a/test/e2etest/test_e2e.py +++ b/test/e2etest/test_e2e.py @@ -47,10 +47,11 @@ class E2EMixin(object): """ @staticmethod - def lint(): + def lint(args=None): """Returns the response and ouput of git-lint.""" + args = args or [] out = io.StringIO() - response = gitlint.main([], stdout=out, stderr=out) + response = gitlint.main(args, stdout=out, stderr=out) return response, out.getvalue() @@ -92,6 +93,21 @@ def test_extension_not_defined(self): self.assertIn('SKIPPED', output) self.assertIn(extension, output) + def get_filenames(self, linter_name, extension): + """Get the common file names to be checked by the linter""" + data_dirname = os.path.join( + os.path.dirname(os.path.realpath(__file__)), 'data') + filename_repo = os.path.join(self.temp_directory, + '%s%s' % (linter_name, extension)) + filename_original = os.path.join(data_dirname, linter_name, + 'original%s' % extension) + filename_error = os.path.join(data_dirname, linter_name, + 'error%s' % extension) + filename_nonewerror = os.path.join(data_dirname, linter_name, + 'nonewerror%s' % extension) + return (filename_original, filename_error, filename_nonewerror, + filename_repo) + # TODO(skreft): check that the first file has more than 1 error, check that # the second file has 1 new error, check also the lines that changed. def assert_linter_works(self, linter_name, extension): @@ -103,16 +119,11 @@ def assert_linter_works(self, linter_name, extension): - /nonewerror.: A line was modified/added from the last file, but no new errors are introduced. """ - data_dirname = os.path.join( - os.path.dirname(os.path.realpath(__file__)), 'data') - self.filename_repo = filename_repo = os.path.join( - self.temp_directory, '%s%s' % (linter_name, extension)) - filename_original = os.path.join(data_dirname, linter_name, - 'original%s' % extension) - filename_error = os.path.join(data_dirname, linter_name, - 'error%s' % extension) - filename_nonewerror = os.path.join(data_dirname, linter_name, - 'nonewerror%s' % extension) + filenames = self.get_filenames(linter_name, extension) + filename_original = filenames[0] + filename_error = filenames[1] + filename_nonewerror = filenames[2] + self.file_name_repo = filename_repo = filenames[3] self.assertTrue( os.path.exists(filename_original), @@ -241,6 +252,61 @@ def test_submodules(self): if repo_dir: shutil.rmtree(repo_dir) + def test_with_diff(self): + """Ensures that the diff command gets errors not on target + + Creates the same file 3 times in 3 different commits. The first + one is on the master branch, the next two are in two commits + on the new branch. Finally, overwrites the first commit with + no new errors. + """ + original_cwd = os.getcwd() + try: + repo_dir = tempfile.mkdtemp(prefix='gitlint') + os.chdir(repo_dir) + self.init_repo() + + _, filename_error, filename_nonewerror, _ = self.get_filenames( + 'pycodestyle', '.py') + repo_filename_first = os.path.join(repo_dir, 'first.py') + shutil.copy(filename_error, repo_filename_first) + self.add(repo_filename_first) + self.commit('first commit') + + execute(['git', 'checkout', '-b', 'new-branch']) + + repo_filename_error = os.path.join(repo_dir, 'new_error.py') + shutil.copy(filename_error, repo_filename_error) + self.add(repo_filename_error) + self.commit('has an error') + + repo_filename_second_error = os.path.join(repo_dir, + 'second_error.py') + shutil.copy(filename_error, repo_filename_second_error) + self.add(repo_filename_second_error) + self.commit('another error') + + shutil.copy(filename_nonewerror, repo_filename_first) + self.add(repo_filename_first) + self.commit('no new errors') + + error, output = self.lint(args=['git-lint', '--diff', 'master']) + # Ensure errors from commits on new branch but not master branch + self.assertEqual(error, 1) + # Original file passes + self.assertIn( + 'Linting file: first.py\x1b[0m\n\x1b[1mOK', + output) + # new files fail + self.assertIn('Linting file: \x1b[1mnew_error.py\x1b[0m\nline 1', + output) + self.assertIn( + 'Linting file: \x1b[1msecond_error.py\x1b[0m\nline 1', output) + finally: + os.chdir(original_cwd) + if repo_dir: + shutil.rmtree(repo_dir) + class TestHgE2E(E2EMixin, unittest.TestCase): @staticmethod diff --git a/test/unittest/test_git.py b/test/unittest/test_git.py index d5ec94d..edf9a33 100644 --- a/test/unittest/test_git.py +++ b/test/unittest/test_git.py @@ -100,13 +100,13 @@ def test_modified_files_with_commit(self, check_output): 'A\ttest/e2etest/data/rubylint/error.rb', '', ]).encode('utf-8') - commit = '0a' * 20 + commit = 'HEAD^..HEAD' self.assertEqual( { '/home/user/repo/test/e2etest/data/bash/error.sh': 'M ', '/home/user/repo/test/e2etest/data/rubylint/error.rb': 'A ', - }, git.modified_files('/home/user/repo', commit=commit)) + }, git.modified_files('/home/user/repo', target=commit)) check_output.assert_called_once_with([ 'git', 'diff-tree', '-r', '--root', '--no-commit-id', '--name-status', commit @@ -141,34 +141,35 @@ def test_modified_lines(self, check_output): ] * 3 self.assertEqual(expected_calls, check_output.call_args_list) + @mock.patch('gitlint.git._target_commits') @mock.patch('subprocess.check_output') - def test_modified_lines_with_commit(self, check_output): + def test_modified_lines_with_commit(self, check_output, target_commits): check_output.return_value = os.linesep.join([ 'baz', '0123456789abcdef31410123456789abcdef3141 2 2 4', 'foo', '0123456789abcdef31410123456789abcdef3141 5 5', 'bar' ]).encode('utf-8') + target_commits.return_value = [ + '0123456789abcdef31410123456789abcdef3141' + ] - self.assertEqual( - [2, 5], - list( - git.modified_lines( - '/home/user/repo/foo/bar.txt', - ' M', - commit='0123456789abcdef31410123456789abcdef3141'))) - self.assertEqual( - [2, 5], - list( - git.modified_lines( - '/home/user/repo/foo/bar.txt', - 'M ', - commit='0123456789abcdef31410123456789abcdef3141'))) - self.assertEqual( - [2, 5], - list( - git.modified_lines( - '/home/user/repo/foo/bar.txt', - 'MM', - commit='0123456789abcdef31410123456789abcdef3141'))) + self.assertEqual([2, 5], + list( + git.modified_lines( + '/home/user/repo/foo/bar.txt', + ' M', + target='HEAD^..HEAD'))) + self.assertEqual([2, 5], + list( + git.modified_lines( + '/home/user/repo/foo/bar.txt', + 'M ', + target='HEAD^..HEAD'))) + self.assertEqual([2, 5], + list( + git.modified_lines( + '/home/user/repo/foo/bar.txt', + 'MM', + target='HEAD^..HEAD'))) expected_calls = [ mock.call( ['git', 'blame', '--porcelain', '/home/user/repo/foo/bar.txt']) @@ -189,13 +190,8 @@ def test_modified_lines_no_info(self): git.modified_lines('/home/user/repo/foo/bar.txt', None))) - @mock.patch('subprocess.check_output', return_value=b'0a' * 20 + b'\n') - def test_last_commit(self, check_output): - self.assertEqual('0a' * 20, git.last_commit()) - check_output.assert_called_once_with( - ['git', 'rev-parse', 'HEAD'], stderr=subprocess.STDOUT) + def test_diff_target_no_diff(self): + self.assertEqual('HEAD^..HEAD', git.diff_target(None)) - @mock.patch('subprocess.check_output') - def test_last_commit_not_in_repo(self, check_output): - check_output.side_effect = subprocess.CalledProcessError(255, '', '') - self.assertEqual(None, git.last_commit()) + def test_diff_target_with_diff(self): + self.assertEqual('master..HEAD', git.diff_target('master')) diff --git a/test/unittest/test_gitlint.py b/test/unittest/test_gitlint.py index ca25fea..625a826 100644 --- a/test/unittest/test_gitlint.py +++ b/test/unittest/test_gitlint.py @@ -17,6 +17,7 @@ import os import sys +import docopt import mock from pyfakefs import fake_filesystem_unittest @@ -71,7 +72,7 @@ def setUp(self): self.addCleanup(self.git_modified_lines_patch.stop) self.git_last_commit_patch = mock.patch( - 'gitlint.git.last_commit', return_value="abcd" * 10) + 'gitlint.git.diff_target', return_value='HEAD^..HEAD') self.git_last_commit = self.git_last_commit_patch.start() self.addCleanup(self.git_last_commit_patch.stop) @@ -86,15 +87,15 @@ def reset_mock_calls(self): self.git_modified_lines.reset_mock() self.lint.reset_mock() - def assert_mocked_calls(self, tracked_only=False, commit=None): + def assert_mocked_calls(self, tracked_only=False, target=None): """Checks if the mocks were called as expected. This method exists to avoid duplication. """ self.git_modified_files.assert_called_once_with( - self.root, tracked_only=tracked_only, commit=commit) + self.root, tracked_only=tracked_only, target=target) self.git_modified_lines.assert_called_once_with( - self.filename, ' M', commit=commit) + self.filename, ' M', target=target) self.lint.assert_called_once_with(self.filename, [3, 14], mock.ANY) def test_find_invalid_filenames(self): @@ -129,11 +130,30 @@ def test_main_not_in_repo(self): [], stdout=None, stderr=self.stderr)) self.assertIn('Not a git repository', self.stderr.getvalue()) + def test_main_diff_and_last_commit(self): + self.assertRaises( + docopt.DocoptExit, + gitlint.main, ['git-lint', '--diff', 'a ^b', '--last-commit'], + stdout=None, + stderr=self.stderr) + + def test_main_diff_not_in_git_repo(self): + self.git_repository_root.return_value = None + self.hg_repository_root.return_value = self.root + self.assertEqual( + 128, + gitlint.main( + ['git-lint', '--diff', 'a ^b'], + stdout=None, + stderr=self.stderr)) + self.assertIn('fatal: --diff is only valid with git repositories', + self.stderr.getvalue()) + def test_main_nothing_changed(self): self.git_modified_files.return_value = {} self.assertEqual(0, gitlint.main([], stdout=None, stderr=None)) self.git_modified_files.assert_called_once_with( - self.root, tracked_only=False, commit=None) + self.root, tracked_only=False, target=None) def test_main_file_changed_and_still_valid(self): lint_response = {self.filename: {'comments': []}} @@ -153,7 +173,7 @@ def test_main_file_changed_and_still_valid_with_commit(self): ['git-lint', '--last-commit'], stdout=self.stdout, stderr=None)) self.assertIn('OK', self.stdout.getvalue()) - self.assert_mocked_calls(commit='abcd' * 10) + self.assert_mocked_calls(target='HEAD^..HEAD') def test_main_file_changed_and_still_valid_tracked_only(self): lint_response = {self.filename: {'comments': []}} @@ -315,7 +335,7 @@ def test_main_force_all_lines(self): self.assertIn('line 3: error', self.stdout.getvalue()) self.git_modified_files.assert_called_once_with( - self.root, tracked_only=False, commit=None) + self.root, tracked_only=False, target=None) self.lint.assert_called_once_with(self.filename, None, mock.ANY) self.reset_mock_calls() @@ -327,7 +347,7 @@ def test_main_force_all_lines(self): self.assertIn('line 3: error', self.stdout.getvalue()) self.git_modified_files.assert_called_once_with( - self.root, tracked_only=False, commit=None) + self.root, tracked_only=False, target=None) self.lint.assert_called_once_with(self.filename, None, mock.ANY) def test_main_with_invalid_files(self): @@ -365,10 +385,10 @@ def test_main_with_valid_files(self): os.path.basename(self.filename2), self.stdout.getvalue()) self.git_modified_files.assert_called_once_with( - self.root, tracked_only=False, commit=None) + self.root, tracked_only=False, target=None) expected_calls = [ - mock.call(self.filename, ' M', commit=None), - mock.call(self.filename2, None, commit=None), + mock.call(self.filename, ' M', target=None), + mock.call(self.filename2, None, target=None), ] self.assertEqual(expected_calls, self.git_modified_lines.call_args_list) @@ -400,10 +420,10 @@ def test_main_with_valid_files_relative(self): self.assertEqual('', self.stderr.getvalue()) self.git_modified_files.assert_called_once_with( - self.root, tracked_only=False, commit=None) + self.root, tracked_only=False, target=None) expected_calls = [ - mock.call(self.filename, ' M', commit=None), - mock.call(self.filename2, None, commit=None) + mock.call(self.filename, ' M', target=None), + mock.call(self.filename2, None, target=None) ] self.assertEqual(expected_calls, self.git_modified_lines.call_args_list) diff --git a/test/unittest/test_hg.py b/test/unittest/test_hg.py index d219823..4455601 100644 --- a/test/unittest/test_hg.py +++ b/test/unittest/test_hg.py @@ -81,7 +81,7 @@ def test_modified_files_with_commit(self, check_output): '/home/user/repo/docs/file1.txt': 'A', '/home/user/repo/data/file2.json': 'M', '/home/user/repo/untracked.txt': '?' - }, hg.modified_files('/home/user/repo', commit=commit)) + }, hg.modified_files('/home/user/repo', target=commit)) check_output.assert_called_once_with( ['hg', 'status', '--change=%s' % commit]) @@ -128,7 +128,7 @@ def test_modified_lines_with_commit(self, check_output): hg.modified_lines( '/home/user/repo/foo/bar.txt', 'M', - commit=commit))) + target=commit))) check_output.assert_called_once_with([ 'hg', 'diff', '-U', '0', '--change=%s' % commit, '/home/user/repo/foo/bar.txt' @@ -149,12 +149,12 @@ def test_modified_lines_no_info(self): None))) @mock.patch('subprocess.check_output', return_value=b'0a' * 20 + b'\n') - def test_last_commit(self, check_output): - self.assertEqual('0a' * 20, hg.last_commit()) + def test_diff_target(self, check_output): + self.assertEqual('0a' * 20, hg.diff_target()) check_output.assert_called_once_with( ['hg', 'parent', '--template={node}'], stderr=subprocess.STDOUT) @mock.patch('subprocess.check_output') - def test_last_commit_not_in_repo(self, check_output): + def test_diff_target_not_in_repo(self, check_output): check_output.side_effect = subprocess.CalledProcessError(255, '', '') - self.assertEqual(None, hg.last_commit()) + self.assertEqual(None, hg.diff_target())