From 31059ec7b37940ee9a5f0085b13f76b82a1256f1 Mon Sep 17 00:00:00 2001 From: Matias Bordese Date: Thu, 27 May 2021 19:21:38 -0300 Subject: [PATCH] Fix a few issues with git renames handling --- tests/samples/git_rename.diff | 19 +++++++++ tests/test_parser.py | 29 +++++++++----- unidiff/constants.py | 9 +++-- unidiff/patch.py | 73 ++++++++++++++--------------------- 4 files changed, 74 insertions(+), 56 deletions(-) diff --git a/tests/samples/git_rename.diff b/tests/samples/git_rename.diff index 6a28228..86ea7e2 100644 --- a/tests/samples/git_rename.diff +++ b/tests/samples/git_rename.diff @@ -11,3 +11,22 @@ index a071991..4dbab21 100644 Some content -Some content +Some modified content + +diff --git a/oldfile b/newfile +similarity index 85% +rename from oldfile +rename to newfile +index a071991..4dbab21 100644 +--- a/oldfile ++++ b/newfile +@@ -9,4 +9,4 @@ Some content + Some content + Some content + Some content +-Some content ++Some modified content + +diff --git a/sub/onefile b/sub/otherfile +similarity index 100% +rename from onefile +rename to otherfile diff --git a/tests/test_parser.py b/tests/test_parser.py index 841c1c1..964dbc6 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # The MIT License (MIT) -# Copyright (c) 2014-2020 Matias Bordese +# Copyright (c) 2014-2021 Matias Bordese # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -246,7 +246,7 @@ def test_parse_diff_with_new_and_modified_binary_files(self): self.assertTrue(res[0].is_added_file) self.assertTrue(res[0].is_binary_file) - # second file is added + # second file is modified self.assertTrue(res[1].is_modified_file) self.assertFalse(res[1].is_removed_file) self.assertFalse(res[1].is_added_file) @@ -411,16 +411,27 @@ def test_git_renaming(self): with codecs.open(file_path, 'r', encoding='utf-8') as diff_file: res = PatchSet(diff_file) - self.assertEqual(len(res), 1) - - patch = res[0] - self.assertTrue(patch.is_rename) - self.assertEqual(patch.added, 1) - self.assertEqual(patch.removed, 1) - self.assertEqual(len(res.modified_files), 1) + self.assertEqual(len(res), 3) + self.assertEqual(len(res.modified_files), 3) self.assertEqual(len(res.added_files), 0) self.assertEqual(len(res.removed_files), 0) + # renamed and modified files + for patch in res[:2]: + self.assertTrue(patch.is_rename) + self.assertEqual(patch.added, 1) + self.assertEqual(patch.removed, 1) + # renamed file under sub-path + patch = res[2] + self.assertTrue(patch.is_rename) + self.assertEqual(patch.added, 0) + self.assertEqual(patch.removed, 0) + # confirm the full path is in source/target filenames + self.assertEqual(patch.source_file, 'a/sub/onefile') + self.assertEqual(patch.target_file, 'b/sub/otherfile') + # check path is the target path + self.assertEqual(patch.path, 'sub/otherfile') + # check that original diffs and those produced # by unidiff are the same with codecs.open(file_path, 'r', encoding='utf-8') as diff_file: diff --git a/unidiff/constants.py b/unidiff/constants.py index c9b97ee..e3b1ae6 100644 --- a/unidiff/constants.py +++ b/unidiff/constants.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # The MIT License (MIT) -# Copyright (c) 2014-2020 Matias Bordese +# Copyright (c) 2014-2021 Matias Bordese # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -35,9 +35,9 @@ r'^\+\+\+ (?P[^\t\n]+)(?:\t(?P[^\n]+))?') -# git renamed files support -RE_RENAME_SOURCE_FILENAME = re.compile(r'^rename from (?P[^\t\n]+)') -RE_RENAME_TARGET_FILENAME = re.compile(r'^rename to (?P[^\t\n]+)') +# check diff git line for git renamed files support +RE_DIFF_GIT_HEADER = re.compile( + r'^diff --git (?Pa/[^\t\n]+) (?Pb/[^\t\n]+)') # @@ (source offset, length) (target offset, length) @@ (section header) @@ -63,6 +63,7 @@ DEFAULT_ENCODING = 'UTF-8' +DEV_NULL = '/dev/null' LINE_TYPE_ADDED = '+' LINE_TYPE_REMOVED = '-' LINE_TYPE_CONTEXT = ' ' diff --git a/unidiff/patch.py b/unidiff/patch.py index 009b74b..7319515 100644 --- a/unidiff/patch.py +++ b/unidiff/patch.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # The MIT License (MIT) -# Copyright (c) 2014-2020 Matias Bordese +# Copyright (c) 2014-2021 Matias Bordese # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -31,17 +31,17 @@ from unidiff.constants import ( DEFAULT_ENCODING, + DEV_NULL, LINE_TYPE_ADDED, LINE_TYPE_CONTEXT, LINE_TYPE_EMPTY, LINE_TYPE_REMOVED, LINE_TYPE_NO_NEWLINE, LINE_VALUE_NO_NEWLINE, + RE_DIFF_GIT_HEADER, RE_HUNK_BODY_LINE, RE_HUNK_EMPTY_BODY_LINE, RE_HUNK_HEADER, - RE_RENAME_SOURCE_FILENAME, - RE_RENAME_TARGET_FILENAME, RE_SOURCE_FILENAME, RE_TARGET_FILENAME, RE_NO_NEWLINE_MARKER, @@ -355,18 +355,15 @@ def _append_trailing_empty_line(self): @property def path(self): """Return the file path abstracted from VCS.""" - if (self.source_file.startswith('a/') and - self.target_file.startswith('b/')): - filepath = self.source_file[2:] - elif (self.source_file.startswith('a/') and - self.target_file == '/dev/null'): - filepath = self.source_file[2:] - elif (self.target_file is not None and - self.target_file.startswith('b/') and - self.source_file == '/dev/null'): - filepath = self.target_file[2:] - else: - filepath = self.source_file + filepath = self.source_file + if filepath in (None, DEV_NULL) or ( + self.is_rename and self.target_file not in (None, DEV_NULL)): + # if this is a rename, prefer the target filename + filepath = self.target_file + + if filepath.startswith('a/') or filepath.startswith('b/'): + filepath = filepath[2:] + return filepath @property @@ -382,7 +379,7 @@ def removed(self): @property def is_added_file(self): """Return True if this patch adds the file.""" - if self.source_file == '/dev/null': + if self.source_file == DEV_NULL: return True return (len(self) == 1 and self[0].source_start == 0 and self[0].source_length == 0) @@ -390,7 +387,7 @@ def is_added_file(self): @property def is_removed_file(self): """Return True if this patch removes the file.""" - if self.target_file == '/dev/null': + if self.target_file == DEV_NULL: return True return (len(self) == 1 and self[0].target_start == 0 and self[0].target_length == 0) @@ -435,33 +432,22 @@ def _parse(self, diff, encoding, metadata_only): if encoding is not None: line = line.decode(encoding) - # check for a git rename, source file - is_rename_source_filename = RE_RENAME_SOURCE_FILENAME.match(line) - if is_rename_source_filename: - # prefix with 'a/' to match expected git source format - source_file = ( - 'a/' + is_rename_source_filename.group('filename')) - # keep line as patch_info - patch_info.append(line) - # reset current file - current_file = None - continue - - # check for a git rename, target file - is_rename_target_filename = RE_RENAME_TARGET_FILENAME.match(line) - if is_rename_target_filename: - if current_file is not None: - raise UnidiffParseError('Target without source: %s' % line) - # prefix with 'b/' to match expected git source format - target_file = ( - 'b/' + is_rename_target_filename.group('filename')) - # keep line as patch_info + # check for a git file rename + is_diff_git_header = RE_DIFF_GIT_HEADER.match(line) + if is_diff_git_header: + if patch_info is None: + patch_info = PatchInfo() + source_file = is_diff_git_header.group('source') + target_file = is_diff_git_header.group('target') + if (source_file != DEV_NULL + and target_file != DEV_NULL + and source_file[2:] != target_file[2:]): + # this is a renamed file + current_file = PatchedFile( + patch_info, source_file, target_file, None, None, + is_rename=True) + self.append(current_file) patch_info.append(line) - # add current file to PatchSet - current_file = PatchedFile( - patch_info, source_file, target_file, None, None, - is_rename=True) - self.append(current_file) continue # check for source file header @@ -495,6 +481,7 @@ def _parse(self, diff, encoding, metadata_only): # check for hunk header is_hunk_header = RE_HUNK_HEADER.match(line) if is_hunk_header: + patch_info = None if current_file is None: raise UnidiffParseError('Unexpected hunk found: %s' % line) current_file._parse_hunk(line, diff, encoding, metadata_only)