Skip to content

Commit dd6a211

Browse files
committed
fix: "ignoreWhitespaces" doesn't produce the same result as GNU diff
- fix/update upstream jfcherng/php-sequence-matcher - append some helper lines to the inputs before diffing and then strip the extra results afterwards Signed-off-by: Jack Cherng <[email protected]>
1 parent dd96f78 commit dd6a211

File tree

2 files changed

+151
-37
lines changed

2 files changed

+151
-37
lines changed

src/Differ.php

+148-34
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,23 @@ final class Differ
4141
'oldNewComparison' => 0,
4242
];
4343

44+
/**
45+
* Helper lines that are appended to the end of both the old and the new
46+
* line arrays before diffing, to keep the diff result stable around the EOF.
47+
*
* Every entry is the same two-code-point string (U+FCF28, U+FC232), both of
* which lie in a Unicode private-use plane.
* NOTE(review): presumably chosen so they never collide with real input
* lines -- confirm.
*
48+
* @var string[]
49+
*/
50+
private const APPENDED_HELPERLINES = [
51+
"\u{fcf28}\u{fc232}",
52+
"\u{fcf28}\u{fc232}",
53+
"\u{fcf28}\u{fc232}",
54+
"\u{fcf28}\u{fc232}",
55+
"\u{fcf28}\u{fc232}",
56+
"\u{fcf28}\u{fc232}",
57+
"\u{fcf28}\u{fc232}",
58+
"\u{fcf28}\u{fc232}",
59+
];
60+
4461
/**
4562
* @var array array of the options that have been applied for generating the diff
4663
*/
@@ -66,6 +83,16 @@ final class Differ
6683
*/
6784
private $sequenceMatcher;
6885

86+
/**
87+
* @var int the number of old lines, counted before the helper lines are appended
88+
*/
89+
private $oldSrcLength = 0;
90+
91+
/**
92+
* @var int the number of new lines, counted before the helper lines are appended
93+
*/
94+
private $newSrcLength = 0;
95+
6996
/**
7097
* @var int the end index for the old if the old has no EOL at EOF
7198
* -1 means the old has an EOL at EOF
@@ -271,9 +298,17 @@ public function getGroupedOpcodes(): array
271298
return $this->groupedOpcodes;
272299
}
273300

274-
return $this->groupedOpcodes = $this->sequenceMatcher
275-
->setSequences($this->old, $this->new)
301+
$old = $this->old;
302+
$new = $this->new;
303+
$this->getGroupedOpcodesPre($old, $new);
304+
305+
$opcodes = $this->sequenceMatcher
306+
->setSequences($old, $new)
276307
->getGroupedOpcodes($this->options['context']);
308+
309+
$this->getGroupedOpcodesPost($opcodes);
310+
311+
return $this->groupedOpcodes = $opcodes;
277312
}
278313

279314
/**
@@ -289,12 +324,118 @@ public function getGroupedOpcodesGnu(): array
289324
return $this->groupedOpcodesGnu;
290325
}
291326

292-
return $this->groupedOpcodesGnu = $this->sequenceMatcher
293-
->setSequences(
294-
$this->createGnuCompatibleLines($this->old),
295-
$this->createGnuCompatibleLines($this->new)
296-
)
327+
$old = $this->old;
328+
$new = $this->new;
329+
$this->getGroupedOpcodesGnuPre($old, $new);
330+
331+
$opcodes = $this->sequenceMatcher
332+
->setSequences($old, $new)
297333
->getGroupedOpcodes($this->options['context']);
334+
335+
$this->getGroupedOpcodesGnuPost($opcodes);
336+
337+
return $this->groupedOpcodesGnu = $opcodes;
338+
}
339+
340+
/**
341+
* Triggered before getGroupedOpcodes(). Modifies both $old and $new in place.
342+
*
* Records the current length of each array and then appends the
* APPENDED_HELPERLINES entries to it.
*
343+
* @param string[] $old the old lines (passed by reference, helper lines get appended)
344+
* @param string[] $new the new lines (passed by reference, helper lines get appended)
345+
*/
346+
private function getGroupedOpcodesPre(array &$old, array &$new): void
347+
{
348+
// remember the real length first; getGroupedOpcodesPost() clamps against it
$this->oldSrcLength = \count($old);
349+
\array_push($old, ...self::APPENDED_HELPERLINES);
350+
351+
// same bookkeeping for the new side
$this->newSrcLength = \count($new);
352+
\array_push($new, ...self::APPENDED_HELPERLINES);
353+
}
354+
355+
/**
356+
* Triggered after getGroupedOpcodes(). Modifies $opcodes in place.
357+
*
* Clamps the indexes of every block to the recorded source lengths, then
* drops blocks that have become empty and hunks that have no blocks left.
*
* NOTE(review): a block is presumably [op, i1, i2, j1, j2], where i1/i2
* index the old lines and j1/j2 index the new lines -- confirm against
* the sequence matcher.
* NOTE(review): unset() does not renumber keys, so a hunk (or $opcodes)
* may keep gaps in its integer keys after this method.
*
358+
* @param int[][][] $opcodes the grouped opcodes (hunks => blocks), passed by reference
359+
*/
360+
private function getGroupedOpcodesPost(array &$opcodes): void
361+
{
362+
// NOTE(review): $hunk and $block are bound by reference and never unset();
// they only go away when the method returns
foreach ($opcodes as $hunkIdx => &$hunk) {
363+
foreach ($hunk as $blockIdx => &$block) {
364+
// range overflow: indexes 1 and 2 are capped at the old length,
// indexes 3 and 4 are capped at the new length
365+
if ($block[1] > $this->oldSrcLength) {
366+
$block[1] = $this->oldSrcLength;
367+
}
368+
if ($block[2] > $this->oldSrcLength) {
369+
$block[2] = $this->oldSrcLength;
370+
}
371+
if ($block[3] > $this->newSrcLength) {
372+
$block[3] = $this->newSrcLength;
373+
}
374+
if ($block[4] > $this->newSrcLength) {
375+
$block[4] = $this->newSrcLength;
376+
}
377+
// useless extra block? (both of its ranges are empty after clamping)
378+
/** @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset */
379+
if ($block[1] === $block[2] && $block[3] === $block[4]) {
380+
unset($hunk[$blockIdx]);
381+
}
382+
}
383+
384+
// a hunk whose blocks were all removed is removed as well
if (empty($hunk)) {
385+
unset($opcodes[$hunkIdx]);
386+
}
387+
}
388+
}
389+
390+
/**
391+
* Triggered before getGroupedOpcodesGnu(). Modifies both $old and $new in place.
392+
*
* First makes each array GNU-compatible (see the closure below), then
* delegates to getGroupedOpcodesPre() for the common preparation.
*
393+
* @param string[] $old the old lines (passed by reference)
394+
* @param string[] $new the new lines (passed by reference)
395+
*/
396+
private function getGroupedOpcodesGnuPre(array &$old, array &$new): void
397+
{
398+
/**
399+
* Prepare the lines for a GNU-style diff.
400+
*
401+
* This closure checks whether $lines has no EOL at EOF and, if so, appends
402+
* a special indicator to the last line. Otherwise it drops the trailing "" line.
403+
*
404+
* @param string[] $lines the lines created by simply explode("\n", $string)
405+
*/
406+
$createGnuCompatibleLines = static function (array $lines): array {
407+
// note that the $lines should not be empty at this point
408+
// they have at least one element "" in the array because explode("\n", "") === [""]
409+
$lastLineIdx = \count($lines) - 1;
410+
// bound by reference so the append in the else-branch changes $lines itself
$lastLine = &$lines[$lastLineIdx];
411+
412+
if ($lastLine === '') {
413+
// remove the last plain "" line since we don't need it anymore
414+
// use array_slice() to also reset the array index
415+
$lines = \array_slice($lines, 0, -1);
416+
} else {
417+
// this means the original source has no EOL at EOF
418+
// we append a special indicator to that line so it no longer matches
419+
$lastLine .= self::LINE_NO_EOL;
420+
}
421+
422+
return $lines;
423+
};
424+
425+
$old = $createGnuCompatibleLines($old);
426+
$new = $createGnuCompatibleLines($new);
427+
428+
// the source lengths are therefore recorded on the GNU-compatible arrays
$this->getGroupedOpcodesPre($old, $new);
429+
}
430+
431+
/**
431+
* Triggered after getGroupedOpcodesGnu(). Modifies $opcodes in place.
432+
*
* Currently this only delegates to getGroupedOpcodesPost().
*
433+
* @param int[][][] $opcodes the grouped opcodes (hunks => blocks), passed by reference
434+
*/
435+
private function getGroupedOpcodesGnuPost(array &$opcodes): void
436+
{
437+
$this->getGroupedOpcodesPost($opcodes);
298439
}
299440

300441
/**
@@ -334,31 +475,4 @@ private function resetCachedResults(): self
334475

335476
return $this;
336477
}
337-
338-
/**
339-
* Make the lines to be prepared for GNU-style diff.
340-
*
341-
* This method checks whether $lines has no EOL at EOF and append a special
342-
* indicator to the last line.
343-
*
344-
* @param string[] $lines the lines created by simply explode("\n", $string)
345-
*/
346-
private function createGnuCompatibleLines(array $lines): array
347-
{
348-
// note that the $lines should not be empty at this point
349-
// they have at least one element "" in the array because explode("\n", "") === [""]
350-
$lastLineIdx = \count($lines) - 1;
351-
$lastLine = &$lines[$lastLineIdx];
352-
353-
if ($lastLine === '') {
354-
// remove the last plain "" line since we don't need it anymore
355-
unset($lines[$lastLineIdx]);
356-
} else {
357-
// this means the original source has no EOL at EOF
358-
// we append a special indicator to that line so it no longer matches
359-
$lastLine .= self::LINE_NO_EOL;
360-
}
361-
362-
return $lines;
363-
}
364478
}

src/Renderer/Text/Unified.php

+3-3
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,11 @@ protected function renderHunkBlocks(Differ $differ, array $hunk): string
9292
// because of ignoreCase, ignoreWhitespace, etc
9393
if ($op === SequenceMatcher::OP_EQ) {
9494
// we could only pick either the old or the new to show
95-
// here we pick the new one to let the user know what it is now
95+
// note that the GNU diff will use the old one because it creates a patch
9696
$ret .= $this->renderContext(
9797
' ',
98-
$differ->getNew($j1, $j2),
99-
$j2 === $newNoEolAtEofIdx
98+
$differ->getOld($i1, $i2),
99+
$i2 === $oldNoEolAtEofIdx
100100
);
101101

102102
continue;

0 commit comments

Comments
 (0)