From be561d1a2d82ba77dc61e1d519577cb920c2d837 Mon Sep 17 00:00:00 2001 From: Tom Rochette Date: Thu, 15 Jan 2015 23:21:50 -0500 Subject: [PATCH] Added a source filter to pre-process files to eliminate unnecessary scanning of exact replicas of files. A SourceFilter class was added whose goal is to compare two source trees and filter out any duplicate source code files. It does so by hashing each file's content and comparing the before and after hash sets. The intersection set is removed from the before and after lists of files since there is no point in scanning files with the exact same source code. The benefits of this are important both in terms of CPU usage and memory usage since we do not parse code that will in the end turn out not to have changed. For example, here are the before and after of this change, comparing Symfony 2.5.9 with 2.6.3. Before [Symfony 2.5.9] 2936 php files in src [Symfony 2.6.3] 3078 php files in src 2564 files are exactly the same After [Symfony 2.5.9] 367 scanned and parsed (12.5% of the original amount) [Symfony 2.6.3] 509 scanned and parsed (16.5% of the original amount) But most important of all, it can compare whole source trees in a reasonable amount of time, something it couldn't before. 
--- .../Console/Command/CompareCommand.php | 10 +++++++ src/PHPSemVerChecker/Filter/SourceFilter.php | 28 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 src/PHPSemVerChecker/Filter/SourceFilter.php diff --git a/src/PHPSemVerChecker/Console/Command/CompareCommand.php b/src/PHPSemVerChecker/Console/Command/CompareCommand.php index b67e779..dc36016 100644 --- a/src/PHPSemVerChecker/Console/Command/CompareCommand.php +++ b/src/PHPSemVerChecker/Console/Command/CompareCommand.php @@ -4,6 +4,7 @@ use File_Iterator_Facade; use PHPSemVerChecker\Analyzer\Analyzer; +use PHPSemVerChecker\Filter\SourceFilter; use PHPSemVerChecker\Reporter\Reporter; use PHPSemVerChecker\Scanner\Scanner; use Symfony\Component\Console\Command\Command; @@ -43,6 +44,14 @@ protected function execute(InputInterface $input, OutputInterface $output) $progress = new ProgressBar($output, count($sourceBefore) + count($sourceAfter)); $progress->setFormat("%message%\n%current%/%max% [%bar%] %percent:3s%% %elapsed:6s%/%estimated:-6s% %memory:6s%"); + $progress->setMessage('Pre-processing before/after files'); + $progress->start(); + + $sourceFilter = new SourceFilter(); + $identicalCount = $sourceFilter->filter($sourceBefore, $sourceAfter); + + $progress->start(count($sourceBefore) + count($sourceAfter)); + $progress->setMessage('Scanning before files'); foreach ($sourceBefore as $file) { $scannerBefore->scan($file); @@ -68,6 +77,7 @@ protected function execute(InputInterface $input, OutputInterface $output) $duration = microtime(true) - $startTime; $output->writeln(''); + $output->writeln('[Scanned files] Before: ' . count($sourceBefore) . ', After: ' . count($sourceAfter) . ', Identical: ' . $identicalCount); $output->writeln('Time: ' . round($duration, 3) . ' seconds, Memory: ' . round(memory_get_peak_usage() / 1024 / 1024, 3) . 
' MB'); } } diff --git a/src/PHPSemVerChecker/Filter/SourceFilter.php b/src/PHPSemVerChecker/Filter/SourceFilter.php new file mode 100644 index 0000000..640e2eb --- /dev/null +++ b/src/PHPSemVerChecker/Filter/SourceFilter.php @@ -0,0 +1,28 @@ +