-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstage2.pl
51 lines (42 loc) · 1.17 KB
/
stage2.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/perl
# Takes output from stage1, computes sha256 sums for files where two files have
# the same size and no sha256 sum has been calculated yet
use strict;
use warnings;
use DBI;
use File::Spec;
# DSN of the SQLite database this stage reads from and writes to
# (presumably the one filled by stage1 -- confirm against that script).
my $dbconf = "dbi:SQLite:dbname=/var/tmp/files.db";

# DBI->connect returns undef on failure, so there is no handle to ask for
# the error text; the class-level $DBI::errstr carries it instead.  Calling
# ->errstr on the undefined handle would die with an unrelated
# "Can't call method on an undefined value" message and hide the real cause.
my $dbh = DBI->connect($dbconf, "", "")
    or die "cannot connect to $dbconf: $DBI::errstr";
# Candidate duplicates: every row of `files` whose size occurs more than
# once in the table, with the most frequent sizes delivered first.
# The heredoc is single-quoted because the SQL contains nothing to interpolate.
my $possible_dupes_sql = <<'END_SQL';
SELECT basename,dirname,f.size,sha256
FROM files f
INNER JOIN
( SELECT COUNT(*) count, size
FROM files g
GROUP BY g.size
HAVING COUNT(*) > 1 ) sel
ON f.size = sel.size
ORDER BY sel.count DESC
END_SQL
my $possible_dupes = $dbh->prepare($possible_dupes_sql);
# Statement that stores a digest for one file; a row is addressed by its
# (dirname, basename) pair.  Placeholders are bound in the order
# sha256, dirname, basename.
my $update_hash_sql = <<'END_SQL';
UPDATE files
SET sha256 = ?
WHERE dirname = ? AND basename = ?
END_SQL
my $update_hash = $dbh->prepare($update_hash_sql);
# Compute the SHA-256 digest of a file by running the external `sha256sum`.
#
# Argument:
#   $f - path of the file to hash.
# Returns:
#   the 64-character hex digest, or nothing (undef in scalar context) after
#   a warning when sha256sum could not be started, failed, or printed
#   something that does not look like a digest line.
# Side effect:
#   echoes the raw sha256sum output line to STDOUT.
sub sha256file {
    my $f = shift;

    # List-form pipe open: no shell is involved, and '--' stops a file name
    # that starts with '-' from being taken as an option.
    my $pid = open my $fd, "-|", 'sha256sum', '-b', '--', $f;
    if (!$pid) {
        warn "cannot run sha256sum for '$f': $!\n";
        return;
    }

    my $sha = <$fd>;

    # Closing a pipe handle waits for the child; a false result means the
    # command exited with a non-zero status (e.g. the file was unreadable).
    my $closed_ok = close $fd;
    if (!defined $sha || !$closed_ok) {
        warn "sha256sum failed for '$f' (wait status $?)\n";
        return;
    }

    # $sha still carries its own newline, so this prints a blank line after
    # it -- kept as-is so the script's output does not change.
    print "$sha\n";

    # sha256sum starts the line with a backslash when it had to escape
    # characters in the file name; that marker is not part of the digest.
    my ($digest) = $sha =~ m{\A \\? ([0-9a-fA-F]{64}) [ ]}x;
    if (!defined $digest) {
        warn "unexpected sha256sum output for '$f'\n";
        return;
    }
    return $digest;
}
# Main pass: walk all size-collision candidates and store a digest for each
# one that does not have one yet.
$possible_dupes->execute();
while (my ($name, $dir, $bytes, $digest) = $possible_dupes->fetchrow_array()) {
    # A non-empty digest is already stored for this row; don't do the work
    # twice.
    if (defined $digest and $digest ne '') {
        next;
    }

    my $path = File::Spec->catfile($dir, $name);
    $digest = sha256file($path);
    $update_hash->execute($digest, $dir, $name);
}