forked from starryalley/public-misc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dir_checksum.sh
executable file
·185 lines (148 loc) · 4.96 KB
/
dir_checksum.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/bin/bash
#
# Directory checksum script
#
# It computes the md5sum of every file in a directory,
# recursively, and saves the results to a checksum file
# inside that directory. A second run on the same directory
# compares the previous checksums with the current ones and
# reports missing/added/modified files.
#
# =======================================
#
# Author: Mark Kuo ([email protected])
# Date: 2013.3.13
#
#
# Original Requirements:
#
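# All of the hash data can be kept in a single text file.
# Assuming that file names always stay the same,
# the messages reported by the program basically only need to be: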
# 1 match
# 2 modified
# 3 disappeared
#
#
# Dependent commands:
# md5sum, diff, comm, xargs, find, sort
# grep, egrep, echo, sed, rm, mv, wc, pv
#
# Notes:
# only tested under Ubuntu 12.04
#
# GLOBAL CONFIG
# ==============
# Name of the checksum file; the checksum from the previous run is kept
# under the same name with a ".old" suffix appended.
CHECKSUM_NAME='.dir_checksum'
# Number of parallel worker processes. This is only the initial value: it is
# replaced by (core count + 1) once the script runs.
PARALLEL_COUNT=2
# === get core count ===
# Prints the number of logical processors on stdout, always as a positive
# integer.
# The count is taken from the "processor" entries of /proc/cpuinfo. When that
# file is unreadable or yields no usable count, `getconf _NPROCESSORS_ONLN`
# is tried next, and 1 is used as the last resort, so callers doing
# arithmetic on the result never receive an empty string or 0.
function core_count()
{
    local n=""

    if [[ -r /proc/cpuinfo ]]; then
        n=$(grep -c '^processor' /proc/cpuinfo)
    fi

    # grep -c prints 0 when nothing matched; treat that like "unknown".
    if ! [[ "$n" =~ ^[1-9][0-9]*$ ]]; then
        n=$(getconf _NPROCESSORS_ONLN 2>/dev/null)
    fi

    if ! [[ "$n" =~ ^[1-9][0-9]*$ ]]; then
        n=1
    fi

    echo "$n"
}
# === create checksum ===
# Computes the md5sum of every regular file found under a directory
# (symbolic links are followed) and writes the resulting lines, sorted by
# file name, to a checksum file. Files named $CHECKSUM_NAME or
# $CHECKSUM_NAME.old are left out of the listing.
#
# Globals:
#   CHECKSUM_NAME   (read) base name of the checksum files to exclude
#   PARALLEL_COUNT  (read) number of parallel md5sum processes / sort threads
# Arguments:
#   $1 target dir
#   $2 target checksum filename
# Outputs:
#   progress messages on stdout; a pv progress bar when pv is installed
function create_checksum()
{
    local path=$1
    local checksum=$2
    local count
    local -a find_cmd progress

    # A single find command line shared by the counting and hashing passes.
    # Every expansion is quoted so directories containing spaces or glob
    # characters are handled correctly.
    find_cmd=(find -L "$path"
        ! -name "$CHECKSUM_NAME" ! -name "$CHECKSUM_NAME.old"
        -type f)

    echo "Count files..."
    count=$("${find_cmd[@]}" | wc -l)
    echo "$count files found"

    echo "Computing checksum..."
    # pv only draws the progress bar; when it is not installed, pass the data
    # through unchanged so the checksum file is still produced.
    if command -v pv >/dev/null 2>&1; then
        progress=(pv -cN MD5SUM --line-mode -s "$count")
    else
        progress=(cat)
    fi

    # find | xargs md5sum | progress | sort
    #  - NUL-delimited names keep unusual file names intact
    #  - xargs -r: without it, an empty file list would still run md5sum once
    #    and record a bogus entry for standard input ("-")
    #  - the output must be sorted (by file name) or the later diff against
    #    the previous checksum file becomes meaningless
    "${find_cmd[@]}" -print0 |
        xargs -0 -r -n 1 -P "$PARALLEL_COUNT" md5sum -- |
        "${progress[@]}" |
        sort --parallel="$PARALLEL_COUNT" -k 2 > "$checksum"

    echo "Done. Checksum file written to $checksum"
}
# === compare checksum ===
# Diffs two checksum files and prints a report of modified, missing and
# added files on stdout.
#   Modified: the file name is present in both files with different checksums
#   Missed:   the file name is only present in the old checksum file
#   Added:    the file name is only present in the new checksum file
#
# Arguments:
#   $1 target dir (accepted for interface compatibility; the scratch files
#      are created in a private temporary directory, not in the target dir)
#   $2 old checksum file
#   $3 new checksum file
# Returns:
#   non-zero when the temporary directory cannot be created
function compare_checksum()
{
    local path=$1
    local old=$2
    local new=$3
    local workdir diff_file miss_file new_file

    workdir=$(mktemp -d) || return 1
    diff_file="$workdir/diff"
    miss_file="$workdir/miss"
    new_file="$workdir/new"

    # Keep only the removed (-) and added (+) entry lines of the diff. The
    # header patterns are anchored to the start of the line so that entries
    # whose file name contains "---", "+++" or "@@" are not thrown away.
    diff --unified=0 -- "$old" "$new" |
        grep -Ev '^(---|\+\+\+|@@)' > "$diff_file"

    # example content of the diff file:
    # -0dea76f1d4581b591409bffe8fe6f722  ../tmp/test_enum/main.c
    # +330a71bf82c38415860d19490cec2648  ../tmp/test_enum/main.c
    # -d41d8cd98f00b204e9800998ecf8427e  ../tmp/test_enum/test1
    # +d41d8cd98f00b204e9800998ecf8427e  ../tmp/test_enum/test3

    # Split into two sorted sets of file names (miss and new). Stripping the
    # "<sign><checksum><space><space or *>" prefix keeps the whole file name,
    # including any spaces inside it.
    sed -n 's/^-[^ ]* [ *]//p' "$diff_file" | sort > "$miss_file"
    sed -n 's/^+[^ ]* [ *]//p' "$diff_file" | sort > "$new_file"

    echo "=== Report ==="
    echo "Modified:" # the intersection
    comm -12 "$miss_file" "$new_file"
    echo "--------------"
    echo "Missed:" # in miss but not in new
    comm -23 "$miss_file" "$new_file"
    echo "--------------"
    echo "Added:" # in new but not in miss
    comm -13 "$miss_file" "$new_file"
    echo "--------------"

    # clean up tmp files
    rm -rf -- "$workdir"
}
# === usage ===
# Prints a two-line usage summary on stdout and then terminates the script
# with exit status 65 (bad arguments). This function does not return.
function usage()
{
    local -r bad_args_status=65

    printf 'Usage: %s [directory]\n' "$0"
    printf '%s\n' " directory: the directory to check (default: current directory)"

    exit "$bad_args_status"
}
# === main ===
# Flow: validate the arguments, pick the target directory, rename an existing
# checksum file to "<name>.old", create a fresh checksum file, and finally
# compare the old and the new checksum files when an old one is available.

# check arguments: at most one (the target directory) is accepted
if [[ $# -gt 1 ]]; then
    echo "Wrong arguments"
    usage
fi

# default: current working directory
dir=${1:-$(pwd)}
# -d (not -e): a regular file is not a valid target either
if [[ ! -d "$dir" ]]; then
    echo "$dir doesn't exist or is not a directory. Exiting.."
    exit 1
fi
echo "Target directory: $dir"

# set parallel count
PARALLEL_COUNT=$(($(core_count) + 1)) #core+1
echo "Parallel process: $PARALLEL_COUNT"

# check if checksum already exist
checksum_path="$dir/$CHECKSUM_NAME"
if [[ -e "$checksum_path" ]]; then
    echo "Old checksum exists. Renamed: $checksum_path.old"
    # Stop here if the rename fails: continuing would overwrite the previous
    # checksum and leave nothing valid to compare against.
    mv -- "$checksum_path" "$checksum_path.old" || exit 1
fi

# create_checksum
create_checksum "$dir" "$checksum_path"

# see if we need to compare
if [[ -e "$checksum_path.old" ]]; then
    compare_checksum "$dir" "$checksum_path.old" "$checksum_path"
    # keep old copy for reference?
    #rm $checksum_path.old
fi

# progress example: (may work only in linux)
#echo -ne '##### (33%)\r'; sleep 1
#echo -ne '############# (66%)\r'; sleep 1
#echo -ne '####################### (100%)\r'
#echo -ne '\n'
exit