#!/bin/sh
# Small hack: this file stays a valid shell script everywhere while the actual logic is written in Python
# vim: syntax=python
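# The shell preamble below only picks a suitable interpreter (python >= 2.6,
# or python3) and feeds the rest of this file to it through the heredoc at
# the end of the preamble.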
if type python >/dev/null 2>&1
then
  if python -V 2>&1 | grep "Python 2\.[0-5]" >/dev/null
  then
    echo "Python too old" 1>&2
    [ "$1" = "put" ] && exit 0
    exit 9
  fi
  PYTHON=python
elif type python3 >/dev/null 2>&1
then
  PYTHON=python3
else
  echo "No Python interpreter found" 1>&2
  [ "$1" = "put" ] && exit 0
  exit 9
fi
${PYTHON} - "$@" <<'EOF'
#!/usr/bin/python
"""
Rudder cache management system
Usage:
  build-caching get <destination> [--force-config] [--with-env] [--from=<file>] [<key>=<value> ...]
  build-caching put <source> [--force-config] [--with-env] [--move] [--from=<file>] [<key>=<value> ...]
  build-caching show [--force-config] [--with-env] [--from=<file>] [<key>=<value> ...]
  build-caching purge [--force-config] [--with-env] [--keep=<count>] [--from=<file>] [<key>=<value> ...]

Options:
  --force-config   Require the configuration file to explicitly define a
                   cache path before any caching is done. This makes it
                   possible to hide this command inside a build process and
                   only create a potentially big cache directory if the user
                   knows about it and wants it.

  --from=<file>    Read additional key=value pairs from a JSON file, on top
                   of the ones given on the command line.

  --with-env       Automatically include the OS/arch as discriminating
                   key/value pairs.
Synopsis:
  This command manages a cache on the filesystem. The cache contains entries
  identified by a group of key,value pairs. When creating an entry, all
  key,value pairs must be provided (the cache system adds a key called
  'path'). When searching for an entry, you can provide any number of
  key,value pairs; the search returns an exact match on all the pairs
  provided, and if a requested key isn't known in the index, there is no
  match.

  The database is ordered. When purging or showing, the oldest entries are
  purged or shown first. When getting an entry, the most recent matching
  entry is used.

  <source> and <destination> use the same format as rsync: ending them with
  a '/' means you want to synchronize the directory's content, whereas
  omitting it means you want to synchronize the directory itself. You can
  also use the ssh:// or rsync:// syntax to synchronize with remote hosts.

  The cache is stored in a configured directory. Put it on the same
  filesystem as your builds to benefit from the --move option and reduce
  IOs. The cache directory is organized with uuids to avoid collisions.
  Moreover, if a <name> key is provided, it is used as a first-level
  directory name for readability; if not, the name used is 'unknown'.
  The configuration file is ~/.build-caching and is structured as follows:

    # path to the cache directory
    path=/tmp/cache
Commands:
  get
    Get an entry from the cache and copy it to <destination>.

  put
    Put an entry into the cache from <source>. Use --move to use mv instead
    of rsync, which avoids costly IOs when the source is on the same
    filesystem and you intended to delete it afterwards.

  show
    Show all cache entries.

  purge
    Purge entries from the cache.
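
Examples (key names and paths below are illustrative):
  build-caching put ./build/ name=myproject version=1.0
  build-caching get ./build/ name=myproject version=1.0
  build-caching show name=myproject
  build-caching purge name=myproject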
"""
from __future__ import print_function
from pprint import pprint
import sys
# also look in parent directories for modules (e.g. a locally vendored docopt)
sys.path.insert(0, "..")
sys.path.insert(0, "../..")
import re
import json
import uuid
import os
import shutil
import hashlib
import platform
# manage non stdlib import
try:
    import docopt  # apt-get install python-docopt
except ImportError:
    print("Missing dependency docopt, aborting cache management!", file=sys.stderr)
    if len(sys.argv) > 1 and sys.argv[1] == "put":
        exit(0)
    exit(9)
DB = []
CACHE_PATH = '/srv/cache/build'
DB_FILE = CACHE_PATH + '/index.json'
CONFIG_FILE = "~/.build-caching"
# Indexing methods
# We may do better with a dedicated library (but not sure)
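# An index entry is a plain dict holding the user-provided keys plus the
# 'path' key added by the cache itself, e.g. (illustrative values):
#   {"name": "myproject", "version": "1.0",
#    "path": "/srv/cache/build/myproject/<uuid>/"}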

def db_load():
    """ Load the index file into a global variable """
    global DB
    if os.path.isfile(DB_FILE):
        with open(DB_FILE) as fd:
            DB = json.load(fd)

def db_save():
    """ Save the index into a file """
    with open(DB_FILE, 'w') as fd:
        json.dump(DB, fd)

def db_match(stored_data, query):
    """ Match a set of key,value pairs against a stored entry """
    for key, value in query.items():
        if key not in stored_data:
            return False
        if stored_data[key] != value:
            return False
    return True
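# e.g. db_match({"name": "a", "path": "/x/"}, {"name": "a"}) is True, while
# db_match({"name": "a"}, {"name": "a", "os": "linux"}) is False because the
# 'os' key is unknown to the stored entry (values here are illustrative)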

def db_get_entry(metadata):
    """ Return the most recent entry matching metadata, or None """
    for i in range(len(DB) - 1, -1, -1):
        if db_match(DB[i], metadata):
            return DB[i]
    return None
# End of indexing methods

def new_cache_path(metadata):
    """ Prepare a directory entry in the cache """
    my_uuid = uuid.uuid1()
    name = 'unknown'
    if 'name' in metadata:
        name = metadata['name']
    path = CACHE_PATH + "/" + name + "/" + str(my_uuid) + '/'
    os.makedirs(path)
    return path

def parse_metadata(arguments):
    """ Parse parameters to extract a single metadata object """
    metadata = {}
    if arguments['--from'] is not None:
        filename = arguments['--from']
        if not os.path.isfile(filename):
            print(filename + " doesn't exist", file=sys.stderr)
            exit(1)
        with open(filename) as fd:
            metadata = json.load(fd)
    if arguments['--with-env']:
        # summarise the build environment by hashing the OS identification files
        m = hashlib.md5()
        for filename in ['/etc/os-release', '/etc/debian_version', '/etc/redhat-release',
                         '/etc/SuSE-release', '/etc/slackware-version', '/etc/tunables/lastboot']:
            if os.path.isfile(filename):
                with open(filename, 'rb') as inputfile:
                    m.update(inputfile.read())
        metadata['env'] = m.hexdigest()
        print("Environment summarised as " + m.hexdigest(), file=sys.stderr)
        metadata['arch'] = platform.architecture()[0]
        metadata['machine'] = platform.machine()
        metadata['platform'] = platform.platform()
        print("Architecture: " + metadata['arch'], file=sys.stderr)
        print("Machine: " + metadata['machine'], file=sys.stderr)
        print("Platform: " + metadata['platform'], file=sys.stderr)
    for kv in arguments['<key>=<value>']:
        match = re.match(r'(^.*?)=(.*)', kv)
        if match:
            metadata[match.group(1)] = match.group(2)
        else:
            print("Can't parse key=value " + kv, file=sys.stderr)
            exit(2)
    return metadata
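
# Trailing-slash semantics, as with rsync (paths below are illustrative):
#   sync("src/", "dst/")  synchronizes the *content* of src into dst
#   sync("src", "dst/")   creates dst/src, i.e. the directory itself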
def sync(source, destination, move=False):
    """ Synchronize a source with a destination, optionally using move instead of rsync while keeping rsync's '/' semantics """
    if destination[-1] == '/':
        try:
            os.makedirs(destination)
        except OSError:
            pass
    try:
        if move and source[-1] == '/':
            # move the content of the source directory
            for src in os.listdir(source):
                # os.listdir returns bare names, join them with their directories
                os.rename(os.path.join(source, src), os.path.join(destination, src))
        elif move:
            # move the source directory itself
            os.rename(source, destination + '/' + os.path.basename(source))
        else:
            ret = os.system("rsync -a '" + source + "' '" + destination + "'")
            if ret != 0:
                return False
    except OSError:
        return False
    return True

def put(metadata, source, move):
    """ Put a source in the cache """
    entry = db_get_entry(metadata)
    if entry is None:
        path = new_cache_path(metadata)
    else:
        path = entry['path']
    if not sync(source, path, move):
        print("Synchronization error, aborting!", file=sys.stderr)
        shutil.rmtree(path)
        exit(3)
    if entry is None:
        metadata['path'] = path
        DB.append(metadata)
        db_save()
    print("Cache saved", file=sys.stderr)

def get(metadata, destination):
    """ Retrieve data from the cache and put it into destination """
    entry = db_get_entry(metadata)
    if entry is None:
        print("No such entry in the cache", file=sys.stderr)
        exit(1)
    print("Restoring cache", file=sys.stderr)
    if not sync(entry['path'], destination):
        print("Synchronization error, aborting!", file=sys.stderr)
        exit(3)

def show(metadata):
    """ Show cache content """
    print(json.dumps([x for x in DB if db_match(x, metadata)], indent=2))

def purge(metadata):
    """ Purge matching entries from the cache """
    # note: the --keep=<count> option is accepted on the command line but not
    # implemented here, all matching entries are removed
    global DB
    keep_db = []
    for data in DB:
        if db_match(data, metadata):
            shutil.rmtree(data['path'])
        else:
            keep_db.append(data)
    DB = keep_db
    db_save()

# Main entry point
if __name__ == "__main__":
    arguments = docopt.docopt(__doc__)
    config_file = os.path.expanduser(CONFIG_FILE)
    # read the configured cache path first so that the directory check below
    # applies to the configured path, not only the default one
    if os.path.isfile(config_file):
        with open(config_file) as fd:
            for line in fd:
                match = re.match(r'^path\s*=\s*(.*?)\s*$', line)
                if match:
                    CACHE_PATH = os.path.expanduser(match.group(1))
                    DB_FILE = CACHE_PATH + '/index.json'
    if (arguments['--force-config'] and not os.path.isfile(config_file)) or not os.path.isdir(CACHE_PATH):
        print("Cache management without CACHE_PATH is disabled!", file=sys.stderr)
        if len(sys.argv) > 1 and sys.argv[1] == "put":
            exit(0)
        else:
            exit(9)
    # let's go
    db_load()
    metadata = parse_metadata(arguments)
    if arguments['get']:
        get(metadata, arguments['<destination>'])
    elif arguments['put']:
        put(metadata, arguments['<source>'], arguments['--move'])
    elif arguments['show']:
        show(metadata)
    elif arguments['purge']:
        purge(metadata)
    else:
        print("Unknown arguments", file=sys.stderr)
        exit(9)
EOF