backup_all.py
#!/usr/bin/env python
"""Back up the settings, mapping, and docs of every index in an Elasticsearch cluster."""
import os
import json
import shutil
import argparse

import requests


def backup(url, backup_root):
    """Iterate over all indices at the Elasticsearch URL and back each one up."""
    # Rotate any existing backup: keep the previous run as <backup_root>.bak
    if os.path.isdir(backup_root):
        saved = backup_root + '.bak'
        if os.path.isdir(saved):
            shutil.rmtree(saved)
        shutil.move(backup_root, saved)
    os.makedirs(backup_root)
    # Ask the _aliases endpoint for the names of all indices
    r = requests.get('%s/_aliases' % url, verify=False)
    r.raise_for_status()
    indices = sorted(r.json())
    # Loop over each index and save its settings, mapping, and docs
    for idx in indices:
        if idx == 'geonames':
            continue  # skip the geonames index
        print("Backing up %s..." % idx)
        d = os.path.join(backup_root, idx)
        if not os.path.isdir(d):
            os.makedirs(d)
        # Save the index settings
        r = requests.get('%s/%s/_settings' % (url, idx), verify=False)
        r.raise_for_status()
        settings = r.json()
        s = os.path.join(d, '%s.settings' % idx)
        with open(s, 'w') as f:
            json.dump(settings, f, indent=2, sort_keys=True)
        print("Backed up settings for %s" % idx)
        # Save the index mapping
        r = requests.get('%s/%s/_mapping' % (url, idx), verify=False)
        r.raise_for_status()
        mapping = r.json()
        m = os.path.join(d, '%s.mapping' % idx)
        with open(m, 'w') as f:
            json.dump(mapping, f, indent=2, sort_keys=True)
        print("Backed up mapping for %s" % idx)
        # Save the docs: start a scan/scroll search, then page through the results
        query = {
            "fields": "_source",
            "query": {
                "match_all": {}
            }
        }
        txt = os.path.join(d, '%s.docs' % idx)
        r = requests.post('%s/%s/_search?search_type=scan&scroll=120m&size=100' % (url, idx),
                          data=json.dumps(query), verify=False)
        r.raise_for_status()
        scan_result = r.json()
        scroll_id = scan_result['_scroll_id']
        # Write one JSON doc per line until the scroll is exhausted
        # (the docs file could optionally be compressed afterwards, e.g. with bz2)
        with open(txt, 'w') as f:
            while True:
                r = requests.post('%s/_search/scroll?scroll=120m' % url,
                                  data=scroll_id, verify=False)
                if r.status_code != 200:
                    print("Got status code: {}\n{}".format(r.status_code, r.content))
                    r.raise_for_status()
                res = r.json()
                scroll_id = res.get('_scroll_id')
                if scroll_id is None or len(res['hits']['hits']) == 0:
                    break
                for hit in res['hits']['hits']:
                    f.write("%s\n" % json.dumps(hit['_source']))
        print("Backed up docs for %s" % idx)


def main():
    parser = argparse.ArgumentParser(description="Back up all Elasticsearch indices.")
    parser.add_argument('--url', dest='url', default='http://localhost:9200',
                        help="Elasticsearch URL to back up")
    parser.add_argument('directory', help="backup directory location")
    args = parser.parse_args()
    backup(args.url, args.directory)


if __name__ == "__main__":
    main()