Skip to content

Commit 1305d07

Browse files
committed
Adding files for data/ and client/
1 parent 5653739 commit 1305d07

File tree

6 files changed

+271
-18
lines changed

6 files changed

+271
-18
lines changed

client/README.md

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Filesystem Aging
2+
3+
# Quick Overview
4+
5+
* `partition_dataset.py`: splits the LANL dataset into equally sized partitions, one per client.
6+
* `fsaging.py`: reads a client's partitioned log and creates the files it lists to age the Lustre filesystem.
7+
8+
9+
# Personalize Our Code for Your Environment
10+
11+
- Set the number of client nodes in your Lustre cluster in `partition_dataset.py`
12+
13+
``` diff
14+
$ vi partition_dataset.py
15+
16+
# change number of clients
17+
NUMBER_OF_CLIENTS = Number_of_clients
18+
```
19+
20+
- Configure the Lustre mount point in `fsaging.py`
21+
22+
``` diff
23+
$ vi fsaging.py
24+
25+
# Lustre filesystem mount point
26+
lustre_mount_point = "path_to_mountpoint"
27+
```
28+
29+
- Set the number of OST nodes in your Lustre cluster in `fsaging.py`
30+
31+
``` diff
32+
$ vi fsaging.py
33+
34+
# Configure according to the number of OSTs in Lustre Cluster
35+
SCALE_SIZE = number_of_OSTs
36+
```
37+
38+
39+
40+
41+
42+
43+
44+

client/fsaging.py

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import os
2+
import math
3+
from timeit import default_timer as timer
4+
5+
# todo: make this an argument input
# Path to the input file
file_size_map_file = "FaultyRank/client/lanl_log_client.txt"

# Lustre filesystem mount point
lustre_mount_point = "/mnt/skfs"

# Configure according to the number of OSTs in Lustre Cluster
SCALE_SIZE = 8

# Calculate the stripe size
STRIPE_SIZE_PER_OST = 65536  # 64 KB
STRIP_SIZE = SCALE_SIZE * STRIPE_SIZE_PER_OST  # 8 OSTs * 64 KB => 0.5 MB
MAX_FILE_SIZE = 4194304  # 4 MB

LOG_PROGRESS_INTERVAL = 10000
FILE_PROGRESS_INTERVAL = 10000
ONE_MILLION = 1000000.0

# Size written for a record whose computed size ends up as zero.
MIN_FILE_SIZE = 128

# Zero-based index of the first log line to process in this run. Set it to
# the "currently line courser is in" value printed by the previous run.
num_lines_processed_prev = 0  # this line needs to be changed
# Zero-based index of the log line at which this run stops (exclusive).
MAX_LINES_TO_READ = 30000


def scaled_file_size(file_size):
    """Return the number of bytes to write for a logged size of *file_size*.

    Sizes above STRIP_SIZE are capped at MAX_FILE_SIZE and then divided by
    SCALE_SIZE (rounded up). Smaller sizes are used unchanged. A result of
    zero is replaced by MIN_FILE_SIZE.
    """
    if file_size > STRIP_SIZE:
        capped = min(file_size, MAX_FILE_SIZE)
        # Integer ceiling division; avoids a float round trip.
        file_size = -(-capped // SCALE_SIZE)
    if file_size == 0:
        file_size = MIN_FILE_SIZE
    return file_size


def parse_record(line):
    """Parse one '|'-separated log line.

    Field 1 is the permission string, field 4 the size in bytes and field 8
    the path. Returns ``(is_dir, file_size, relative_path)`` where
    ``relative_path`` has its leading slashes removed (it is empty when the
    path field is missing), or ``None`` when the permission or size field is
    missing or invalid.
    """
    fields = line.split("|")
    try:
        permission = fields[1]
        file_size = int(fields[4])
    except (IndexError, ValueError):
        return None
    if not permission or file_size < 0:
        return None
    lanl_path = fields[8].strip() if len(fields) > 8 else ""
    # Strip every leading slash: os.path.join() discards the mount point
    # when its second argument is absolute.
    return permission[0] == 'd', file_size, lanl_path.lstrip("/")


def age_filesystem(log_path, mount_point, first_line=0, stop_line=None):
    """Create the files listed in *log_path* underneath *mount_point*.

    Only lines with a zero-based index in ``[first_line, stop_line)`` are
    processed (``stop_line=None`` means "until the end of the log").
    Directory records and empty files are counted but nothing is created for
    them. Every other record becomes a zero-filled file of
    ``scaled_file_size(size)`` bytes; missing parent directories are created.

    Returns a dict of counters: ``num_dirs``, ``num_empty_dirs``,
    ``num_empty_files``, ``num_malformed``, ``num_failed``, ``file_created``,
    ``total_space``, ``lines_processed`` and ``cursor`` (index of the next
    unread log line, usable as *first_line* of a follow-up run).
    """
    stats = {
        "num_dirs": 0,
        "num_empty_dirs": 0,
        "num_empty_files": 0,
        "num_malformed": 0,
        "num_failed": 0,
        "file_created": 0,
        "total_space": 0,
        "lines_processed": 0,
        "cursor": 0,
    }

    with open(log_path, 'r') as input_log:
        for line_no, line in enumerate(input_log):
            if stop_line is not None and line_no >= stop_line:
                break
            # The cursor follows the real line index, so skipped records
            # (empty files, malformed lines) cannot desynchronise a resume.
            stats["cursor"] = line_no + 1
            if line_no < first_line:
                continue
            stats["lines_processed"] += 1

            record = parse_record(line)
            if record is None:
                stats["num_malformed"] += 1
                continue
            is_dir, file_size, relative_path = record

            if is_dir:
                stats["num_dirs"] += 1
                if file_size == 0:
                    stats["num_empty_dirs"] += 1
                continue

            if file_size == 0:
                stats["num_empty_files"] += 1
                continue

            if not relative_path:
                stats["num_malformed"] += 1
                continue

            file_path = os.path.join(mount_point, relative_path)
            # dirname() of the joined path is the mount point itself for a
            # top-level file, so no directory is created in the file's place.
            dir_path = os.path.dirname(file_path)
            file_size = scaled_file_size(file_size)

            try:
                if dir_path:
                    os.makedirs(dir_path, exist_ok=True)
                # Same result as `dd if=/dev/zero bs=<size> count=1`, but
                # without a shell, so any character in the path is safe.
                with open(file_path, 'wb') as output:
                    output.write(b"\0" * file_size)
            except OSError as err:
                # Best effort: report the failure and keep going.
                print("failed to create {}: {}".format(file_path, err))
                stats["num_failed"] += 1
                continue

            stats["total_space"] += file_size
            stats["file_created"] += 1
            # Checked only right after a creation so each milestone is
            # reported exactly once.
            if (stats["file_created"] % FILE_PROGRESS_INTERVAL) == 0:
                print("created {}M files from the log.".format(float(stats["file_created"] / ONE_MILLION)))

    return stats


def main():
    """Run one ageing pass with the module-level settings and print a summary."""
    start = timer()
    stats = age_filesystem(file_size_map_file, lustre_mount_point,
                           num_lines_processed_prev, MAX_LINES_TO_READ)
    end = timer()

    print("total time to finish the processing: {} sec.".format(end - start))
    print("number of directory entries in the log: {}".format(stats["num_dirs"]))
    print("number of empty directory entries in the log: {}".format(stats["num_empty_dirs"]))
    print("number of empty files in the log: {}".format(stats["num_empty_files"]))
    print("number of malformed lines skipped: {}".format(stats["num_malformed"]))
    print("number of files that could not be created: {}".format(stats["num_failed"]))
    print("previously line courser was in: {}".format(num_lines_processed_prev))
    print("currently line courser is in: {}".format(stats["cursor"]))
    print("number of line processed in this run: {}".format(stats["lines_processed"]))
    print("number of files created in this run: {}".format(stats["file_created"]))
    print("total space required: {}".format(stats["total_space"]))


if __name__ == "__main__":
    main()

client/partition_dataset.py

+122
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import os
2+
import math
3+
4+
# change number of clients
NUMBER_OF_CLIENTS = 10

input_file = "anon-archive-fs1.txt"

# Where each client's partition is written. Clients listed in
# OUTPUT_PATH_OVERRIDES use that exact path; every other client uses
# OUTPUT_PATH_TEMPLATE formatted with its 1-based client number.
OUTPUT_PATH_TEMPLATE = "/client/faultyrank/multiple_clients/lanl_log_client{}.txt"
OUTPUT_PATH_OVERRIDES = {
    1: "dataset_client1.txt",
    2: "lanl_log_client2.txt",
}


def output_path_for_client(client_id):
    """Return the output path of the partition for 1-based *client_id*."""
    if client_id in OUTPUT_PATH_OVERRIDES:
        return OUTPUT_PATH_OVERRIDES[client_id]
    return OUTPUT_PATH_TEMPLATE.format(client_id)


def count_lines(path):
    """Return the number of lines in the text file at *path*."""
    with open(path, 'r') as handle:
        return sum(1 for _ in handle)


def partition_file(input_path, output_paths):
    """Split *input_path* into ``len(output_paths)`` contiguous partitions.

    The input is counted once and then streamed, so it is never held in
    memory as a whole. Each partition receives
    ``ceil(total_lines / len(output_paths))`` consecutive lines; the trailing
    partitions receive whatever is left, which may be fewer lines or none.
    Every output file is created (or truncated) even when it stays empty.

    Returns the list of line counts written, in the order of *output_paths*.
    Raises ValueError when *output_paths* is empty.
    """
    if not output_paths:
        raise ValueError("at least one output path is required")

    print ("reading lanl log")
    total_lines = count_lines(input_path)
    print ("complete reading lanl log")

    number_of_partitions = len(output_paths)
    # Integer ceiling division.
    lines_per_partition = -(-total_lines // number_of_partitions)

    lines_written = []
    with open(input_path, 'r') as input_log:
        for index, output_path in enumerate(output_paths, start=1):
            print("writing portion {} of {}".format(index, number_of_partitions))
            written = 0
            with open(output_path, 'w') as output:
                while written < lines_per_partition:
                    line = input_log.readline()
                    if not line:
                        # Input exhausted; remaining partitions stay empty.
                        break
                    output.write(line)
                    written += 1
            lines_written.append(written)
            print("completed portion {} of {}".format(index, number_of_partitions))
    return lines_written


def main():
    """Partition ``input_file`` into NUMBER_OF_CLIENTS per-client logs."""
    output_paths = [output_path_for_client(client_id)
                    for client_id in range(1, NUMBER_OF_CLIENTS + 1)]
    partition_file(input_file, output_paths)


if __name__ == "__main__":
    main()
120+
121+
122+

data/README.md

-16
This file was deleted.

data/test_graph.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
0 1
12
0 2
2-
1 0
3+
1 2
34
2 0
45
2 3
56
3 2

data/test_unfilled.txt

-1
This file was deleted.

0 commit comments

Comments
 (0)