|
"""Split the LANL log into one fixed-size partition file per client.

The input file is streamed once, front to back. Client 1 receives the first
LINES_PER_PARTITION lines, client 2 the next LINES_PER_PARTITION lines, and
so on. Lines beyond NUMBER_OF_CLIENTS * LINES_PER_PARTITION are not written.
"""
import itertools
import math
import os

# Change the number of clients here; partition size and the number of output
# files both follow from it.
NUMBER_OF_CLIENTS = 10
# Number of lines expected in the input log.
TOTAL_LINES = 112020366
# Rounded up so that NUMBER_OF_CLIENTS partitions cover TOTAL_LINES lines.
LINES_PER_PARTITION = math.ceil(TOTAL_LINES / NUMBER_OF_CLIENTS)

input_file = "anon-archive-fs1.txt"

# Directory used by the default per-client output path.
OUTPUT_DIR = "/client/faultyrank/multiple_clients"
# NOTE(review): clients 1 and 2 were written to relative paths that do not
# follow the OUTPUT_DIR pattern used by clients 3-10. They are preserved
# exactly here; confirm whether that was intentional.
OUTPUT_PATH_OVERRIDES = {
    1: "dataset_client1.txt",
    2: "lanl_log_client2.txt",
}

# Words used in the progress messages. The spelling ("eight") is kept exactly
# as the script has always printed it.
ORDINALS = (
    "first",
    "second",
    "third",
    "fourth",
    "fifth",
    "sixth",
    "seventh",
    "eight",
    "ninth",
    "tenth",
)


def output_path_for_client(client):
    """Return the partition file path for the 1-based client number."""
    default_path = "{}/lanl_log_client{}.txt".format(OUTPUT_DIR, client)
    return OUTPUT_PATH_OVERRIDES.get(client, default_path)


def split_log(input_path, output_paths, lines_per_partition):
    """Write consecutive runs of lines from input_path to each output path.

    Args:
        input_path: Text file to read.
        output_paths: Destination files, in client order. Each one is created
            (or truncated) even when the input has no lines left for it.
        lines_per_partition: Maximum number of lines written to each file.

    The input is streamed instead of being loaded with readlines(), so memory
    use does not grow with the size of the log. Every file is closed (and
    therefore flushed) as soon as its partition is complete.
    """
    with open(input_path, 'r') as log:
        for index, path in enumerate(output_paths):
            # Fall back to a numeric label beyond the ten known ordinals.
            if index < len(ORDINALS):
                label = ORDINALS[index]
            else:
                label = "#{}".format(index + 1)
            print("writing {} portion".format(label))
            with open(path, 'w') as partition:
                # islice consumes exactly lines_per_partition lines from the
                # shared file iterator, so the next partition resumes where
                # this one stopped.
                partition.writelines(
                    itertools.islice(log, lines_per_partition)
                )
            print("completed {} portion".format(label))


def main():
    """Split input_file into NUMBER_OF_CLIENTS partition files."""
    print("reading lanl log")
    output_paths = [
        output_path_for_client(client)
        for client in range(1, NUMBER_OF_CLIENTS + 1)
    ]
    split_log(input_file, output_paths, int(LINES_PER_PARTITION))
    # Reading is interleaved with writing, so it only completes once the
    # last partition has been written.
    print("complete reading lanl log")


if __name__ == "__main__":
    main()
0 commit comments