Skip to content

Commit

Permalink
Update documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
piotrjurkiewicz committed Sep 13, 2024
1 parent 4eaaec0 commit b7dde39
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 24 deletions.
5 changes: 0 additions & 5 deletions docs/flow_models.elephants.skl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,28 @@ flow\_models.elephants.skl.plot\_classifiers module
---------------------------------------------------

.. automodule:: flow_models.elephants.skl.plot_classifiers
:members:
:show-inheritance:

flow\_models.elephants.skl.plot\_regressors module
--------------------------------------------------

.. automodule:: flow_models.elephants.skl.plot_regressors
:members:
:show-inheritance:

flow\_models.elephants.skl.train\_classifiers module
----------------------------------------------------

.. automodule:: flow_models.elephants.skl.train_classifiers
:members:
:show-inheritance:

flow\_models.elephants.skl.train\_regressors module
---------------------------------------------------

.. automodule:: flow_models.elephants.skl.train_regressors
:members:
:show-inheritance:

flow\_models.elephants.skl.tune module
--------------------------------------

.. automodule:: flow_models.elephants.skl.tune
:members:
:show-inheritance:
2 changes: 1 addition & 1 deletion flow_models/elephants/plot_entropy.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def calculate_entropy(directory):
Returns
-------
{"bytes": [], "bits": []}
dict
entropy for subsequent bytes and bits of (sa, da, sp, dp, prot) fields
"""

Expand Down
10 changes: 8 additions & 2 deletions flow_models/elephants/simulate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,14 @@ def simulate_data(directory, index=Ellipsis, mask=None, pps=None, fps=None, time
Returns
-------
int, int, np.array, np.array
flows_sum, octets_sum, flows_slots, octets_slots
flows_sum : int
sum of flows added to flow table
octets_sum : int
sum of octets transmitted by flows while being in flow table
flows_slots : np.array
number of flows present in flow table in each second
octets_slots : np.array
    amount of octets transmitted by flows in flow table in each second
"""

d = pathlib.Path(directory)
Expand Down
16 changes: 8 additions & 8 deletions flow_models/elephants/skl/train_classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,17 @@ class Data:

def parser():
p = argparse.ArgumentParser(description=__doc__)
p.add_argument('-O', '--output', default='sklearn', help='output directory')
p.add_argument('-O', '--output', default='sklearn', help='results output directory')
p.add_argument('--seed', type=int, default=None, help='seed')
p.add_argument('--fork', action='store_true', help='')
p.add_argument('--jobs', type=int, default=1, help='')
p.add_argument('files', help='directory')
p.add_argument('--fork', action='store_true', help='fork to subprocess for each simulation')
p.add_argument('--jobs', type=int, default=1, help='maximum number of simultaneous subprocesses')
p.add_argument('directory', help='binary flow records directory')
return p

def main():
app_args = parser().parse_args()
jobs = set()
data = load_arrays(app_args.files)
data = load_arrays(app_args.directory)
results = collections.defaultdict(lambda: collections.defaultdict(list))
decisions_true = collections.defaultdict(list)
decisions_predicted = collections.defaultdict(list)
Expand Down Expand Up @@ -73,8 +73,8 @@ def main():
for n, (train_index, test_index) in enumerate(sklearn.model_selection.KFold(data_par.get('folds', 5)).split(all_octets)):
logmsg(f"Folding {n}")
train_octets, test_octets = all_octets[train_index], all_octets[test_index]
train_flows_sum, train_octets_sum, train_flows_slots, train_octets_slots = simulate_data(app_args.files, index=train_index, mask=None, pps=PPS, fps=FPS, timeout=TIMEOUT)
test_flows_sum, test_octets_sum, test_flows_slots, test_octets_slots = simulate_data(app_args.files, index=test_index, mask=None, pps=PPS, fps=FPS, timeout=TIMEOUT)
train_flows_sum, train_octets_sum, train_flows_slots, train_octets_slots = simulate_data(app_args.directory, index=train_index, mask=None, pps=PPS, fps=FPS, timeout=TIMEOUT)
test_flows_sum, test_octets_sum, test_flows_slots, test_octets_slots = simulate_data(app_args.directory, index=test_index, mask=None, pps=PPS, fps=FPS, timeout=TIMEOUT)
for prep_par in prep_params:
logmsg(f"Preparing {prep_par} {n}")
prepared_inp = prepare_input(all_inp, **prep_par)
Expand Down Expand Up @@ -136,7 +136,7 @@ def main():
decision_predicted = clf.predict(inp)
decisions_true[f'{name} ({mode})'].append(np.packbits(decision_true))
decisions_predicted[f'{name} ({mode})'].append(np.packbits(decision_predicted))
this_flows_sum, this_octets_sum, this_flows_slots, this_octets_slots = simulate_data(app_args.files, index=index, mask=decision_predicted, pps=PPS, fps=FPS, timeout=TIMEOUT)
this_flows_sum, this_octets_sum, this_flows_slots, this_octets_slots = simulate_data(app_args.directory, index=index, mask=decision_predicted, pps=PPS, fps=FPS, timeout=TIMEOUT)
c = itertools.count()
results[f'{name} ({mode})'][next(c)].append(training_coverage)
results[f'{name} ({mode})'][next(c)].append(octets[decision_predicted].sum() / octets.sum())
Expand Down
16 changes: 8 additions & 8 deletions flow_models/elephants/skl/train_regressors.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,17 @@ class Data:

def parser():
p = argparse.ArgumentParser(description=__doc__)
p.add_argument('-O', '--output', default='sklearn', help='output directory')
p.add_argument('-O', '--output', default='sklearn', help='results output directory')
p.add_argument('--seed', type=int, default=None, help='seed')
p.add_argument('--fork', action='store_true', help='')
p.add_argument('--jobs', type=int, default=1, help='')
p.add_argument('files', help='directory')
p.add_argument('--fork', action='store_true', help='fork to subprocess for each simulation')
p.add_argument('--jobs', type=int, default=1, help='maximum number of simultaneous subprocesses')
p.add_argument('directory', help='binary flow records directory')
return p

def main():
app_args = parser().parse_args()
jobs = set()
data = load_arrays(app_args.files)
data = load_arrays(app_args.directory)
results = collections.defaultdict(lambda: collections.defaultdict(list))
predictions = {}
decisions_true = collections.defaultdict(list)
Expand Down Expand Up @@ -74,8 +74,8 @@ def main():
for n, (train_index, test_index) in enumerate(sklearn.model_selection.KFold(data_par.get('folds', 5)).split(all_octets)):
logmsg(f"Folding {n}")
train_octets, test_octets = all_octets[train_index], all_octets[test_index]
train_flows_sum, train_octets_sum, train_flows_slots, train_octets_slots = simulate_data(app_args.files, index=train_index, mask=None, pps=PPS, fps=FPS, timeout=TIMEOUT)
test_flows_sum, test_octets_sum, test_flows_slots, test_octets_slots = simulate_data(app_args.files, index=test_index, mask=None, pps=PPS, fps=FPS, timeout=TIMEOUT)
train_flows_sum, train_octets_sum, train_flows_slots, train_octets_slots = simulate_data(app_args.directory, index=train_index, mask=None, pps=PPS, fps=FPS, timeout=TIMEOUT)
test_flows_sum, test_octets_sum, test_flows_slots, test_octets_slots = simulate_data(app_args.directory, index=test_index, mask=None, pps=PPS, fps=FPS, timeout=TIMEOUT)
for prep_par in prep_params:
logmsg(f"Preparing {prep_par} {n}")
prepared_inp = prepare_input(all_inp, **prep_par)
Expand Down Expand Up @@ -136,7 +136,7 @@ def main():
thresholds = np.logspace(0, 24, 64, base=2) * 64
for threshold in thresholds:
decision_predicted = octets_predicted > threshold
sim_results.append(pool.apply_async(simulate_data, (app_args.files, index, decision_predicted, PPS, FPS, TIMEOUT)))
sim_results.append(pool.apply_async(simulate_data, (app_args.directory, index, decision_predicted, PPS, FPS, TIMEOUT)))
for i, threshold in enumerate(thresholds):
logmsg(f"Evaluating {name} mode: {mode} threshold: {threshold}")
decision_true = octets > threshold
Expand Down

0 comments on commit b7dde39

Please sign in to comment.