Skip to content

Commit

Permalink
Merge pull request #46 from cisagov/feature/excel-updates
Browse files Browse the repository at this point in the history
feature: excel updates
  • Loading branch information
Dbones202 authored Aug 28, 2023
2 parents 427b3a5 + 773018d commit bc7013c
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 133 deletions.
85 changes: 74 additions & 11 deletions src/navv/bll.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import json
import os
from ipaddress import IPv4Address, IPv6Address
import pandas as pd

from navv.zeek import perform_zeekcut
from navv.utilities import get_mac_vendor
from navv.utilities import get_mac_vendor, timeit
from navv.validators import is_ipv4_address, is_ipv6_address


MAC_VENDORS_JSON_FILE = os.path.abspath(__file__ + "/../" + "data/mac-vendors.json")


def get_zeek_data(zeek_logs):
"""Return a list of Zeek conn.log data."""
return (
Expand All @@ -27,16 +30,31 @@ def get_zeek_data(zeek_logs):
)


def get_zeek_df(zeek_data: list, dns_data: "dict | None" = None):
    """Return a pandas dataframe of the conn.log data with its dns data.

    Args:
        zeek_data: Tab-separated conn.log records, one string per record.
            Each record is split on tabs; the first field is used as the
            source IP and the second as the destination IP.
        dns_data: Optional mapping of IP address to hostname. When omitted
            or ``None`` the hostname columns are filled with empty strings,
            so the previous single-argument call form keeps working.

    Returns:
        A DataFrame with the columns ``src_ip``, ``src_hostname``,
        ``dst_ip``, ``dst_hostname``, ``port``, ``proto``, ``conn``,
        ``src_mac`` and ``dst_mac``.
    """
    hostnames = dns_data if dns_data is not None else {}

    records = []
    for line in zeek_data:
        fields = line.split("\t")
        # Place each hostname directly after the IP it belongs to. After the
        # first insert the destination IP has shifted from index 1 to index 2.
        fields.insert(1, hostnames.get(fields[0], ""))
        fields.insert(3, hostnames.get(fields[2], ""))
        records.append(fields)

    return pd.DataFrame(
        records,
        columns=[
            "src_ip",
            "src_hostname",
            "dst_ip",
            "dst_hostname",
            "port",
            "proto",
            "conn",
            "src_mac",
            "dst_mac",
        ],
    )


@timeit
def get_inventory_report_df(zeek_df: pd.DataFrame):
"""Return a pandas dataframe of the inventory report data."""
zeek_df["port_and_proto"] = zeek_df["port"] + "/" + zeek_df["proto"]
Expand All @@ -56,12 +74,30 @@ def get_inventory_report_df(zeek_df: pd.DataFrame):
)

src_df = zeek_df[
["src_mac", "src_ipv4", "src_ipv6", "dst_ipv4", "dst_ipv6", "port_and_proto"]
[
"src_mac",
"src_ipv4",
"src_hostname",
"src_ipv6",
"dst_ipv4",
"dst_hostname",
"dst_ipv6",
"port_and_proto",
]
].reset_index(drop=True)
src_df["mac"] = src_df["src_mac"]

dst_df = zeek_df[
["dst_mac", "src_ipv4", "src_ipv6", "dst_ipv4", "dst_ipv6", "port_and_proto"]
[
"dst_mac",
"src_ipv4",
"src_hostname",
"src_ipv6",
"dst_ipv4",
"dst_hostname",
"dst_ipv6",
"port_and_proto",
]
].reset_index(drop=True)
dst_df["mac"] = dst_df["dst_mac"]

Expand All @@ -70,33 +106,60 @@ def get_inventory_report_df(zeek_df: pd.DataFrame):
.reset_index(drop=True)
.drop(columns=["src_mac", "dst_mac"])
.drop_duplicates(
subset=["src_ipv4", "src_ipv6", "dst_ipv4", "dst_ipv6", "port_and_proto"]
subset=[
"src_ipv4",
"src_hostname",
"src_ipv6",
"dst_ipv4",
"dst_hostname",
"dst_ipv6",
"port_and_proto",
]
)
)
df["vendor"] = df["mac"].apply(lambda mac: get_mac_vendor(mac))

grouped_df = (
df.groupby("mac", as_index=False)
.agg(
{
"src_ipv4": list,
"src_hostname": list,
"src_ipv6": list,
"dst_ipv4": list,
"dst_hostname": list,
"dst_ipv6": list,
"port_and_proto": list,
}
)
.reset_index()
)
grouped_df["vendor"] = grouped_df["mac"].apply(lambda mac: get_mac_vendor(mac))

mac_vendors = {}
with open(MAC_VENDORS_JSON_FILE) as f:
mac_vendors = json.load(f)
grouped_df["vendor"] = grouped_df["mac"].apply(
lambda mac: get_mac_vendor(mac_vendors, mac)
)
grouped_df["ipv4"] = (grouped_df["src_ipv4"] + grouped_df["dst_ipv4"]).apply(
lambda ip: list(set(ip))
)
grouped_df["ipv6"] = (grouped_df["src_ipv6"] + grouped_df["dst_ipv6"]).apply(
lambda ip: list(set(ip))
)
grouped_df["hostname"] = (
grouped_df["src_hostname"] + grouped_df["dst_hostname"]
).apply(lambda hostname: list(set(hostname)))

grouped_df.drop(
columns=["src_ipv4", "src_ipv6", "dst_ipv4", "dst_ipv6"], inplace=True
columns=[
"src_ipv4",
"src_hostname",
"src_ipv6",
"dst_ipv4",
"dst_hostname",
"dst_ipv6",
],
inplace=True,
)

return grouped_df
33 changes: 12 additions & 21 deletions src/navv/commands.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""CLI Commands."""
import json
import os
import webbrowser

Expand All @@ -22,12 +21,11 @@
write_conn_states_sheet,
write_externals_sheet,
write_inventory_report_sheet,
write_macs_sheet,
write_stats_sheet,
write_unknown_internals_sheet,
)
from navv.zeek import run_zeek, perform_zeekcut
from navv.utilities import pushd, trim_dns_data
from navv.zeek import get_dns_data, run_zeek, perform_zeekcut
from navv.utilities import pushd


@click.command("generate")
Expand Down Expand Up @@ -73,26 +71,21 @@ def generate(customer_name, output_dir, pcap, zeek_logs):

# Get zeek data
zeek_data = get_zeek_data(zeek_logs)
zeek_df = get_zeek_df(zeek_data)

# Get dns data for resolution
json_path = os.path.join(output_dir, f"{customer_name}_dns_data.json")

# Get dns data from zeek logs
dns_filtered = get_dns_data(customer_name, output_dir, zeek_logs)

# Get zeek dataframe
zeek_df = get_zeek_df(zeek_data, dns_filtered)

# Get inventory report dataframe
inventory_df = get_inventory_report_df(zeek_df)

# Turn zeekcut data into rows for spreadsheet
rows, mac_dict = create_analysis_array(zeek_data, timer=timer_data)

# Get dns data for resolution
json_path = os.path.join(output_dir, f"{customer_name}_dns_data.json")

if os.path.exists(json_path):
with open(json_path, "rb") as json_file:
dns_filtered = json.load(json_file)
else:
dns_data = perform_zeekcut(
fields=["query", "answers", "qtype", "rcode_name"],
log_file=os.path.join(zeek_logs, "dns.log"),
)
dns_filtered = trim_dns_data(dns_data)
rows = create_analysis_array(zeek_data, timer=timer_data)

ext_IPs = set()
unk_int_IPs = set()
Expand All @@ -112,8 +105,6 @@ def generate(customer_name, output_dir, pcap, zeek_logs):

write_inventory_report_sheet(inventory_df, wb)

write_macs_sheet(mac_dict, wb)

write_externals_sheet(ext_IPs, wb)

write_unknown_internals_sheet(unk_int_IPs, wb)
Expand Down
2 changes: 0 additions & 2 deletions src/navv/network_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
# package imports
from navv.commands import generate, launch
from navv.message_handler import info_msg
from navv import utilities
from navv._version import __version__


Expand All @@ -31,7 +30,6 @@ def cli(ctx):
pass


@utilities.timeit
def main():
"""Main function for performing zeek-cut commands and sorting the output"""

Expand Down
Loading

0 comments on commit bc7013c

Please sign in to comment.