From f40f704f7cbd40c9fbee60a1e6149d6597d6649a Mon Sep 17 00:00:00 2001 From: Mostafa Abdo Date: Wed, 9 Aug 2023 20:56:12 -0700 Subject: [PATCH 1/9] move zeek logic to zeek.py --- src/navv/commands.py | 3 ++- src/navv/utilities.py | 22 ---------------------- src/navv/zeek.py | 25 +++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 23 deletions(-) create mode 100644 src/navv/zeek.py diff --git a/src/navv/commands.py b/src/navv/commands.py index 300d79e..9221f8f 100644 --- a/src/navv/commands.py +++ b/src/navv/commands.py @@ -25,7 +25,8 @@ write_stats_sheet, write_unknown_internals_sheet, ) -from navv.utilities import pushd, run_zeek, perform_zeekcut, trim_dns_data +from navv.zeek import run_zeek, perform_zeekcut +from navv.utilities import pushd, trim_dns_data @click.command("generate") diff --git a/src/navv/utilities.py b/src/navv/utilities.py index 1076cc7..f3211a9 100644 --- a/src/navv/utilities.py +++ b/src/navv/utilities.py @@ -5,7 +5,6 @@ import os import contextlib import json -from subprocess import Popen, PIPE, STDOUT, check_call import time from netaddr import EUI, core as netaddr_core @@ -45,27 +44,6 @@ def timed(*args, **kw): return timed -@timeit -def run_zeek(pcap_path, zeek_logs_path, **kwargs): - with pushd(zeek_logs_path): - # can we add Site::local_nets to the zeek call here? - err = check_call(["zeek", "-C", "-r", pcap_path, "local.zeek"]) - error_msg(f"Zeek returned with code: {err}") - - -def perform_zeekcut(fields, log_file): - """Perform the call to zeek-cut with the identified fields on the specified log file""" - try: - with open(log_file, "rb") as f: - zeekcut = Popen( - ["zeek-cut"] + fields, stdout=PIPE, stdin=PIPE, stderr=STDOUT - ) - return zeekcut.communicate(input=f.read())[0] - except OSError as e: - # probably "file does not exist" - return b"" - - def trim_dns_data(data): """Find entries in dns log that contain no_error and return a dict of {ip: hostname,}""" ret_data = {} diff --git a/src/navv/zeek.py b/src/navv/zeek.py new file mode 100644 index 0000000..b98ebf1 --- /dev/null +++ b/src/navv/zeek.py @@ -0,0 +1,25 @@ +from subprocess import Popen, PIPE, STDOUT, check_call + +from navv.message_handler import error_msg +from navv.utilities import pushd, timeit + + +@timeit +def run_zeek(pcap_path, zeek_logs_path, **kwargs): + with pushd(zeek_logs_path): + # can we add Site::local_nets to the zeek call here? + err = check_call(["zeek", "-C", "-r", pcap_path, "local.zeek"]) + error_msg(f"Zeek returned with code: {err}") + + +def perform_zeekcut(fields, log_file): + """Perform the call to zeek-cut with the identified fields on the specified log file""" + try: + with open(log_file, "rb") as f: + zeekcut = Popen( + ["zeek-cut"] + fields, stdout=PIPE, stdin=PIPE, stderr=STDOUT + ) + return zeekcut.communicate(input=f.read())[0] + except OSError as e: + # probably "file does not exist" + return b"" From b51d14919c3f6b6131d714bdf84944af51cc8712 Mon Sep 17 00:00:00 2001 From: Mostafa Abdo Date: Wed, 9 Aug 2023 20:57:04 -0700 Subject: [PATCH 2/9] add pandas package --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index ad98be5..c2c1ef5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,6 +29,7 @@ install_requires = lxml>=4.3.2 netaddr>=0.8.0 openpyxl>=3.1.2 + pandas>=2.0.3 tqdm>=4.57.0 [options.packages.find] From 05578a5b6644d3d0591a37f3805a0cc164903828 Mon Sep 17 00:00:00 2001 From: Mostafa Abdo Date: Thu, 10 Aug 2023 12:24:23 -0700 Subject: [PATCH 3/9] add ip validators --- src/navv/validators.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 src/navv/validators.py diff --git a/src/navv/validators.py b/src/navv/validators.py new file mode 100644 index 0000000..1e54c76 --- /dev/null +++ b/src/navv/validators.py @@ -0,0 +1,19 @@ +from ipaddress import IPv4Address, IPv6Address + + +def is_ipv4_address(ip_address: str) -> bool: + """Return True if address is a valid IPv4 address.""" + try: + IPv4Address(ip_address) + return True + except ValueError: + return False + + +def is_ipv6_address(ip_address: str) -> bool: + """Return True if address is a valid IPv6 address.""" + try: + IPv6Address(ip_address) + return True + except ValueError: + return False From 9d967c765baa20b3778c39a6edf7a62e2216265a Mon Sep 17 00:00:00 2001 From: Mostafa Abdo Date: Thu, 10 Aug 2023 12:25:06 -0700 Subject: [PATCH 4/9] move run_zeek in try except block --- src/navv/zeek.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/navv/zeek.py b/src/navv/zeek.py index b98ebf1..36d5a3e 100644 --- a/src/navv/zeek.py +++ b/src/navv/zeek.py @@ -8,8 +8,10 @@ def run_zeek(pcap_path, zeek_logs_path, **kwargs): with pushd(zeek_logs_path): # can we add Site::local_nets to the zeek call here? - err = check_call(["zeek", "-C", "-r", pcap_path, "local.zeek"]) - error_msg(f"Zeek returned with code: {err}") + try: + check_call(["zeek", "-C", "-r", pcap_path, "local.zeek"]) + except Exception as e: + error_msg(e) def perform_zeekcut(fields, log_file): From e5c25e4eb5200cd4d03fb3c2f14092596cd11851 Mon Sep 17 00:00:00 2001 From: Mostafa Abdo Date: Thu, 10 Aug 2023 12:25:30 -0700 Subject: [PATCH 5/9] modify get_mac_vendor --- src/navv/utilities.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/navv/utilities.py b/src/navv/utilities.py index f3211a9..2c3bbc2 100644 --- a/src/navv/utilities.py +++ b/src/navv/utilities.py @@ -58,7 +58,7 @@ def trim_dns_data(data): return ret_data -def get_mac_vendor(mac_address: str) -> list: +def get_mac_vendor(mac_address: str) -> str: """Return the vendor of the MAC address.""" mac_address = mac_address.upper() @@ -66,19 +66,22 @@ def get_mac_vendor(mac_address: str) -> list: EUI(mac_address) except netaddr_core.AddrFormatError: error_msg(f"Invalid MAC address: {mac_address}") - return [f"Bad MAC address {mac_address}"] + return f"Bad MAC address {mac_address}" with open(MAC_VENDORS_JSON_FILE) as f: mac_vendors = json.load(f) - vendor = [ - vendor["vendorName"] - for vendor in mac_vendors - if mac_address.startswith(vendor["macPrefix"]) - ] + try: + vendor = [ + vendor["vendorName"] + for vendor in mac_vendors + if mac_address.startswith(vendor["macPrefix"]) + ][0] + except IndexError: + vendor = "" if not vendor: error_msg(f"Unknown vendor for MAC address: {mac_address}") - return [f"Unknown vendor for MAC address {mac_address}"] + return "Unknown Vendor" return vendor From 8ace15c2071adc014bcf53376c868f8b7898eb3a Mon Sep 17 00:00:00 2001 From: Mostafa Abdo Date: Mon, 14 Aug 2023 15:56:34 -0700 Subject: [PATCH 6/9] feature: add mac address validation --- src/navv/validators.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/navv/validators.py b/src/navv/validators.py index 1e54c76..039e39a 100644 --- a/src/navv/validators.py +++ b/src/navv/validators.py @@ -1,4 +1,5 @@ from ipaddress import IPv4Address, IPv6Address +import re def is_ipv4_address(ip_address: str) -> bool: @@ -17,3 +18,10 @@ def is_ipv6_address(ip_address: str) -> bool: return True except ValueError: return False + + +def is_mac_address(mac_address: str) -> bool: + if re.match("[0-9a-f]{2}([-:])[0-9a-f]{2}(\\1[0-9a-f]{2}){4}$", x.lower()) + return True + return False + \ No newline at end of file From 66248aa1697c28f3ef752cae78d4e32f8e3dfe8c Mon Sep 17 00:00:00 2001 From: Mostafa Abdo Date: Mon, 14 Aug 2023 15:56:58 -0700 Subject: [PATCH 7/9] feature: add mac address validation --- src/navv/validators.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/navv/validators.py b/src/navv/validators.py index 039e39a..15eb318 100644 --- a/src/navv/validators.py +++ b/src/navv/validators.py @@ -21,7 +21,8 @@ def is_ipv6_address(ip_address: str) -> bool: def is_mac_address(mac_address: str) -> bool: - if re.match("[0-9a-f]{2}([-:])[0-9a-f]{2}(\\1[0-9a-f]{2}){4}$", x.lower()) + if re.match( + "[0-9a-f]{2}([-:])[0-9a-f]{2}(\\1[0-9a-f]{2}){4}$", mac_address.lower() + ): return True return False - \ No newline at end of file From 5d5ea802b9d7369a7ad39eb519334eab746b9c57 Mon Sep 17 00:00:00 2001 From: Mostafa Abdo Date: Mon, 14 Aug 2023 15:59:49 -0700 Subject: [PATCH 8/9] add description to is_mac_address func --- src/navv/validators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/navv/validators.py b/src/navv/validators.py index 15eb318..2f01106 100644 --- a/src/navv/validators.py +++ b/src/navv/validators.py @@ -21,6 +21,7 @@ def is_ipv6_address(ip_address: str) -> bool: def is_mac_address(mac_address: str) -> bool: + """Return True if address is a valid MAC address.""" if re.match( "[0-9a-f]{2}([-:])[0-9a-f]{2}(\\1[0-9a-f]{2}){4}$", mac_address.lower() ): From ff55011211c0401a92ed2c35f4563b992eb8f3f3 Mon Sep 17 00:00:00 2001 From: Mostafa Abdo Date: Tue, 15 Aug 2023 08:18:29 -0700 Subject: [PATCH 9/9] add business logic file and refactor --- src/navv/bll.py | 102 ++++++++++++++++++++++++++++++++++ src/navv/commands.py | 30 ++++------ src/navv/spreadsheet_tools.py | 61 ++++++++++++++------ src/navv/utilities.py | 9 +-- 4 files changed, 158 insertions(+), 44 deletions(-) create mode 100644 src/navv/bll.py diff --git a/src/navv/bll.py b/src/navv/bll.py new file mode 100644 index 0000000..37854be --- /dev/null +++ b/src/navv/bll.py @@ -0,0 +1,102 @@ +import os +from ipaddress import IPv4Address, IPv6Address +import pandas as pd + +from navv.zeek import perform_zeekcut +from navv.utilities import get_mac_vendor +from navv.validators import is_ipv4_address, is_ipv6_address + + +def get_zeek_data(zeek_logs): + """Return a list of Zeek conn.log data.""" + return ( + perform_zeekcut( + fields=[ + "id.orig_h", + "id.resp_h", + "id.resp_p", + "proto", + "conn_state", + "orig_l2_addr", + "resp_l2_addr", + ], + log_file=os.path.join(zeek_logs, "conn.log"), + ) + .decode("utf-8") + .split("\n")[:-1] + ) + + +def get_zeek_df(zeek_data: list): + """Return a pandas dataframe of the conn.log data.""" + zeek_data = [row.split("\t") for row in zeek_data] + + return pd.DataFrame( + zeek_data, + columns=["src_ip", "dst_ip", "port", "proto", "conn", "src_mac", "dst_mac"], + ) + + +def get_inventory_report_df(zeek_df: pd.DataFrame): + """Return a pandas dataframe of the inventory report data.""" + zeek_df["port_and_proto"] = zeek_df["port"] + "/" + zeek_df["proto"] + + zeek_df["src_ipv4"] = zeek_df["src_ip"].apply( + lambda ip: ip if is_ipv4_address(ip) else None + ) + zeek_df["src_ipv6"] = zeek_df["src_ip"].apply( + lambda ip: ip if is_ipv6_address(ip) else None + ) + + zeek_df["dst_ipv4"] = zeek_df["dst_ip"].apply( + lambda ip: ip if is_ipv4_address(ip) else None + ) + zeek_df["dst_ipv6"] = zeek_df["dst_ip"].apply( + lambda ip: ip if is_ipv6_address(ip) else None + ) + + src_df = zeek_df[ + ["src_mac", "src_ipv4", "src_ipv6", "dst_ipv4", "dst_ipv6", "port_and_proto"] + ].reset_index(drop=True) + src_df["mac"] = src_df["src_mac"] + + dst_df = zeek_df[ + ["dst_mac", "src_ipv4", "src_ipv6", "dst_ipv4", "dst_ipv6", "port_and_proto"] + ].reset_index(drop=True) + dst_df["mac"] = dst_df["dst_mac"] + + df = ( + pd.concat([src_df, dst_df]) + .reset_index(drop=True) + .drop(columns=["src_mac", "dst_mac"]) + .drop_duplicates( + subset=["src_ipv4", "src_ipv6", "dst_ipv4", "dst_ipv6", "port_and_proto"] + ) + ) + df["vendor"] = df["mac"].apply(lambda mac: get_mac_vendor(mac)) + + grouped_df = ( + df.groupby("mac", as_index=False) + .agg( + { + "src_ipv4": list, + "src_ipv6": list, + "dst_ipv4": list, + "dst_ipv6": list, + "port_and_proto": list, + } + ) + .reset_index() + ) + grouped_df["vendor"] = grouped_df["mac"].apply(lambda mac: get_mac_vendor(mac)) + grouped_df["ipv4"] = (grouped_df["src_ipv4"] + grouped_df["dst_ipv4"]).apply( + lambda ip: list(set(ip)) + ) + grouped_df["ipv6"] = (grouped_df["src_ipv6"] + grouped_df["dst_ipv6"]).apply( + lambda ip: list(set(ip)) + ) + grouped_df.drop( + columns=["src_ipv4", "src_ipv6", "dst_ipv4", "dst_ipv6"], inplace=True + ) + + return grouped_df diff --git a/src/navv/commands.py b/src/navv/commands.py index 9221f8f..4ab00ff 100644 --- a/src/navv/commands.py +++ b/src/navv/commands.py @@ -6,6 +6,7 @@ # Third-Party Libraries import click +from navv.bll import get_inventory_report_df, get_zeek_data, get_zeek_df # cisagov Libraries from navv.gui import app @@ -69,27 +70,18 @@ def generate(customer_name, output_dir, pcap, zeek_logs): run_zeek(os.path.abspath(pcap), zeek_logs, timer=timer_data) else: timer_data["run_zeek"] = "NOT RAN" - zeek_data = ( - perform_zeekcut( - fields=[ - "id.orig_h", - "id.resp_h", - "id.resp_p", - "proto", - "conn_state", - "orig_l2_addr", - "resp_l2_addr", - ], - log_file=os.path.join(zeek_logs, "conn.log"), - ) - .decode("utf-8") - .split("\n")[:-1] - ) - # turn zeekcut data into rows for spreadsheet + # Get zeek data + zeek_data = get_zeek_data(zeek_logs) + zeek_df = get_zeek_df(zeek_data) + + # Get inventory report dataframe + inventory_df = get_inventory_report_df(zeek_df) + + # Turn zeekcut data into rows for spreadsheet rows, mac_dict = create_analysis_array(zeek_data, timer=timer_data) - # get dns data for resolution + # Get dns data for resolution json_path = os.path.join(output_dir, f"{customer_name}_dns_data.json") if os.path.exists(json_path): @@ -118,7 +110,7 @@ def generate(customer_name, output_dir, pcap, zeek_logs): timer=timer_data, ) - write_inventory_report_sheet(mac_dict, wb) + write_inventory_report_sheet(inventory_df, wb) write_macs_sheet(mac_dict, wb) diff --git a/src/navv/spreadsheet_tools.py b/src/navv/spreadsheet_tools.py index f435ece..f4a272b 100644 --- a/src/navv/spreadsheet_tools.py +++ b/src/navv/spreadsheet_tools.py @@ -347,27 +347,52 @@ def write_conn_states_sheet(conn_states, wb): auto_adjust_width(new_ws) -def write_inventory_report_sheet(mac_dict, wb): +def write_inventory_report_sheet(inventory_df, wb): """Get Mac Addresses with their associated IP addresses and manufacturer.""" ir_sheet = make_sheet(wb, "Inventory Report", idx=4) - ir_sheet.append(["MAC", "Vendor", "IPs"]) - for row_index, mac in enumerate(mac_dict, start=2): - ir_sheet[f"A{row_index}"].value = mac - orgs = utilities.get_mac_vendor(mac) + ir_sheet.append(["MAC", "Vendor", "IPv4", "IPv6", "Port and Proto"]) + + inventory_data = inventory_df.to_dict(orient="records") + for index, row in enumerate(inventory_data, start=2): + # Mac Address column + ir_sheet[f"A{index}"].value = row["mac"] + + # Vendor column + ir_sheet[f"B{index}"].value = row["vendor"] + + # IPv4 Address column + ipv4_list_cell = ir_sheet[f"C{index}"] + ipv4_list_cell.alignment = openpyxl.styles.Alignment(wrap_text=True) + + ipv4 = "" + if row["ipv4"]: + ipv4 = ", ".join(each for each in row["ipv4"] if each) + ipv4_list_cell.value = ipv4 + + # IPv6 Address column + ipv6_list_cell = ir_sheet[f"D{index}"] + ipv6_list_cell.alignment = openpyxl.styles.Alignment(wrap_text=True) + + ipv6 = "" + if row["ipv6"]: + ipv6 = ", ".join(each for each in row["ipv6"] if each) + ipv6_list_cell.value = ipv6 + + # Port and Protocol column + pnp_sheet = ir_sheet[f"E{index}"] + pnp_sheet.alignment = openpyxl.styles.Alignment(wrap_text=True) + + port_and_proto = "" + if row["port_and_proto"]: + port_and_proto = ", ".join( + list(set(each for each in row["port_and_proto"] if each))[:10] + ) - ir_sheet[f"B{row_index}"].value = "\n".join(orgs) - ip_list_cell = ir_sheet[f"C{row_index}"] - ip_list_cell.alignment = openpyxl.styles.Alignment(wrap_text=True) - num_ips = len(mac_dict[mac]) - if num_ips > 10: - display_list = mac_dict[mac][:10] - display_list.append(f"Displaying 10 IPs of {num_ips}") - ip_list_cell.value = "\n".join(display_list) - else: - ip_list_cell.value = "\n".join(mac_dict[mac][:10]) - ir_sheet.row_dimensions[row_index].height = min(num_ips, 11) * 15 - if row_index % 2 == 0: - for cell in ir_sheet[f"{row_index}:{row_index}"]: + pnp_sheet.value = port_and_proto + + # Add styling to every other row + if index % 2 == 0: + for cell in ir_sheet[f"{index}:{index}"]: cell.fill = openpyxl.styles.PatternFill("solid", fgColor="AAAAAA") auto_adjust_width(ir_sheet) ir_sheet.column_dimensions["C"].width = 39 * 1.2 diff --git a/src/navv/utilities.py b/src/navv/utilities.py index 2c3bbc2..8787b13 100644 --- a/src/navv/utilities.py +++ b/src/navv/utilities.py @@ -7,10 +7,10 @@ import json import time -from netaddr import EUI, core as netaddr_core from tqdm import tqdm from navv.message_handler import info_msg, error_msg +from navv.validators import is_mac_address MAC_VENDORS_JSON_FILE = os.path.abspath(__file__ + "/../" + "data/mac-vendors.json") @@ -62,9 +62,7 @@ def get_mac_vendor(mac_address: str) -> str: """Return the vendor of the MAC address.""" mac_address = mac_address.upper() - try: - EUI(mac_address) - except netaddr_core.AddrFormatError: + if not is_mac_address(mac_address): error_msg(f"Invalid MAC address: {mac_address}") return f"Bad MAC address {mac_address}" @@ -78,9 +76,6 @@ def get_mac_vendor(mac_address: str) -> str: if mac_address.startswith(vendor["macPrefix"]) ][0] except IndexError: - vendor = "" - - if not vendor: error_msg(f"Unknown vendor for MAC address: {mac_address}") return "Unknown Vendor"