Merge pull request #216 from DedSecInside/add_single_data_folder
Organize data file location
KingAkeem authored Jul 9, 2021
2 parents 4eb6cdc + fa96f93 commit 4221a36
Showing 4 changed files with 28 additions and 17 deletions.
4 changes: 3 additions & 1 deletion src/modules/analyzer.py
@@ -1,6 +1,7 @@
"""
Module is used for analyzing link relationships
"""
from .utils import join_local_path
from requests.exceptions import HTTPError

from ete3 import faces, Tree, TreeStyle, TextFace, add_face_to_node
@@ -50,7 +51,8 @@ def save(self, file_name, tree_style=default_style):
             tree_style (TreeStyle): Styling of downloaded tree
         """
         self._tree.layout_fn = default_layout
-        self._tree.render(file_name, tree_style=tree_style)
+        file_path = join_local_path(file_name)
+        self._tree.render(file_path, tree_style=tree_style)
 
     def show(self, tree_style=default_style):
         """
18 changes: 3 additions & 15 deletions src/modules/collect_data.py
@@ -7,19 +7,12 @@
 import os
 
 from bs4 import BeautifulSoup
-from dotenv import load_dotenv
-from .utils import find_file
 from threadsafe.safe_csv import SafeDictWriter
 from progress.bar import Bar
 
+from .utils import join_local_path
 from .validators import validate_link
 
 
-dev_file = find_file("torbot_dev.env", "../")
-if not dev_file:
-    raise FileNotFoundError
-load_dotenv(dotenv_path=dev_file)
-
-
 def parse_links(html):
     """Parses HTML page to extract links.
@@ -63,15 +56,10 @@ def collect_data(user_url):
     url = user_url if user_url is not None else default_url
     print(f"Gathering data for {url}")
     links = get_links(url)
-    # Create data directory if it doesn't exist
-    data_directory = os.getenv('TORBOT_DATA_DIR')
-    if not os.path.exists(data_directory):
-        os.makedirs(data_directory)
-
     current_time = datetime.datetime.now().isoformat()
     file_name = f'torbot_{current_time}.csv'
-    file_path = os.path.join(data_directory, file_name)
-    with open(file_path, 'w+', newline='') as outcsv:
+    file_path = join_local_path(file_name)
+    with open(file_path, 'w+') as outcsv:
         fieldnames = ['ID', 'Title', 'Metadata', 'Content']
         writer = SafeDictWriter(outcsv, fieldnames=fieldnames)
         bar = Bar(f'Processing...', max=len(links))
4 changes: 3 additions & 1 deletion src/modules/savefile.py
@@ -4,6 +4,7 @@
 import json
 import time
 
+from .utils import join_local_path
 
 def saveJson(datatype, data):
     """
@@ -18,8 +19,9 @@ def saveJson(datatype, data):
     """
     timestr = time.strftime("%Y%m%d-%H%M%S")
     file_name = "TorBot-Export-"+datatype+timestr+".json"
+    file_path = join_local_path(file_name)
     # Json File Creation
-    with open(file_name, "w+") as f:
+    with open(file_path, 'w+') as f:
         # Store data in Json format
         output = {datatype: data}
         # Dump output to file
19 changes: 19 additions & 0 deletions src/modules/utils.py
@@ -10,6 +10,8 @@
 import requests
 from requests.exceptions import HTTPError
 
+from dotenv import load_dotenv
+
 
 # ALGORITHM UTILITY FUNCTIONS
 
@@ -100,6 +102,8 @@ def get_url_status(url, headers=False):
        return 0
 
 
+# File Functions
+
 def find_file(name, path):
     """Search for file within specific dir and any child dirs.
@@ -115,3 +119,18 @@ def find_file(name, path):
         if name in files:
             return os.path.join(root, name)
     return False
+
+def join_local_path(file_name=""):
+    if file_name == "":
+        return
+
+    dev_file = find_file("torbot_dev.env", "../")
+    if not dev_file:
+        raise FileNotFoundError
+    load_dotenv(dotenv_path=dev_file)
+    # Create data directory if it doesn't exist
+    data_directory = os.getenv('TORBOT_DATA_DIR')
+    if not os.path.exists(data_directory):
+        os.makedirs(data_directory)
+    local_path = os.path.join(data_directory, file_name)
+    return local_path
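
For context, a minimal sketch of how the relocated helper might be called once this commit is applied. It assumes torbot_dev.env defines TORBOT_DATA_DIR and is discoverable from the working directory, as join_local_path expects; the import path and sample file name below are illustrative assumptions, not taken from the diff.

# Illustrative sketch only; the package path and file name are assumptions.
from src.modules.utils import join_local_path  # hypothetical import path

# Resolves a bare file name to a path inside TORBOT_DATA_DIR,
# creating the data directory on first use.
csv_path = join_local_path("torbot_example.csv")
print(csv_path)  # e.g. <TORBOT_DATA_DIR>/torbot_example.csv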
