Skip to content

Commit

Permalink
clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
jasonbohne123 committed Sep 7, 2023
1 parent e71650b commit a7ebe02
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 559 deletions.
152 changes: 20 additions & 132 deletions Example_Data_NB.ipynb

Large diffs are not rendered by default.

48 changes: 22 additions & 26 deletions data_preprocessing/get_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import os

import paramiko
from scp import SCPClient
from dotenv import load_dotenv
import os
from scp import SCPClient


def get_trades(symbols, start_date, end_date, row_limit):
# load the contents of the .env file into the environment
Expand All @@ -28,26 +30,23 @@ def get_trades(symbols, start_date, end_date, row_limit):
python3 trade_server_helpers.py "{db_user}" "{db_pass}" "{symbol}" "{start_date}" "{end_date}" "{row_limit}"'
stdin, stdout, stderr = ssh.exec_command(command)



print(f"Output for symbol {symbol}:")
for line in stdout:
print('... ' + line.strip('\n'))
print("... " + line.strip("\n"))

print(f"Errors for symbol {symbol}:")
for line in stderr:
print('... ' + line.strip('\n'))
print("... " + line.strip("\n"))

    # SCPClient takes a paramiko transport as an argument
scp = SCPClient(ssh.get_transport())

# fetch the remote file 'trade_results.csv' from the directory 'TAQNYSE-Clickhouse'
# and save it to the data directory in the pipelines folder
local_file_path = f'../data/trades_{symbol}_{start_date.replace("-", "")}-{end_date.replace("-", "")}.csv'
local_file_path = f'data/trades_{symbol}_{start_date.replace("-", "")}-{end_date.replace("-", "")}.csv'
os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
local_file_path = f'data/trades_{symbol}_{start_date.replace("-", "")}-{end_date.replace("-", "")}.csv'
os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
scp.get('TAQNYSE-Clickhouse/trade_results.csv', local_file_path)

scp.get("TAQNYSE-Clickhouse/trade_results.csv", local_file_path)

except Exception as e:
print(f"An error occurred: {e}")
Expand All @@ -58,6 +57,7 @@ def get_trades(symbols, start_date, end_date, row_limit):
scp.close()
ssh.close()


def get_quotes(symbols, start_date, end_date, row_limit):
# load the contents of the .env file into the environment
load_dotenv()
Expand All @@ -66,8 +66,8 @@ def get_quotes(symbols, start_date, end_date, row_limit):
host = os.getenv("host")
server_user = os.getenv("server_user")
server_password = os.getenv("server_password")
db_user = os.getenv("db_user")
db_pass = os.getenv("db_pass")
os.getenv("db_user")
os.getenv("db_pass")

ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
Expand All @@ -85,11 +85,11 @@ def get_quotes(symbols, start_date, end_date, row_limit):

print(f"Output for symbol {symbol}:")
for line in stdout:
print('... ' + line.strip('\n'))
print("... " + line.strip("\n"))

print(f"Errors for symbol {symbol}:")
for line in stderr:
print('... ' + line.strip('\n'))
print("... " + line.strip("\n"))

    # SCPClient takes a paramiko transport as an argument
scp = SCPClient(ssh.get_transport())
Expand All @@ -98,10 +98,7 @@ def get_quotes(symbols, start_date, end_date, row_limit):
# and save it to the data directory in the pipelines folder
local_file_path = f'data/quotes_{symbol}_{start_date.replace("-", "")}-{end_date.replace("-", "")}.csv'
os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
local_file_path = f'data/quotes_{symbol}_{start_date.replace("-", "")}-{end_date.replace("-", "")}.csv'
os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
scp.get('TAQNYSE-Clickhouse/quote_results.csv', local_file_path)

scp.get("TAQNYSE-Clickhouse/quote_results.csv", local_file_path)

except Exception as e:
print(f"An error occurred: {e}")
Expand All @@ -112,6 +109,7 @@ def get_quotes(symbols, start_date, end_date, row_limit):
scp.close()
ssh.close()


def get_ref(symbols, start_date, end_date, row_limit):
# load the contents of the .env file into the environment
load_dotenv()
Expand All @@ -120,8 +118,8 @@ def get_ref(symbols, start_date, end_date, row_limit):
host = os.getenv("host")
server_user = os.getenv("server_user")
server_password = os.getenv("server_password")
db_user = os.getenv("db_user")
db_pass = os.getenv("db_pass")
os.getenv("db_user")
os.getenv("db_pass")

ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
Expand All @@ -139,11 +137,11 @@ def get_ref(symbols, start_date, end_date, row_limit):

print(f"Output for symbol {symbol}:")
for line in stdout:
print('... ' + line.strip('\n'))
print("... " + line.strip("\n"))

print(f"Errors for symbol {symbol}:")
for line in stderr:
print('... ' + line.strip('\n'))
print("... " + line.strip("\n"))

    # SCPClient takes a paramiko transport as an argument
scp = SCPClient(ssh.get_transport())
Expand All @@ -152,9 +150,7 @@ def get_ref(symbols, start_date, end_date, row_limit):
# and save it to the data directory in the pipelines folder
local_file_path = f'data/ref_{symbol}_{start_date.replace("-", "")}-{end_date.replace("-", "")}.csv'
os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
local_file_path = f'data/ref_{symbol}_{start_date.replace("-", "")}-{end_date.replace("-", "")}.csv'
os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
scp.get('TAQNYSE-Clickhouse/refdata_results.csv', local_file_path)
scp.get("TAQNYSE-Clickhouse/refdata_results.csv", local_file_path)

except Exception as e:
print(f"An error occurred: {e}")
Expand All @@ -163,4 +159,4 @@ def get_ref(symbols, start_date, end_date, row_limit):
if scp is not None:
# close the SCP session
scp.close()
ssh.close()
ssh.close()
4 changes: 2 additions & 2 deletions run_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pandas as pd

from data_preprocessing.get_data import get_quotes, get_reference_data, get_trades
from data_preprocessing.get_data import get_quotes, get_ref, get_trades


def run_jobs(symbol: str, start_date: str, end_date: str, row_limit=1000000):
Expand All @@ -24,7 +24,7 @@ def run_jobs(symbol: str, start_date: str, end_date: str, row_limit=1000000):
    # TODO: create get-reference-data function

# load data
get_reference_data(symbol, start_date, end_date, row_limit)
get_ref(symbol, start_date, end_date, row_limit)
get_trades(symbol, start_date, end_date, row_limit)
get_quotes(symbol, start_date, end_date, row_limit)

Expand Down
49 changes: 0 additions & 49 deletions utils/backup_conda_env.py

This file was deleted.

46 changes: 0 additions & 46 deletions utils/conda_install.py

This file was deleted.

70 changes: 0 additions & 70 deletions utils/environment.yml

This file was deleted.

46 changes: 0 additions & 46 deletions utils/remove_dependencies.py

This file was deleted.

Loading

0 comments on commit a7ebe02

Please sign in to comment.