
Commit

Merge pull request #32 from AMS-QF/connect_db
Connect to database
jasonbohne123 authored Sep 7, 2023
2 parents c22e916 + 59623a0 commit f33bdfd
Showing 14 changed files with 9,379 additions and 6,690 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -6,4 +6,5 @@ feature_generation/__pycache__/
pipelines/__pycache__/
personal_research/
HFT_Research/
.DS_Store
.DS_Store
utils/__pycache__
3,388 changes: 219 additions & 3,169 deletions Example_Data_NB.ipynb

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions README.md
@@ -3,7 +3,6 @@ Included are the client side-scripts for access to the TAQ-Clickhouse Database r

Detailed instructions are also included in Accessing the TAQ-Clickhouse Database PDF

## Setup

### Remote Setup
- Pull TAQNYSE-Clickhouse repo into home directory on remote server
@@ -52,7 +51,7 @@ Detailed instructions are also included in Accessing the TAQ-Clickhouse Database



### Example Usage
### Deprecated Setup

Here we show how to access the TAQ-Clickhouse database remotely, clean the data, and generate features with a single command.

@@ -69,7 +68,6 @@ To download data for the entire S&P500:
```python run_jobs.py --symbol "S&P500" --start_date "2020-01-01" --end_date "2020-01-03"```

Notes
- Sometimes the connection is reset by the peer; pressing Enter in the terminal restarts the connection
- To check whether a query has finished, you can log in to the TAQ server in a separate terminal and search for running jobs within the docker container (see the sketch below)
- Feel free to create a directory for your own research called `personal_research` in the root directory of the repo. This directory is ignored by git and can be used to store your own scripts and data
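
A hedged sketch of that check using paramiko, mirroring the style of `get_data.py`; the container name `clickhouse_container` is an assumption, and credentials come from the same `.env` file:

```
import os

import paramiko
from dotenv import load_dotenv

load_dotenv()
ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect(os.getenv("host"), username=os.getenv("server_user"), password=os.getenv("server_password"))

# list queries currently running inside the ClickHouse container (container name is hypothetical)
_, stdout, _ = ssh.exec_command(
    'docker exec clickhouse_container clickhouse-client --query "SHOW PROCESSLIST"'
)
print(stdout.read().decode())
ssh.close()
```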

162 changes: 162 additions & 0 deletions data_preprocessing/get_data.py
@@ -0,0 +1,162 @@
import os

import paramiko
from dotenv import load_dotenv
from scp import SCPClient


def get_trades(symbols, start_date, end_date, row_limit):
# load the contents of the .env file into the environment
load_dotenv()

# read the credentials from the environment variables
host = os.getenv("host")
server_user = os.getenv("server_user")
server_password = os.getenv("server_password")
db_user = os.getenv("db_user")
db_pass = os.getenv("db_pass")

ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

scp = None

try:
ssh.connect(host, username=server_user, password=server_password)

for symbol in symbols:
# Activate the conda environment on the remote server and run the trade query helper
command = f'source /opt/anaconda3/etc/profile.d/conda.sh && conda activate query_user && cd TAQNYSE-Clickhouse && cd server_helpers && \
python3 trade_server_helpers.py "{db_user}" "{db_pass}" "{symbol}" "{start_date}" "{end_date}" "{row_limit}"'
stdin, stdout, stderr = ssh.exec_command(command)

print(f"Output for symbol {symbol}:")
for line in stdout:
print("... " + line.strip("\n"))

print(f"Errors for symbol {symbol}:")
for line in stderr:
print("... " + line.strip("\n"))

# SCPClient takes a paramiko transport as an argument
scp = SCPClient(ssh.get_transport())

# fetch the remote file 'trade_results.csv' from the directory 'TAQNYSE-Clickhouse'
# and save it under the local data/ directory
local_file_path = f'data/trades_{symbol}_{start_date.replace("-", "")}-{end_date.replace("-", "")}.csv'
os.makedirs(os.path.dirname(local_file_path), exist_ok=True)

scp.get("TAQNYSE-Clickhouse/trade_results.csv", local_file_path)

except Exception as e:
print(f"An error occurred: {e}")

finally:
if scp is not None:
# close the SCP session
scp.close()
ssh.close()
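

# NOTE (usage sketch; values are placeholders, not real credentials): every
# function in this module reads the following keys from a local .env file:
#   host=<remote server hostname>
#   server_user=<remote server user>
#   server_password=<remote server password>
#   db_user=<database user>
#   db_pass=<database password>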


def get_quotes(symbols, start_date, end_date, row_limit):
# load the contents of the .env file into the environment
load_dotenv()

# read the credentials from the environment variables
host = os.getenv("host")
server_user = os.getenv("server_user")
server_password = os.getenv("server_password")
os.getenv("db_user")
os.getenv("db_pass")

ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

scp = None

try:
ssh.connect(host, username=server_user, password=server_password)

for symbol in symbols:
# Activate the conda environment on the remote server and run the quote query helper
command = f'source /opt/anaconda3/etc/profile.d/conda.sh && conda activate query_user && cd TAQNYSE-Clickhouse && cd server_helpers && \
python3 quote_server_helpers.py "{db_user}" "{db_pass}" "{symbol}" "{start_date}" "{end_date}" "{row_limit}"'
stdin, stdout, stderr = ssh.exec_command(command)

print(f"Output for symbol {symbol}:")
for line in stdout:
print("... " + line.strip("\n"))

print(f"Errors for symbol {symbol}:")
for line in stderr:
print("... " + line.strip("\n"))

# SCPClient takes a paramiko transport as an argument
scp = SCPClient(ssh.get_transport())

# fetch the remote file 'quote_results.csv' from the directory 'TAQNYSE-Clickhouse'
# and save it under the local data/ directory
local_file_path = f'data/quotes_{symbol}_{start_date.replace("-", "")}-{end_date.replace("-", "")}.csv'
os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
scp.get("TAQNYSE-Clickhouse/quote_results.csv", local_file_path)

except Exception as e:
print(f"An error occurred: {e}")

finally:
if scp is not None:
# close the SCP session
scp.close()
ssh.close()


def get_ref(symbols, start_date, end_date, row_limit):
# load the contents of the .env file into the environment
load_dotenv()

# read the credentials from the environment variables
host = os.getenv("host")
server_user = os.getenv("server_user")
server_password = os.getenv("server_password")
os.getenv("db_user")
os.getenv("db_pass")

ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

scp = None

try:
ssh.connect(host, username=server_user, password=server_password)

for symbol in symbols:
# Activate the conda environment on the remote server and run the reference data query helper
command = f'source /opt/anaconda3/etc/profile.d/conda.sh && conda activate query_user && cd TAQNYSE-Clickhouse && cd server_helpers && \
python3 refdata_server_helpers.py "{db_user}" "{db_pass}" "{symbol}" "{start_date}" "{end_date}" "{row_limit}"'
stdin, stdout, stderr = ssh.exec_command(command)

print(f"Output for symbol {symbol}:")
for line in stdout:
print("... " + line.strip("\n"))

print(f"Errors for symbol {symbol}:")
for line in stderr:
print("... " + line.strip("\n"))

# SCPClient takes a paramiko transport as an argument
scp = SCPClient(ssh.get_transport())

# fetch the remote file 'refdata_results.csv' from the directory 'TAQNYSE-Clickhouse'
# and save it under the local data/ directory
local_file_path = f'data/ref_{symbol}_{start_date.replace("-", "")}-{end_date.replace("-", "")}.csv'
os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
scp.get("TAQNYSE-Clickhouse/refdata_results.csv", local_file_path)

except Exception as e:
print(f"An error occurred: {e}")

finally:
if scp is not None:
# close the SCP session
scp.close()
ssh.close()
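
A minimal usage sketch for these helpers, assuming a populated `.env` file and the repo root as the working directory; the symbols, dates, and row limit below are illustrative, not defaults:

```
from data_preprocessing.get_data import get_quotes, get_ref, get_trades

symbols = ["AAPL", "MSFT"]  # hypothetical symbol list
start_date, end_date = "2020-01-01", "2020-01-03"
row_limit = 1000000  # forwarded verbatim to the server-side helper scripts

# each call writes one CSV per symbol under data/
get_trades(symbols, start_date, end_date, row_limit)
get_quotes(symbols, start_date, end_date, row_limit)
get_ref(symbols, start_date, end_date, row_limit)
```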
96 changes: 0 additions & 96 deletions data_preprocessing/load_data.py

This file was deleted.

