# -*- coding: utf-8 -*-
"""
Filename: hsb_trade_tracker.py
Date created: Fri Aug 21 12:17:57 2020
@author: Julio Hong
Purpose: Tracks all the ongoing trades in the bazaar of Hypixel Skyblock. Focus on lapis lazuli for now.
Track how the amount per price changes over time for buy/sell orders.
Maybe also track instant buy/sell
Steps:
Find the lapis_info
Create a df for each price. Then each row represents a timestamp, and the data represents the amount transacted.
Create a dict of dfs? But updating them would be annoying. Maybe multi-index? Timestamp -> prices
But if a price is absent? Still have to add a timestamp row to show zero value.
It'll be wasteful I guess, but if timestamp -> price, some prices will be absent within timestamps.
Price -> Timestamp. Timestamps are columns because they are easier to add to the df
If there is a new price, then add a new row to the outer index
Store the lapis_info in an external document, maybe a spreadsheet.
Refresh the scraping of lapis_info
Change to melons because lapis is full of speculation and price depression.
Graph the info
Notes: https://hypixel.net/threads/skyblock-cant-find-what-item-is-that.3064833/
Cowtipper said:
Hypixel SkyBlock uses the pre-Minecraft 1.13 damage/data values for some items:
INK_SACK:0 is the normal ink sack (= a dye with damage value zero).
INK_SACK:3 is a dye with damage value 3: Cocoa Beans.
INK_SACK:4 is Lapis Lazuli respectively.
You can see the full list of all item>damage values on the Minecraft Wiki.
It works the same way for logs and fish: for example RAW_FISH:0 is the normal fish, while RAW_FISH:3 is pufferfish.
Check for the largest spread for bazaar flipping?
"""
import requests
import datetime as dt
from time import sleep
import pandas as pd
import os
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = [10, 5]
# To adjust the dataframe appearance
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 20)
pd.set_option('display.width', 200)
# sold_over_time_df = pd.DataFrame(index=)
# Unique to each player and can be updated; read from the environment so the key is never committed
api_key = os.environ.get('HYPIXEL_API_KEY', '')
payload = {'key': api_key}
sold_over_time_dfs = {}
bought_over_time_dfs = {}
spread_over_time_df = pd.DataFrame([])
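# Layout note: spread_over_time_df grows one row per good and one column per
# scrape timestamp, e.g. spread_over_time_df.loc['MELON', time_of_scrape]
# holds the buy/sell gap recorded for MELON at that scrape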
# Add the selected keys into a list
# goods_of_interest = ['MELON', 'IRON_INGOT', 'DIAMOND', 'WHEAT', 'OBSIDIAN']
# goods_of_interest = ['DIAMOND', 'INK_SACK:4']
goods_of_interest = 'ALL GOODS'

def export_create_or_update(filename, input_df, good=''):
    # Export the results: append a new sheet (named by the module-level
    # `timestamp`) to an existing workbook, or create the workbook if absent
    # Use goods_of_interest to generate filenames?
    if good == '':
        # Check if the file already exists
        if os.path.exists(filename):
            print('Update existing spreadsheet')
            # mode='a' appends new sheets to the existing workbook
            writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
        else:
            print('Create new spreadsheet')
            writer = pd.ExcelWriter(filename, engine='xlsxwriter')
    else:
        # Determine if a buy or sell sheet is being updated
        if 'sold' in filename:
            txn_type = 'sell'
        elif 'bought' in filename:
            txn_type = 'buy'
        else:
            txn_type = 'unknown'
        # Check if the file already exists
        if os.path.exists(filename):
            print('Update existing ' + txn_type + '_spreadsheet for ' + good)
            writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
        else:
            print('Create new ' + txn_type + '_spreadsheet for ' + good)
            writer = pd.ExcelWriter(filename, engine='xlsxwriter')
    # Common code for both branches
    input_df.to_excel(writer, sheet_name=timestamp)
    writer.close()
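
# Usage sketch (hypothetical filename/data; note the function reads the
# module-level `timestamp` set in the run section below):
#   timestamp = '2020-08-21 12_17_57'
#   export_create_or_update('MELON_sold_over_time.xlsx', sold_over_time_dfs['MELON'], 'MELON')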

def find_best_spread(bought_over_time_dfs, spread_over_time_df, interval=50):
    # Track the number of transactions for each item as well as the spread:
    # popularity matters, or else I end up trying to flip slow-moving items.
    # Crude approach: average the deltas within the latest interval to see
    # whether any units moved, then use that as a weight. Price doesn't matter
    # here, just the volume. (The 'orders' column was dropped during scraping,
    # so unit deltas stand in for order counts.)
    def calc_timestamp_deltas(bought_over_time_dfs, interval):
        trade_freq = pd.Series(index=list(bought_over_time_dfs.keys()), dtype=float)
        for item in bought_over_time_dfs.keys():
            # Track the delta between consecutive timestamp columns
            deltas_over_time = bought_over_time_dfs[item].diff(axis=1)
            delta_average = deltas_over_time.sum().sum() / interval
            trade_freq.loc[item] = abs(delta_average)
        return trade_freq
    # The mean within the latest interval is less susceptible to random error
    # than the latest value alone
    latest_spreads = pd.DataFrame(index=spread_over_time_df.index)
    latest_spreads['mean_spread'] = spread_over_time_df.iloc[:, -interval:].mean(axis=1)
    latest_spreads['trade_freq_multiplier'] = calc_timestamp_deltas(bought_over_time_dfs, interval)
    # Open question: how to distinguish trades being satisfied from trades being removed?
    # quick_status sellPrice sometimes doesn't match the bazaar sell values,
    # so derive the sell price from the recorded sell book instead
    for key in sold_over_time_dfs.keys():
        # Take the highest price present in the sell book over the interval
        latest_means = sold_over_time_dfs[key].iloc[:, -interval:].mean(axis=1)
        non_nan_means = latest_means.dropna()
        try:
            latest_spreads.loc[key, 'sellPrice'] = max(list(non_nan_means.index))
        except ValueError:
            # Sentinel: a negative sellPrice makes profit_pct negative, so the
            # item sinks to the bottom of the best-options ranking
            latest_spreads.loc[key, 'sellPrice'] = -1
    # Calculate profit% from the spread
    latest_spreads['profit_pct'] = latest_spreads['mean_spread'] / latest_spreads['sellPrice']
    # Weight the spread by trade frequency and profit%
    latest_spreads['weighted_spread'] = (latest_spreads['mean_spread']
                                         * latest_spreads['trade_freq_multiplier']
                                         * latest_spreads['profit_pct'])
    latest_spreads = latest_spreads.round(1)
    return latest_spreads
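
# Worked example of the weighting (numbers invented): mean_spread 2.0 and
# sellPrice 4.0 give profit_pct 0.5; with trade_freq_multiplier 300 the
# weighted_spread is 2.0 * 300 * 0.5 = 300.0, so a wide spread on a
# slow-moving item ranks below a slightly narrower spread on a busy one.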

def load_goods_records(path):
    # Load previous results and stitch them back together
    # Find all the recording spreadsheets in the directory
    recording_spreadsheets = [file for file in os.listdir(path) if file.endswith(".xlsx")]
    for file in recording_spreadsheets:
        filename_list = file.split('_')
        good = filename_list[0]
        # Combine all the sheets into a single dataframe, one column per timestamp
        read_df = pd.concat(pd.read_excel(os.path.join(path, file), sheet_name=None, index_col=0), axis=1)
        # Remove the sheet-name level of the multi-index columns
        read_df.columns = read_df.columns.droplevel()
        # Sort into the sell or buy dict based on the filename
        if filename_list[1] == 'sold':
            sold_over_time_dfs[good] = read_df
        elif filename_list[1] == 'bought':
            bought_over_time_dfs[good] = read_df
        else:
            # e.g. txn_spread_over_time.xlsx fits neither pattern; skip it
            print('Skipping unrecognised file: ' + file)
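
# Usage sketch (assumes the folder holds workbooks named like
# 'MELON_sold_over_time.xlsx', matching what the export section below writes):
#   load_goods_records(r"C:\Users\Julio Hong\Documents\GitHub\Minecraft\Hypixel Skyblock")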

#=========== RUN CODE ===========
# Defined up front so the KeyboardInterrupt handler below can always reach it
folder_path = r"C:\Users\Julio Hong\Documents\GitHub\Minecraft\Hypixel Skyblock\\"
while True:
    try:
        start_time = dt.datetime.now()
        runtime_elapsed = dt.timedelta(minutes=1)
        runtime_cap = dt.timedelta(minutes=15)
        # # 1) Testing mode: bound the loop with a counter instead
        # count = 0
        # while count < 4:
        #     count += 1
        # 2) Run mode: scrape repeatedly until the 15min runtime cap
        while runtime_elapsed < runtime_cap:
            try:
                time_of_scrape = dt.datetime.now()
                print('Hypixel Skyblock Bazaar trading for ALL GOODS running at ' + str(time_of_scrape))
                r = requests.get('https://api.hypixel.net/skyblock/bazaar', params=payload)
                json_data = r.json()
                json_products = json_data['products']
                # Track everything the bazaar offers
                goods_of_interest = list(json_products.keys())
                sell_dfs = {}
                buy_dfs = {}
                # Generalised to every good; I don't know if that's wise
                for good in json_products.keys():
                    # Convert sell_summary and buy_summary into dfs
                    # (sell and buy are reversed in the API for some reason)
                    sell_dfs[good] = pd.DataFrame(json_products[good]['sell_summary'])
                    buy_dfs[good] = pd.DataFrame(json_products[good]['buy_summary'])
                    # If there are no transactions available at the time
                    if sell_dfs[good].empty or buy_dfs[good].empty:
                        print(good + ' has no transactions')
                        sell_dfs[good]['orders'] = -1
                        sell_dfs[good][time_of_scrape] = -1
                        buy_dfs[good]['orders'] = -1
                        buy_dfs[good][time_of_scrape] = -1
                        spread_over_time_df.loc[good, time_of_scrape] = -1
                    else:
                        # Change price to the index
                        sell_dfs[good].set_index('pricePerUnit', inplace=True)
                        buy_dfs[good].set_index('pricePerUnit', inplace=True)
                        # Rename the amount column to the timestamp
                        sell_dfs[good].rename(columns={'amount': time_of_scrape}, inplace=True)
                        buy_dfs[good].rename(columns={'amount': time_of_scrape}, inplace=True)
                        # Set a threshold volume (planned filter; not applied yet)
                        threshold_vol = 2000
                        # Find the highest sell-book price and the lowest buy-book price
                        highest_sell = max(sell_dfs[good].index)
                        lowest_buy = min(buy_dfs[good].index)
                        # Find the delta and store it in a df for each good against time
                        spread_over_time_df.loc[good, time_of_scrape] = lowest_buy - highest_sell
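                        # e.g. (invented numbers) a lowest buy order of 5.3 coins against
                        # a highest sell order of 4.1 coins -> a spread of 1.2 coins per unit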
                        # If the good has been seen before, concat the new timestamp column;
                        # else set the current df as the template
                        if good in goods_of_interest:
                            if good in sold_over_time_dfs.keys():
                                sold_over_time_dfs[good] = pd.concat([sold_over_time_dfs[good], sell_dfs[good][time_of_scrape]], axis=1)
                                bought_over_time_dfs[good] = pd.concat([bought_over_time_dfs[good], buy_dfs[good][time_of_scrape]], axis=1)
                            else:
                                print('Initialising over_time_dfs for ' + good)
                                sold_over_time_dfs[good] = sell_dfs[good].copy()
                                bought_over_time_dfs[good] = buy_dfs[good].copy()
                                # Temporary until I'm sure the amount is being tabulated properly
                                sold_over_time_dfs[good].drop(columns='orders', inplace=True)
                                bought_over_time_dfs[good].drop(columns='orders', inplace=True)
                print('Scrape completed at ' + str(dt.datetime.now()))
                sleep(10)
                # Measure how long the loop has been running
                runtime_elapsed = dt.datetime.now() - start_time
            # Allow the scraping loop to be broken with Ctrl+C
            except KeyboardInterrupt:
                break
        # Everything below also runs after the while-loop ends normally,
        # not only on interrupt
        # Excel can't accept ":" in a sheet name
        timestamp = str(dt.datetime.now()).replace(':', '_')
        print('End scraping')
        # Graph the results (can't really make sense of it though...)
        # bought_over_time_dfs['DIAMOND'].T.plot()
        # Save data
        print('Exporting results to Excel')
        sell_spreadsheets = {}
        buy_spreadsheets = {}
        goods_of_interest = ['DIAMOND', 'INK_SACK:4']
        for good in goods_of_interest:
            # ":" isn't allowed in filenames either
            good_name = good.replace(':', '_')
            sell_spreadsheets[good] = os.path.join(folder_path, good_name + "_sold_over_time.xlsx")
            buy_spreadsheets[good] = os.path.join(folder_path, good_name + "_bought_over_time.xlsx")
            export_create_or_update(sell_spreadsheets[good], sold_over_time_dfs[good], good)
            export_create_or_update(buy_spreadsheets[good], bought_over_time_dfs[good], good)
        # If the run ended before a full interval's worth of scrapes
        # (roughly one scrape every 15s), truncate the interval to the
        # number of columns actually recorded
        interval = 50
        if runtime_elapsed < dt.timedelta(seconds=interval * 15):
            interval = len(spread_over_time_df.columns)
        more_spreads = find_best_spread(bought_over_time_dfs, spread_over_time_df, interval)
        print(more_spreads.sort_values(by=['weighted_spread']))
        # Export the spreads
        txn_spread_file = os.path.join(folder_path, "txn_spread_over_time.xlsx")
        export_create_or_update(txn_spread_file, more_spreads.sort_values(by=['weighted_spread'], ascending=False))
        print('Results successfully exported')
    except KeyboardInterrupt:
        # Interrupted during export: run the same truncation and spread export as above
        interval = 50
        if runtime_elapsed < dt.timedelta(seconds=interval * 15):
            interval = len(spread_over_time_df.columns)
        more_spreads = find_best_spread(bought_over_time_dfs, spread_over_time_df, interval)
        print(more_spreads.sort_values(by=['weighted_spread']))
        # Export the spreads
        txn_spread_file = os.path.join(folder_path, "txn_spread_over_time.xlsx")
        export_create_or_update(txn_spread_file, more_spreads.sort_values(by=['weighted_spread'], ascending=False))
        print('Results successfully exported')
        break
# Run this entire script until I want it to stop; it loops every 10-15 min
# The API key comes from the HYPIXEL_API_KEY environment variable, so there is
# no hard-coded key to scrub before sharing