-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.py
198 lines (173 loc) · 9.74 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import asyncio
import datetime
import itertools
import logging
import os
import uuid
import dotenv
from utils.utils import rate_limited_retrying_request
from collections import defaultdict
from clients.fantasy_websocket_client import FantasyWebsocketClient
from clients.leaderboard_websocket_client import LeaderboardWebsocketClient
from clients.result_websocket_client import ResultWebsocketClient
from messages.fantasy_msgs import SubLeague
from messages.leaderboard_msgs import SubLeaderboard, Stat
from messages.result_msgs import SubCompetition, TeamMatchResult, PlayerResult, SubTeam, Match, Series
from utils.constants import DATE_FMT
from data.dota_ids import FANTASY_COMPETITION_ID, FANTASY_USER_LEADERBOARD_ID, FANTASY_PLAYER_LEADERBOARD_ID
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load variables from a local .env file into the process environment, then
# read the Steam Web API key from it. The script cannot query Steam without
# the key, so fail at import time with a clear message.
dotenv.load_dotenv()
APIKEY = os.getenv("STEAMAPIKEY")
if not APIKEY:
    raise Exception("Must set STEAMAPIKEY .env var")

# Steam Web API endpoints. The API key travels in the query string, so these
# use https to avoid sending the credential in cleartext.
LEAGUE_LISTING_URL = "https://api.steampowered.com/IDOTA2Match_570/GetLeagueListing/v0001?key={key}"
MATCH_LISTING_URL = "https://api.steampowered.com/IDOTA2Match_570/GetMatchHistory/v0001?key={key}&league_id={league_id}"
MATCH_DETAILS_URL = "https://api.steampowered.com/IDOTA2Match_570/GetMatchDetails/v0001?key={key}&match_id={match_id}"
# OpenDota match endpoint; takes no API key.
MATCH_DETAILS_OPEN_DOTA_URL = "https://api.opendota.com/api/matches/{match_id}"

# Maps a Dota league id (as used by the Steam API) to the fantasy competition id.
DOTA_TO_FANTASY_LEAGUE_IDS = {11979: FANTASY_COMPETITION_ID}
async def get_league_results(result_client, fantasy_client, leaderboard_client, league_id, tstamp_from=0):
    """Scrape not-yet-recorded matches of a Dota league and publish their results.

    For every match in the Steam match listing of ``league_id`` that started at
    or after ``tstamp_from`` and whose Dota match id is not already attached to
    a fantasy match with team results, this:

    1. reads the latest user/player leaderboards,
    2. fetches the match from OpenDota,
    3. sends a new series, match, two team results and the player results
       through ``result_client``,
    4. sends the updated user and player point totals through
       ``leaderboard_client``.

    Matches are handled strictly one after another: each iteration re-reads the
    latest leaderboards and adds this match's points on top, so running matches
    concurrently would lose updates.

    Args:
        result_client: client exposing ``send_sub_competitions``,
            ``send_sub_teams`` and the ``send_insert_*`` result calls.
        fantasy_client: client exposing ``send_sub_leagues`` and
            ``send_get_latest_teams``.
        leaderboard_client: client exposing ``send_get_latest_leaderboards``
            and ``send_insert_stats``.
        league_id: Dota league id; must be a key of
            ``DOTA_TO_FANTASY_LEAGUE_IDS``.
        tstamp_from: only matches whose ``start_time`` is >= this value are
            scraped.

    Raises:
        KeyError: if ``league_id`` has no fantasy competition mapping, or a
            team/player in a match has no fantasy id mapping.
    """
    fantasy_competition_id = DOTA_TO_FANTASY_LEAGUE_IDS[league_id]
    # NOTE(review): rate_limited_retrying_request is called without await, so
    # it presumably blocks the event loop while it runs - confirm.
    match_list = rate_limited_retrying_request(
        MATCH_LISTING_URL.format(key=APIKEY, league_id=league_id)
    )["result"]["matches"]
    fantasy_competition_hierarchy = (await result_client.send_sub_competitions(
        SubCompetition(all=False, sub_competition_ids=[fantasy_competition_id])
    ))["data"]
    fantasy_teams_and_players = (await result_client.send_sub_teams(
        SubTeam(toggle=True)
    ))["data"]
    DOTA_TO_FANTASY_TEAM_IDS = {t["meta"]["dota_id"]: t["team_id"] for t in fantasy_teams_and_players}
    DOTA_TO_FANTASY_PLAYER_IDS = {
        p["player"]["meta"]["dota_id"]: p["player"]["player_id"]
        for t in fantasy_teams_and_players
        for p in t["players"]
    }
    fantasy_matches = list(itertools.chain.from_iterable(
        s["matches"] for s in fantasy_competition_hierarchy[0]["series"]
    ))
    fantasy_leagues = await fantasy_client.send_sub_leagues(
        SubLeague(all=True)
    )
    fantasy_league = next(f for f in fantasy_leagues["data"] if f["competition_id"] == fantasy_competition_id)
    user_teams = (await fantasy_client.send_get_latest_teams())["data"]

    # Filter out matches that already have their results set.
    # This must be a set, not a generator: it is tested for membership once per
    # listed match, and a generator would be exhausted by the first test.
    existing_match_ids = {m["meta"]["dota_id"] for m in fantasy_matches if len(m["team_results"]) != 0}
    matches_to_scrape = [
        m["match_id"] for m in match_list
        if m["match_id"] not in existing_match_ids and m["start_time"] >= tstamp_from
    ]
    # TODO handle the dodgy api matches/remakes
    logger.info(f"matches_to_scrape: {matches_to_scrape}")

    for match_id in matches_to_scrape:
        # Re-read the totals for every match so points accumulate on top of
        # whatever the previous iteration just inserted.
        leaderboards = await leaderboard_client.send_get_latest_leaderboards(
            [FANTASY_USER_LEADERBOARD_ID, FANTASY_PLAYER_LEADERBOARD_ID]
        )
        user_points = next(l for l in leaderboards["data"] if "User Points" in l["name"])["leaderboard"]
        logger.debug(f"Current user points: {user_points}")
        user_points_dict = {p["player_id"]: p for p in user_points}
        player_points = next(l for l in leaderboards["data"] if "Player Points" in l["name"])["leaderboard"]
        player_points_dict = {p["player_id"]: p for p in player_points}

        # TODO search for match with correct teams.
        # For now dont put matches into db preemptively, just add when we get results.
        # throw the team names in meta?
        fantasy_match_id = str(uuid.uuid4())
        fantasy_series_id = str(uuid.uuid4())
        # NOTE(review): the Steam match-details response is fetched but not
        # read below; only the OpenDota response is used.
        match_resp = rate_limited_retrying_request(MATCH_DETAILS_URL.format(key=APIKEY, match_id=match_id))
        odota_match_resp = rate_limited_retrying_request(MATCH_DETAILS_OPEN_DOTA_URL.format(match_id=match_id))

        radiant_fantasy_id = DOTA_TO_FANTASY_TEAM_IDS[odota_match_resp["radiant_team_id"]]
        dire_fantasy_id = DOTA_TO_FANTASY_TEAM_IDS[odota_match_resp["dire_team_id"]]
        radiant_meta = {"kills": odota_match_resp["radiant_score"], "dota_id": match_id}
        dire_meta = {"kills": odota_match_resp["dire_score"], "dota_id": match_id}
        radiant_win = odota_match_resp["radiant_win"]
        # The result is sent as the string "1" for the winner and "0" for the loser.
        team_match_results = [
            TeamMatchResult(fantasy_match_id, radiant_fantasy_id, "1" if radiant_win else "0", radiant_meta),
            TeamMatchResult(fantasy_match_id, dire_fantasy_id, "0" if radiant_win else "1", dire_meta),
        ]

        # TODO use match start to find period and thus find period-multiplier
        period_multiplier = 1.0
        now = datetime.datetime.now(datetime.timezone.utc).strftime(DATE_FMT)
        start_time = datetime.datetime.fromtimestamp(odota_match_resp["start_time"], datetime.timezone.utc)
        end_time = start_time + datetime.timedelta(seconds=odota_match_resp["duration"])
        timespan = (start_time.strftime(DATE_FMT), end_time.strftime(DATE_FMT))
        title = f'{odota_match_resp["radiant_team"]["name"]} v {odota_match_resp["dire_team"]["name"]}'

        # Each scraped match gets its own single-match series.
        series = Series(
            fantasy_series_id, title, timespan,
            competition_id=fantasy_competition_id
        )
        match = Match(
            fantasy_match_id, title, timespan,
            fantasy_series_id,
            meta={"dota_id": match_id}
        )

        player_results = []
        for player in odota_match_resp["players"]:
            # Appends to player_results and adds points to both dicts in place.
            parse_player(
                player, fantasy_league, fantasy_match_id, DOTA_TO_FANTASY_PLAYER_IDS, period_multiplier,
                player_results, user_teams,
                user_points_dict, player_points_dict
            )
        for val in user_points_dict.values():
            val["timestamp"] = now
        for val in player_points_dict.values():
            val["timestamp"] = now

        # Send only this match's records, so earlier series/matches are not
        # re-inserted on later iterations. Series and match go first because
        # the results reference their ids.
        await result_client.send_insert_series([series])
        await result_client.send_insert_matches([match])
        # TODO can do 2 below at same time. not sequentially.
        await result_client.send_insert_team_match_results(team_match_results)
        await result_client.send_insert_player_results(player_results)

        # The way we update users/player points means this func shouldnt ever
        # be run concurrently/parallelised across diff matches
        logger.info(f"User points: {user_points_dict}")
        logger.info(f"Player points: {player_points_dict}")
        await leaderboard_client.send_insert_stats([
            Stat(user["leaderboard_id"], user["player_id"], now, user["points"])
            for user in user_points_dict.values()
        ])
        await leaderboard_client.send_insert_stats([
            Stat(x["leaderboard_id"], x["player_id"], now, x["points"])
            for x in player_points_dict.values()
        ])
def parse_player(
        player, fantasy_league, fantasy_match_id, DOTA_TO_FANTASY_PLAYER_IDS, period_multiplier, player_results, teams,
        user_points_dict, player_points_dict
):
    """Score one player's match performance and add it to the running totals.

    Computes the player's fantasy points as the sum over the league's stat
    multipliers of ``multiplier * stat value * period_multiplier``, appends a
    ``PlayerResult`` to ``player_results``, and adds the points to every user
    team containing the player and to the player's own leaderboard entry.
    ``player_results``, ``user_points_dict`` and ``player_points_dict`` are
    mutated in place; nothing is returned.

    Args:
        player: one entry of the OpenDota match ``players`` list; read for
            ``account_id``, ``name`` and the per-stat fields.
        fantasy_league: league dict; its ``stat_multipliers`` entries provide
            ``name`` and ``multiplier``.
        fantasy_match_id: id of the fantasy match the result belongs to.
        DOTA_TO_FANTASY_PLAYER_IDS: maps Dota account id to fantasy player id.
        period_multiplier: factor applied to every stat's points.
        player_results: list that receives the new ``PlayerResult``.
        teams: mapping of user team id to a container of fantasy player ids.
        user_points_dict: leaderboard entries keyed by user team id.
        player_points_dict: leaderboard entries keyed by fantasy player id.

    Raises:
        KeyError: if the player's account id has no fantasy mapping, or a
            scored stat is missing from ``player``.
    """
    # Fantasy stat names whose OpenDota field is not simply the name with
    # spaces replaced by underscores.
    odota_stats = {
        "first blood": "firstblood_claimed",
        "stun": "stuns",
        "GPM": "gold_per_min",
        "last hits": "last_hits",
        "runes": "rune_pickups",
        "roshans": "roshans_killed",
        "towers": "towers_killed",
        "dewards": "observer_kills",
        "observer wards": "obs_placed",
    }
    fantasy_player_id = DOTA_TO_FANTASY_PLAYER_IDS[player["account_id"]]
    player_result = {"points": 0.0}
    for stat in fantasy_league["stat_multipliers"]:
        odota_stat_name = odota_stats.get(stat["name"], stat["name"]).replace(" ", "_")
        stat_value = player[odota_stat_name]
        player_result[stat["name"]] = stat_value
        # The period multiplier scales the stat's points; adding it instead
        # would hand every player a flat bonus per stat.
        player_result["points"] += stat["multiplier"] * stat_value * period_multiplier
    logger.info(f"Player points {player['name']}: {player_result['points']}")
    player_results.append(PlayerResult(
        fantasy_match_id, fantasy_player_id, player_result, meta={'name': player['name']}
    ))

    # Credit every user team that has picked this player.
    for team_id, team in teams.items():
        if fantasy_player_id in team:
            logger.info(f"Adding points to team {team_id}: {player_result['points']}")
            try:
                user_points_dict[team_id]["points"] += player_result["points"]
            except KeyError:
                # First points for this team: create its leaderboard entry.
                new_user = {
                    "player_id": team_id,
                    "points": player_result["points"],
                    "leaderboard_id": FANTASY_USER_LEADERBOARD_ID,
                }
                user_points_dict[team_id] = new_user

    # Credit the player's own leaderboard entry once, regardless of how many
    # teams picked them.
    try:
        player_points_dict[fantasy_player_id]["points"] += player_result["points"]
    except KeyError:
        new_player = {
            "player_id": fantasy_player_id,
            "points": player_result["points"],
            "leaderboard_id": FANTASY_PLAYER_LEADERBOARD_ID,
        }
        player_points_dict[fantasy_player_id] = new_player
async def main():
    """Start the three websocket clients and scrape results for league 11979.

    The clients' ``run()`` coroutines are started as background tasks, the
    league results are scraped, and the background tasks are cancelled once
    scraping finishes or fails.
    """
    # ran into this fun issue defining them outside main, loose in the file
    # https://stackoverflow.com/a/55918049/3920439
    result_client = ResultWebsocketClient()
    fantasy_client = FantasyWebsocketClient()
    leaderboard_client = LeaderboardWebsocketClient()
    # Keep strong references to the tasks: the event loop only holds weak
    # references, so an unreferenced task may be garbage-collected mid-run.
    client_tasks = [
        asyncio.create_task(result_client.run()),
        asyncio.create_task(fantasy_client.run()),
        asyncio.create_task(leaderboard_client.run()),
    ]
    try:
        await get_league_results(result_client, fantasy_client, leaderboard_client, 11979)
    finally:
        # Scraping is done (or failed); stop the background client tasks.
        for task in client_tasks:
            task.cancel()
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())