#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import collections
import datetime
import glicko2
import json
import math
import os
import requests
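
# Fetch new game results for each competition, recompute weekly Glicko-2
# ratings, and regenerate the JSON data files under docs/data.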
results_dir = 'results'
data_dir = os.path.join('docs', 'data')
ragl_name = 'ragl'
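# Competitions mapped to None (here 'kk') have no feed to fetch; their stored
# results are only read back from the results directory.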
results_urls = {
    'kk': None,
    'ladder': 'http://oraladder.net/latest-js?mod=ra',
    ragl_name: 'https://ragl.org/games/json'
}
def str_to_date(date_string):
    return datetime.datetime.fromisoformat(date_string)

def timestamp(game):
    return datetime.datetime.timestamp(datetime.datetime.fromisoformat(game['date']))

def to_datetime(timestamp):
    return datetime.datetime.isoformat(datetime.datetime.fromtimestamp(int(timestamp)))

def make_results_filename(year, competition):
    results_filename = '{}_{}.json'.format(competition, year)
    return os.path.join(results_dir, results_filename)

def get_player_id(player_object):
    """Get the player id from the url (which has a format like /player/6010?period=all)."""
    return int(player_object['url'].split('/')[2].split('?')[0])

def create_player_dict(game_object, player_reference):
    """Normalise the two player formats the feeds return to {'name', 'id'}."""
    if isinstance(game_object[player_reference], str):
        return {'name': game_object[player_reference], 'id': game_object[player_reference + '_id']}
    return {'name': game_object[player_reference]['name'], 'id': get_player_id(game_object[player_reference])}
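
# Fetch any new games for each competition and append them to the per-year
# results files.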
for competition, competition_url in results_urls.items():
    # Find the latest game already stored.
    results_filenames_by_year = collections.defaultdict(dict)
    competition_years = []
    for results_filename in os.listdir(results_dir):
        file_competition, year_str = results_filename.split('.')[0].split('_')
        year = int(year_str)
        results_filenames_by_year[year][file_competition] = os.path.join(results_dir, results_filename)
        if file_competition == competition:
            competition_years.append(year)
    # Work backwards through the years until a non-empty results file is found.
    results = []
    for year in sorted(competition_years, reverse=True):
        results_filename = results_filenames_by_year[year][competition]
        with open(results_filename) as results_file:
            results = json.load(results_file)
        if len(results) > 0:
            break
    # Fall back to the epoch if nothing is stored yet, so every fetched game counts as new.
    last_stored_competition_time = str_to_date(results[-1]['date']) if results else str_to_date('1970-01-01 00:00:00')
    # Load the results from the url.
    if competition_url is not None:
        competition_games = requests.get(competition_url).json()
        new_games_by_year = collections.defaultdict(list)
        for competition_game in sorted(competition_games, key=lambda game: game['date']):
            # Ignore games involving deleted players.
            if competition_game['p0'] is None or competition_game['p1'] is None:
                continue
            # Store the results of new games.
            game_date = str_to_date(competition_game['date'])
            if game_date > last_stored_competition_time:
                # Restrict to just the minimum data we want.
                p0 = create_player_dict(competition_game, 'p0')
                p1 = create_player_dict(competition_game, 'p1')
                competition_game = {'date': competition_game['date'], 'map': competition_game['map'], 'p0': p0, 'p1': p1}
                new_games_by_year[game_date.year].append(competition_game)
        # Store any new results.
        for year in sorted(new_games_by_year.keys()):
            filename = make_results_filename(year, competition)
            if year in results_filenames_by_year and competition in results_filenames_by_year[year]:
                with open(filename) as results_file:
                    results = json.load(results_file)
                results += new_games_by_year[year]
            else:
                results = new_games_by_year[year]
                results_filenames_by_year[year][competition] = filename
            print('Adding {} games to {}'.format(len(new_games_by_year[year]), filename))
            with open(filename, 'w') as results_file:
                json.dump(results, results_file, indent=4, sort_keys=True)
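
# Stored player details and player id canonicalisation.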
def load_existing_player_data():
    """Load the stored per-player details, keyed by integer player id."""
    with open(os.path.join(data_dir, 'players.json')) as players_file:
        player_data = json.load(players_file)
    return {int(player_id): details for player_id, details in player_data.items()}
# replacements.json maps old player ids to their canonical replacements.
with open(os.path.join(data_dir, 'replacements.json')) as replacements_file:
    player_id_replacements = json.load(replacements_file)

def get_canonical_player_id(player):
    player_id = str(player['id'])
    # Follow chains of replacements: a replacement target may itself be replaced.
    while player_id in player_id_replacements:
        player_id = str(player_id_replacements[player_id])
    return int(player_id)
with open(os.path.join(data_dir, 'ragl.json')) as ragl_file:
    ragl_details = json.load(ragl_file)

start_datetime = str_to_date('2016-01-01 00:00:00')
now = datetime.date.today()
# Rating weeks are anchored to the start date above (a Friday). End at the next
# weekly boundary after today, so that only complete weeks are rated below.
end_date = now + datetime.timedelta(days=(10 - now.weekday()) % 7 + 1)
end_datetime = datetime.datetime.combine(end_date, datetime.datetime.min.time())
# Calculate Glicko2 rankings.
BATCH_DURATION = 7 * 24 * 60 * 60  # one week, in seconds
MAX_RD = 350

def glicko2_init():
    # Standard Glicko-2 starting values: rating 1500, maximum uncertainty.
    return glicko2.Player(vol=0.06, rating=1500, rd=MAX_RD)
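
# Rate games in consecutive one-week batches, producing one rating table per week.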
def glicko2_table(ratings, games):
    """Rate all games in weekly batches, returning per-week tables and player details."""
    batch_end = int(start_datetime.timestamp())
    played = collections.Counter()
    won = collections.Counter()
    data = collections.defaultdict(dict)
    player_data = load_existing_player_data()
    while batch_end < end_datetime.timestamp():
        batch_start = batch_end
        batch_end += BATCH_DURATION
        # Collect every player's opponents and outcomes for this week's batch.
        opponent_ratings = collections.defaultdict(list)
        opponent_rds = collections.defaultdict(list)
        results = collections.defaultdict(list)
        for game in reversed(games):
            if batch_start <= timestamp(game) < batch_end:
                # p0 is the winner, p1 the loser.
                winner = get_canonical_player_id(game['p0'])
                loser = get_canonical_player_id(game['p1'])
                opponent_ratings[winner].append(ratings[loser].rating)
                opponent_rds[winner].append(ratings[loser].rd)
                results[winner].append(1)
                opponent_ratings[loser].append(ratings[winner].rating)
                opponent_rds[loser].append(ratings[winner].rd)
                results[loser].append(0)
                played[winner] += 1
                played[loser] += 1
                won[winner] += 1
                for player in [game['p0'], game['p1']]:
                    if player['id'] not in player_data:
                        player_data[player['id']] = {'name': player['name']}
        for player_id in ratings.keys():
            if len(results[player_id]) > 0:
                ratings[player_id].update_player(opponent_ratings[player_id], opponent_rds[player_id], results[player_id])
            else:
                # Inactive players' rating deviation grows over time.
                ratings[player_id].did_not_compete()
            if ratings[player_id].rd > MAX_RD:
                # Cap the uncertainty and leave the player out of this week's table.
                ratings[player_id].rd = MAX_RD
            else:
                data[batch_end][player_id] = {
                    'i': player_id,
                    # Conservative estimate: rating minus three rating deviations.
                    'r': round(ratings[player_id].rating - 3 * ratings[player_id].rd),
                    'e': round(3 * ratings[player_id].rd),
                    'p': played[player_id],
                    'w': won[player_id]
                }
    return data, player_data
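
# Rate every stored game, oldest year first, tallying per-season RAGL
# played/won counts along the way.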
ratings = collections.defaultdict(glicko2_init)
results = []
per_player_ragl_games = collections.defaultdict(lambda: collections.defaultdict(lambda: {'p': 0, 'w': 0}))
for year in sorted(results_filenames_by_year.keys()):
    for competition, filename in results_filenames_by_year[year].items():
        with open(filename) as results_file:
            competition_results = json.load(results_file)
        if competition == ragl_name:
            for season, season_dates in ragl_details['seasonDates'].items():
                # Normalise the 'T' separator to match the stored date format.
                season_start = season_dates['start'].replace('T', ' ')
                season_end = season_dates['end'].replace('T', ' ')
                for result in competition_results:
                    # ISO date strings compare lexicographically in chronological order.
                    if season_start <= result['date'] <= season_end:
                        for p in ['p0', 'p1']:
                            per_player_ragl_games[get_canonical_player_id(result[p])][season]['p'] += 1
                        # p0 is the winner.
                        per_player_ragl_games[get_canonical_player_id(result['p0'])][season]['w'] += 1
        results += competition_results
data, player_data = glicko2_table(ratings, results)
for player_id, player_ragl_games in per_player_ragl_games.items():
    player_data[player_id]['ragl'] = player_ragl_games
# Create the data files.
with open(os.path.join(data_dir, 'timestamps.json'), 'w') as timestamp_file:
    json.dump(sorted(data.keys()), timestamp_file, indent=4, sort_keys=True)

player_rating_data = collections.defaultdict(list)
# Process weeks newest-first, so each player's history starts with the latest week.
for week_timestamp, week_data in sorted(data.items(), key=lambda item: -item[0]):
    week_table = []
    for player_id, player_week_data in sorted(week_data.items(), key=lambda item: -item[1]['r']):
        week_table.append(player_week_data)
        player_rating_data[player_id].append(dict(player_week_data))
        # The per-player history keeps the week timestamp instead of the player id.
        del player_rating_data[player_id][-1]['i']
        player_rating_data[player_id][-1]['d'] = week_timestamp
        # Tied ratings share a rank.
        frequency = len([row for row in week_data.values() if row['r'] == player_week_data['r']])
        rank = len([row for row in week_data.values() if row['r'] > player_week_data['r']]) + 1
        player_rating_data[player_id][-1]['o'] = rank
        # Rank percentile, measured at the midpoint of the tied group.
        player_rating_data[player_id][-1]['c'] = math.ceil(100.0 * ((rank - 1) + 0.5 * frequency) / len(week_data))
    with open(os.path.join(data_dir, 'weeks', 'w{}.json'.format(week_timestamp)), 'w') as week_file:
        json.dump(week_table, week_file, indent=4, sort_keys=True)
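
# Write the combined player details, including any RAGL season records.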
with open(os.path.join(data_dir, 'players.json'), 'w') as players_file:
    json.dump(player_data, players_file, indent=4, sort_keys=True)

position_data = []
percentile_data = []
for player_id, entries in player_rating_data.items():
    with open(os.path.join(data_dir, 'players', 'p{}.json'.format(player_id)), 'w') as player_rating_file:
        json.dump(entries, player_rating_file, indent=4, sort_keys=True)
    # Count how many weeks the player held each position and percentile.
    position_duration = collections.Counter()
    percentile_duration = collections.Counter()
    # Skip the week in progress and the very first week.
    for entry in entries[1:-1]:
        position_duration[entry['o']] += 1
        percentile_duration[entry['c']] += 1
    # Keep only each player's three best (lowest) positions and percentiles.
    position_duration = {position: count for position, count in position_duration.items() if position in sorted(position_duration.keys())[:3]}
    percentile_duration = {percentile: count for percentile, count in percentile_duration.items() if percentile in sorted(percentile_duration.keys())[:3]}
    if len(position_duration) > 0:
        position_data.append({'i': player_id, 'O': position_duration})
        percentile_data.append({'i': player_id, 'C': percentile_duration})
# Order records by best position/percentile, breaking ties by the longest time held.
position_data.sort(key=lambda entry: [[pair[0], -pair[1]] for pair in sorted(entry['O'].items())])
percentile_data.sort(key=lambda entry: [[pair[0], -pair[1]] for pair in sorted(entry['C'].items())])
with open(os.path.join(data_dir, 'records_position.json'), 'w') as records_file:
    json.dump(position_data, records_file, indent=4, sort_keys=True)
with open(os.path.join(data_dir, 'records_percentile.json'), 'w') as records_file:
    json.dump(percentile_data, records_file, indent=4, sort_keys=True)