Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Aggregated Logbook #41

Closed
wants to merge 6 commits into from
Closed
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 112 additions & 1 deletion src/boardlib/api/aurora.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import bs4
import requests
import pandas as pd

from datetime import datetime as dt
from dateutil.relativedelta import relativedelta
import boardlib.util.grades

HOST_BASES = {
Expand Down Expand Up @@ -580,5 +581,115 @@ def logbook_entries(board, username, password, grade_type="font", db_path=None):

return full_logbook_df

def bids_logbook_entries_agg(board, token, user_id, db_path=None):
    """Yield one dict per raw bid entry for a user, for aggregated-logbook use.

    Args:
        board: Board identifier (key into HOST_BASES).
        token: API session token for the board.
        user_id: Numeric user id whose bids are fetched.
        db_path: Optional path to a local board database; when given, climb
            names are resolved locally instead of via a per-climb API call.

    Yields:
        dict with keys climb_uuid, board, climb_name, angle, is_mirror,
        climbed_at, bid_count — one per raw bid entry.
    """
    raw_entries = get_bids_logbook(board, token, user_id)

    for raw_entry in raw_entries:
        yield {
            "climb_uuid": raw_entry["climb_uuid"],
            "board": board,
            # Prefer the local DB lookup when available; the API fallback costs
            # one HTTP request per climb.
            "climb_name": get_climb_name_from_db(db_path, raw_entry["climb_uuid"]) if db_path else get_climb_name(board, raw_entry["climb_uuid"]),
            "angle": raw_entry["angle"],
            "is_mirror": raw_entry["is_mirror"],
            "climbed_at": raw_entry["climbed_at"],
            "bid_count": raw_entry["bid_count"],
        }

def summarize_bids_agg(bids_df):
    """Collapse raw bid entries to one row per climb/day/mirror/angle.

    Groups on climb identity plus the calendar date of ``climbed_at``, sums
    ``bid_count`` within each group, renames the grouped date column to
    ``date``, and copies the summed count into a ``tries`` column for the
    downstream ascent/bid combination step.
    """
    group_keys = [
        'climb_uuid',
        'climb_name',
        'board',
        bids_df['climbed_at'].dt.date,  # group by calendar day, not timestamp
        'is_mirror',
        'angle',
    ]
    summary = (
        bids_df
        .groupby(group_keys)
        .agg(bid_count=('bid_count', 'sum'))
        .reset_index()
        .rename(columns={'climbed_at': 'date'})
    )
    summary['tries'] = summary['bid_count']
    return summary

def days_to_years_months_days(days):
    """Convert a day count into a human-readable duration string.

    Uses ``relativedelta`` against the current date, so year/month lengths
    are calendar-accurate for the window ending today. Zero-valued year and
    month parts are omitted; the day part is always included.

    Args:
        days: Number of days (non-negative int).

    Returns:
        A string such as ``"1 year and 2 months and 3 days"`` or ``"1 day"``.
    """
    start_date = dt.now()
    end_date = start_date - pd.Timedelta(days=days)
    delta = relativedelta(start_date, end_date)

    parts = []
    if delta.years > 0:
        parts.append(f"{delta.years} year{'s' if delta.years > 1 else ''}")
    if delta.months > 0:
        parts.append(f"{delta.months} month{'s' if delta.months > 1 else ''}")
    # Use != 1 so a zero remainder reads "0 days", not "0 day".
    parts.append(f"{delta.days} day{'s' if delta.days != 1 else ''}")
    return ' and '.join(parts)

def combine_ascents_and_bids_agg(ascents_df, bids_summary):
    """Merge per-ascent rows with summarized bid counts into one logbook list.

    For each ascent, a matching bid-summary row (same climb, calendar date,
    mirror flag, and angle) contributes its tries to the ascent's tries and
    is consumed (dropped) so it is not double-counted. Bid-summary rows left
    unmatched become their own logbook entries (sessions with no send).

    Args:
        ascents_df: DataFrame with columns board, angle, climb_uuid, name,
            date (datetime-like), tries, is_mirror.
        bids_summary: DataFrame with columns climb_uuid, climb_name, board,
            date (datetime.date), is_mirror, angle, tries.

    Returns:
        list of dicts with keys uid, board, climb_name, date, tries.
    """
    final_logbook = []

    for _, ascent_row in ascents_df.iterrows():
        ascent_date = ascent_row['date'].date()

        bid_match = bids_summary[
            (bids_summary['climb_uuid'] == ascent_row['climb_uuid']) &
            (bids_summary['date'] == ascent_date) &
            (bids_summary['is_mirror'] == ascent_row['is_mirror']) &
            (bids_summary['angle'] == ascent_row['angle'])
        ]

        # Fold any same-session bids into the ascent's try count, then consume
        # the bid row so the trailing loop doesn't emit it again.
        tries = ascent_row['tries']
        if not bid_match.empty:
            tries += bid_match.iloc[0]['tries']
            bids_summary = bids_summary.drop(bid_match.index)

        final_logbook.append({
            'uid': f"{ascent_row['climb_uuid']}-{ascent_row['angle']}",
            'board': ascent_row['board'],
            'climb_name': ascent_row['name'],
            'date': ascent_row['date'],
            'tries': tries,
        })

    # Remaining bid rows had no matching ascent: attempts-only sessions.
    for _, bid_row in bids_summary.iterrows():
        final_logbook.append({
            'uid': f"{bid_row['climb_uuid']}-{bid_row['angle']}",
            'board': bid_row['board'],
            'climb_name': bid_row['climb_name'],
            'date': bid_row['date'],
            'tries': bid_row['tries'],
        })
    return final_logbook

def logbook_entries_agg(board, token, user_id, db_path=None, grade_type="font"):
    """Build an aggregated logbook: one row per (uid, board, climb_name).

    Fetches bids and ascents for the user, summarizes bids per session,
    folds them into the matching ascents, then aggregates across sessions
    with the latest date, a session count, total tries, the age in days,
    and a human-readable "time since" string.
    """
    bid_entries = list(bids_logbook_entries_agg(board, token, user_id, db_path))
    raw_ascents = get_logbook(board, token, user_id)

    # No activity at all: return an empty frame with the key columns.
    if not bid_entries and not raw_ascents:
        return pd.DataFrame(columns=['uid', 'board', 'climb_name'])

    if bid_entries:
        bids_frame = pd.DataFrame(bid_entries)
        bids_frame['climbed_at'] = pd.to_datetime(bids_frame['climbed_at'])
        bids_summary = summarize_bids_agg(bids_frame)
    else:
        bids_summary = pd.DataFrame(columns=['climb_uuid', 'climb_name', 'board', 'date', 'is_mirror', 'angle', 'tries'])

    if raw_ascents:
        grades_dict = {grade['difficulty']: grade for grade in get_grades(board)}
        ascents_frame = pd.DataFrame(
            process_raw_ascent_entries(raw_ascents, board, db_path, grades_dict, grade_type)
        )
    else:
        ascents_frame = pd.DataFrame(columns=['board', 'angle', 'climb_uuid', 'name', 'date', 'tries', 'is_mirror'])

    combined = pd.DataFrame(combine_ascents_and_bids_agg(ascents_frame, bids_summary))
    combined['date'] = pd.to_datetime(combined['date'])

    aggregated_logbook = combined.groupby(['uid', 'board', 'climb_name']).agg(
        date=('date', 'max'),
        sessions=('uid', 'count'),
        tries=('tries', 'sum')
    ).reset_index()

    now = dt.now()
    aggregated_logbook['days'] = aggregated_logbook['date'].apply(lambda d: (now - pd.to_datetime(d)).days)
    aggregated_logbook['time_since'] = aggregated_logbook['days'].apply(days_to_years_months_days)
    return aggregated_logbook