From 51dd25c935e3351d2828b3c97af3d75b8a0e24ef Mon Sep 17 00:00:00 2001 From: James Kunstle Date: Thu, 21 Sep 2023 11:37:26 -0400 Subject: [PATCH] updated route redirections Signed-off-by: James Kunstle --- 8Knot/_login.py | 10 +- 8Knot/pages/chaoss/chaoss.py | 4 +- .../contrib_importance_over_time.py | 394 +++++++++--------- 8Knot/pages/index/index_layout.py | 4 +- 8Knot/pages/overview/overview.py | 12 +- 5 files changed, 221 insertions(+), 203 deletions(-) diff --git a/8Knot/_login.py b/8Knot/_login.py index b9b06b92..1895c4c8 100644 --- a/8Knot/_login.py +++ b/8Knot/_login.py @@ -39,7 +39,7 @@ def configure_server_login(server): # create flask-login object login = LoginManager(server) - login.login_view = "index" + login.login_view = "/" class User(UserMixin): def __init__(self, id): @@ -66,7 +66,7 @@ def load_user(id): @server.route("/logout/") def logout(): - users_cache = StrictRedis( + users_cache = redis.StrictRedis( host="redis-users", port=6379, password=os.getenv("REDIS_PASSWORD", ""), @@ -102,7 +102,7 @@ def oauth2_authorize(): provider = os.environ.get("OAUTH_CLIENT_NAME") if not current_user.is_anonymous: - return redirect(url_for("index")) + return redirect("/") provider_data = current_app.config["OAUTH2_PROVIDERS"].get(provider) if provider_data is None: @@ -140,7 +140,7 @@ def oauth2_callback(): provider = os.environ.get("OAUTH_CLIENT_NAME") if not current_user.is_anonymous: - return redirect(url_for("index")) + return redirect("/") provider_data = current_app.config["OAUTH2_PROVIDERS"].get(provider) if provider_data is None: @@ -151,7 +151,7 @@ def oauth2_callback(): for k, v in request.args.items(): if k.startswith("error"): flash(f"{k}: {v}") - return redirect(url_for("index")) + return redirect("/") # make sure that the state parameter matches the one we created in the # authorization request diff --git a/8Knot/pages/chaoss/chaoss.py b/8Knot/pages/chaoss/chaoss.py index dbdb3b16..57530964 100644 --- a/8Knot/pages/chaoss/chaoss.py +++ b/8Knot/pages/chaoss/chaoss.py @@ -10,7 +10,7 @@ from .visualizations.contrib_activity_cycle import gc_contrib_activity_cycle from .visualizations.contribs_by_action import gc_contribs_by_action from .visualizations.project_velocity import gc_project_velocity -from .visualizations.contrib_importance_over_time import gc_contrib_prolificacy_over_time +from .visualizations.contrib_importance_over_time import gc_contrib_prolificacy_over_time from .visualizations.contrib_importance_pie import gc_contrib_importance_pie warnings.filterwarnings("ignore") @@ -49,7 +49,7 @@ [ dbc.Col(gc_contrib_prolificacy_over_time, width=6), dbc.Col(gc_contrib_importance_pie, width=6), - ], + ], align="center", style={"marginBottom": ".5%"}, ), diff --git a/8Knot/pages/chaoss/visualizations/contrib_importance_over_time.py b/8Knot/pages/chaoss/visualizations/contrib_importance_over_time.py index a4263cce..e169f265 100644 --- a/8Knot/pages/chaoss/visualizations/contrib_importance_over_time.py +++ b/8Knot/pages/chaoss/visualizations/contrib_importance_over_time.py @@ -18,8 +18,8 @@ import datetime as dt from scipy import stats -PAGE = "chaoss" -VIZ_ID = "contrib-prolificacy-over-time" +PAGE = "chaoss" +VIZ_ID = "contrib-prolificacy-over-time" gc_contrib_prolificacy_over_time = dbc.Card( [ @@ -33,21 +33,23 @@ dbc.Popover( [ dbc.PopoverHeader("Graph Info:"), - dbc.PopoverBody(""" - This analysis is also referred to as "Bus Factor". For each action type, visualizes - the smallest group of contributors who account for a user-inputted percentage - of the total number of contributions. By default, the threshold is set to 50%. - Thus, the visualization will show the number of contributors who account for + dbc.PopoverBody( + """ + This analysis is also referred to as "Bus Factor". For each action type, visualizes + the smallest group of contributors who account for a user-inputted percentage + of the total number of contributions. By default, the threshold is set to 50%. + Thus, the visualization will show the number of contributors who account for 50% of all contributions made, per action type. Suppose two individuals authored 50% of the commits, then the contributor prolificacy is 2. Analysis is done over - a time range, and snapshots of the time range are set according to window width - and step size. By default, window width and step size are set to 6 months. - Thus, contributor prolificacy is calculated for each non-overlapping 6-month - snapshot of the time range provided. Optionally, contributors who have 'bot' or - any custom keyword(s) in their logins can be filtered out. Please note that gaps - in the graph indicate that no contributions of a specific action type(s) were made - during that time period. - """), + a time range, and snapshots of the time range are set according to window width + and step size. By default, window width and step size are set to 6 months. + Thus, contributor prolificacy is calculated for each non-overlapping 6-month + snapshot of the time range provided. Optionally, contributors who have 'bot' or + any custom keyword(s) in their logins can be filtered out. Please note that gaps + in the graph indicate that no contributions of a specific action type(s) were made + during that time period. + """ + ), ], id=f"popover-{PAGE}-{VIZ_ID}", target=f"popover-target-{PAGE}-{VIZ_ID}", # needs to be the same as dbc.Button id @@ -58,66 +60,67 @@ dcc.Graph(id=f"{PAGE}-{VIZ_ID}"), ), dbc.Form( - [ + [ dbc.Row( - [ dbc.Label( + [ + dbc.Label( "Threshold:", html_for=f"threshold-{PAGE}-{VIZ_ID}", width="auto", ), dbc.Col( - [dcc.Slider( - id=f"threshold-{PAGE}-{VIZ_ID}", - min=10, - max=95, - value=50, - marks={i: f'{i}%' for i in range(10, 100, 5)} - ), - ], + [ + dcc.Slider( + id=f"threshold-{PAGE}-{VIZ_ID}", + min=10, + max=95, + value=50, + marks={i: f"{i}%" for i in range(10, 100, 5)}, + ), + ], className="me-2", width=10, ), ], - align="center", - ), + align="center", + ), dbc.Row( - [ dbc.Label( - "Window Width:", - html_for=f"window-width-{PAGE}-{VIZ_ID}", - width="auto", - ), - + [ + dbc.Label( + "Window Width:", + html_for=f"window-width-{PAGE}-{VIZ_ID}", + width="auto", + ), dbc.Col( dbc.Input( - id=f"window-width-{PAGE}-{VIZ_ID}", - type="number", - min=1, - max=12, - step=1, - value=6, - size="sm", - ), - className="me-2", - width = 1, - ), - + id=f"window-width-{PAGE}-{VIZ_ID}", + type="number", + min=1, + max=12, + step=1, + value=6, + size="sm", + ), + className="me-2", + width=1, + ), dbc.Label( - "Step Size:", - html_for=f"step-size-{PAGE}-{VIZ_ID}", - width="auto", - ), + "Step Size:", + html_for=f"step-size-{PAGE}-{VIZ_ID}", + width="auto", + ), dbc.Col( dbc.Input( - id=f"step-size-{PAGE}-{VIZ_ID}", - type="number", - min=1, - max=12, - step=1, - value=6, - size="sm", - ), + id=f"step-size-{PAGE}-{VIZ_ID}", + type="number", + min=1, + max=12, + step=1, + value=6, + size="sm", + ), className="me-2", - width=1, + width=1, ), dbc.Alert( children="Please ensure that 'Step Size' is less than or equal to 'Window Size'", @@ -127,13 +130,16 @@ is_open=False, color="warning", ), - ], - align="center" - ), + ], + align="center", + ), dbc.Row( - [ dbc.Label("Filter Out Contributors with Keyword(s) in Login:", + [ + dbc.Label( + "Filter Out Contributors with Keyword(s) in Login:", html_for=f"patterns-{PAGE}-{VIZ_ID}", - width="auto",), + width="auto", + ), dbc.Col( [ dmc.MultiSelect( @@ -146,39 +152,40 @@ creatable=True, searchable=True, ), - ], - className = "me-2", + ], + className="me-2", ), ], align="center", ), - dbc.Row( - [ dbc.Col( + [ + dbc.Col( dcc.DatePickerRange( - id=f"date-picker-range-{PAGE}-{VIZ_ID}", - min_date_allowed=dt.date(2005, 1, 1), - max_date_allowed=dt.date.today(), - initial_visible_month=dt.date(dt.date.today().year, 1, 1), - clearable=True, + id=f"date-picker-range-{PAGE}-{VIZ_ID}", + min_date_allowed=dt.date(2005, 1, 1), + max_date_allowed=dt.date.today(), + initial_visible_month=dt.date(dt.date.today().year, 1, 1), + clearable=True, ), width="auto", ), - - dbc.Col( - [dbc.Button( - "About Graph", - id=f"popover-target-{PAGE}-{VIZ_ID}", - color="secondary", - size="sm", - ), ], + dbc.Col( + [ + dbc.Button( + "About Graph", + id=f"popover-target-{PAGE}-{VIZ_ID}", + color="secondary", + size="sm", + ), + ], width="auto", style={"paddingTop": ".5em"}, ), - ], + ], align="center", justify="between", - ), + ), ] ), ] @@ -197,6 +204,7 @@ def toggle_popover(n, is_open): return not is_open return is_open + # callback for dynamically changing the graph title @callback( Output(f"graph-title-{PAGE}-{VIZ_ID}", "children"), @@ -206,11 +214,12 @@ def graph_title(window_width): title = f"Contributor Prolificacy in {window_width} Month Windows" return title + # callback for contrib-prolificacy-over-time graph @callback( Output(f"{PAGE}-{VIZ_ID}", "figure"), Output(f"check-alert-{PAGE}-{VIZ_ID}", "is_open"), - [ + [ Input("repo-choices", "data"), Input(f"patterns-{PAGE}-{VIZ_ID}", "value"), Input(f"threshold-{PAGE}-{VIZ_ID}", "value"), @@ -221,12 +230,13 @@ def graph_title(window_width): ], background=True, ) - -def create_contrib_prolificacy_over_time_graph(repolist, patterns, threshold, window_width, step_size, start_date, end_date): +def create_contrib_prolificacy_over_time_graph( + repolist, patterns, threshold, window_width, step_size, start_date, end_date +): # main function for all data pre processing cache = cm() df = cache.grabm(func=ctq, repos=repolist) - + while df is None: time.sleep(1.0) df = cache.grabm(func=ctq, repos=repolist) @@ -239,31 +249,26 @@ def create_contrib_prolificacy_over_time_graph(repolist, patterns, threshold, wi if df.empty: logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE") return nodata_graph - + # if the step size is greater than window width raise Alert if step_size > window_width: return dash.no_update, True - - df_final = process_data(df, - patterns, - threshold, - window_width, - step_size, - start_date, - end_date) + + df_final = process_data(df, patterns, threshold, window_width, step_size, start_date, end_date) fig = create_figure(df_final, step_size) logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") return fig, False + def process_data(df, patterns, threshold, window_width, step_size, start_date, end_date): # convert to datetime objects rather than strings df["created_at"] = pd.to_datetime(df["created_at"], utc=True) - + # order values chronologically by created_at date - df = df.sort_values(by='created_at', ascending=True) + df = df.sort_values(by="created_at", ascending=True) # if the start_date and/or the end date is not specified set them to the beginning and most recent created_at date if start_date is None: @@ -273,157 +278,168 @@ def process_data(df, patterns, threshold, window_width, step_size, start_date, e if patterns: # remove rows where Login column value contains the substring 'bot' - patterns_mask = df['login'].str.contains("|".join(patterns), na=False) + patterns_mask = df["login"].str.contains("|".join(patterns), na=False) df = df[~patterns_mask] - + # threshold is an integer value eg. 10, 20,..., 90 since dcc.Slider only accepts integers as values # divide by 100 to convert it to a decimal representation of a percentage eg. 0.10, 0.20,..., 0.90 threshold = threshold / 100 # create bins with a size equivalent to the the step size starting from the start date up to the end date - period_from = pd.date_range(start=start_date, end=end_date, freq=f'{step_size}m', inclusive="both") - # store the period_from dates in a df + period_from = pd.date_range(start=start_date, end=end_date, freq=f"{step_size}m", inclusive="both") + # store the period_from dates in a df df_final = period_from.to_frame(index=False, name="period_from") # calculate the end of each interval and store the values in a column named period_from - df_final['period_to'] = df_final['period_from'] + pd.DateOffset(months = window_width) + df_final["period_to"] = df_final["period_from"] + pd.DateOffset(months=window_width) # dynamically calculate the contributor prolificacy over time for each of the action times and store results in df_final - (df_final['Commit'], - df_final['Issue Opened'], - df_final['PR Opened'], - df_final['PR Comment'], - df_final['PR Review']) = zip(*df_final.apply(lambda row: cntrb_prolificacy_over_time(df, row.period_from, row.period_to, window_width, threshold), axis=1)) - + ( + df_final["Commit"], + df_final["Issue Opened"], + df_final["PR Opened"], + df_final["PR Comment"], + df_final["PR Review"], + ) = zip( + *df_final.apply( + lambda row: cntrb_prolificacy_over_time(df, row.period_from, row.period_to, window_width, threshold), axis=1 + ) + ) + return df_final + def create_figure(df_final, step_size): # create plotly express line graph fig = go.Figure( [ - go.Scatter(name = 'Commit', - x = df_final['period_from'], - y = df_final['Commit'], - mode='lines', - showlegend=True, - marker=dict(color=color_seq[0]) - ), - - go.Scatter(name = 'Issue Opened', - x = df_final['period_from'], - y = df_final['Issue Opened'], - mode='lines', - showlegend=True, - marker=dict(color=color_seq[1]) - ), - - go.Scatter(name = 'PR Opened', - x = df_final['period_from'], - y = df_final['PR Opened'], - mode='lines', - showlegend=True, - marker=dict(color=color_seq[2]) - ), - - go.Scatter(name = 'PR Request Comment', - x = df_final['period_from'], - y = df_final['PR Comment'], - mode='lines', - showlegend=True, - marker=dict(color=color_seq[3]) - ), - - go.Scatter(name = 'PR Request Review', - x = df_final['period_from'], - y = df_final['PR Review'], - mode='lines', - showlegend=True, - marker=dict(color=color_seq[4]) - ) - ], + go.Scatter( + name="Commit", + x=df_final["period_from"], + y=df_final["Commit"], + mode="lines", + showlegend=True, + marker=dict(color=color_seq[0]), + ), + go.Scatter( + name="Issue Opened", + x=df_final["period_from"], + y=df_final["Issue Opened"], + mode="lines", + showlegend=True, + marker=dict(color=color_seq[1]), + ), + go.Scatter( + name="PR Opened", + x=df_final["period_from"], + y=df_final["PR Opened"], + mode="lines", + showlegend=True, + marker=dict(color=color_seq[2]), + ), + go.Scatter( + name="PR Request Comment", + x=df_final["period_from"], + y=df_final["PR Comment"], + mode="lines", + showlegend=True, + marker=dict(color=color_seq[3]), + ), + go.Scatter( + name="PR Request Review", + x=df_final["period_from"], + y=df_final["PR Review"], + mode="lines", + showlegend=True, + marker=dict(color=color_seq[4]), + ), + ], ) - + # define x-axis and y-axis titles and intialize first x-axis tick to start at the user-inputted start_date - start_date = min(df_final['period_from']) - - # update xaxes to display ticks, only show ticks every other year - fig.update_xaxes(showgrid=True, - ticklabelmode="period", - tickangle = 0, - dtick=f"M24", - tickformat="%b %Y",) - - # hover template styling - fig.update_traces(textposition="top right", - hovertemplate= "Date: %{x}" + "
Contributor prolificacy: %{y}
", - ) - + start_date = min(df_final["period_from"]) + + # update xaxes to display ticks, only show ticks every other year + fig.update_xaxes( + showgrid=True, + ticklabelmode="period", + tickangle=0, + dtick=f"M24", + tickformat="%b %Y", + ) + + # hover template styling + fig.update_traces( + textposition="top right", + hovertemplate="Date: %{x}" + "
Contributor prolificacy: %{y}
", + ) + # layout syling fig.update_layout( - xaxis_title=f'Timeline (stepsize = {step_size} months)', + xaxis_title=f"Timeline (stepsize = {step_size} months)", xaxis=dict(tick0=start_date), - yaxis_title='Contributor Prolificacy', + yaxis_title="Contributor Prolificacy", font=dict(size=14), margin_b=40, - legend_title='Action Type' - ) + legend_title="Action Type", + ) return fig + def cntrb_prolificacy_over_time(df, period_from, period_to, window_width, threshold): # subset df such that the rows correspond to the window of time defined by period from and period to - time_mask = (df['created_at'] >= period_from) & (df['created_at'] <= period_to) + time_mask = (df["created_at"] >= period_from) & (df["created_at"] <= period_to) df_in_range = df.loc[time_mask] - + # initialize varibles to store contributor prolificacy accoding to action type commit, issueOpened, prOpened, prReview, prComment = None, None, None, None, None # count the number of contributions each contributor has made according each action type - df_count_cntrbs = df_in_range.groupby(['Action', 'cntrb_id'])['cntrb_id'].count().to_frame() - df_count_cntrbs = df_count_cntrbs.rename(columns={'cntrb_id': 'count'}).reset_index() + df_count_cntrbs = df_in_range.groupby(["Action", "cntrb_id"])["cntrb_id"].count().to_frame() + df_count_cntrbs = df_count_cntrbs.rename(columns={"cntrb_id": "count"}).reset_index() # pivot df such that the column names correspond to the different action types, index is the cntrb_ids, and the values are the number of contributions of each contributor - df_count_cntrbs = df_count_cntrbs.pivot(index='cntrb_id', columns='Action', values='count') + df_count_cntrbs = df_count_cntrbs.pivot(index="cntrb_id", columns="Action", values="count") - commit = calc_cntrb_prolificacy(df_count_cntrbs, 'Commit', threshold) - issueOpened = calc_cntrb_prolificacy(df_count_cntrbs, 'Issue Opened', threshold) - prOpened = calc_cntrb_prolificacy(df_count_cntrbs, 'PR Opened', threshold) - prReview = calc_cntrb_prolificacy(df_count_cntrbs, 'PR Review', threshold) - prComment = calc_cntrb_prolificacy(df_count_cntrbs, 'PR Comment', threshold) + commit = calc_cntrb_prolificacy(df_count_cntrbs, "Commit", threshold) + issueOpened = calc_cntrb_prolificacy(df_count_cntrbs, "Issue Opened", threshold) + prOpened = calc_cntrb_prolificacy(df_count_cntrbs, "PR Opened", threshold) + prReview = calc_cntrb_prolificacy(df_count_cntrbs, "PR Review", threshold) + prComment = calc_cntrb_prolificacy(df_count_cntrbs, "PR Comment", threshold) return commit, issueOpened, prOpened, prReview, prComment + def calc_cntrb_prolificacy(df, action_type, threshold): # if the df is empty return None if df.empty: return None - + # if the specified action type is not in the dfs' cols return None if action_type not in df.columns: - return None - + return None + # sort rows in df based on number of contributions from greatest to least df = df.sort_values(by=action_type, ascending=False) - # calculate the threshold amount of contributions + # calculate the threshold amount of contributions thresh_cntrbs = df[action_type].sum() * threshold - + # drop rows where the cntrb_id is None - mask = df.index.get_level_values('cntrb_id') == None + mask = df.index.get_level_values("cntrb_id") == None df = df[~mask] # initilize running sum of contributors who make up contributor prolificacy - cntrb_prolificacy = 0 + cntrb_prolificacy = 0 # initialize running sum of contributions running_sum = 0 - + for _, row in df.iterrows(): - running_sum += row[action_type] # update the running sum by the number of contributions a contributor has made - cntrb_prolificacy+=1 # update contributor prolificacy + running_sum += row[action_type] # update the running sum by the number of contributions a contributor has made + cntrb_prolificacy += 1 # update contributor prolificacy # if the running sum of contributions is greater than or equal to the threshold amount, break if running_sum >= thresh_cntrbs: break return cntrb_prolificacy - - diff --git a/8Knot/pages/index/index_layout.py b/8Knot/pages/index/index_layout.py index fc8e8523..3848c327 100644 --- a/8Knot/pages/index/index_layout.py +++ b/8Knot/pages/index/index_layout.py @@ -25,7 +25,9 @@ ] ), dbc.NavItem( - dbc.NavLink("Refresh Groups", id="refresh-button", disabled=True), + dcc.Loading( + dbc.NavLink("Refresh Groups", id="refresh-button", disabled=False), + ) ), dbc.NavItem( dbc.NavLink( diff --git a/8Knot/pages/overview/overview.py b/8Knot/pages/overview/overview.py index e224cd92..af947811 100644 --- a/8Knot/pages/overview/overview.py +++ b/8Knot/pages/overview/overview.py @@ -65,12 +65,12 @@ style={"marginBottom": ".5%"}, ), dbc.Row( - [ - dbc.Col(gc_commits_over_time, width=6), - ], - align="center", - style={"marginBottom": ".5%"}, - ), + [ + dbc.Col(gc_commits_over_time, width=6), + ], + align="center", + style={"marginBottom": ".5%"}, + ), ], fluid=True, )