grading: New input difficulty algorithm
jackrosenthal committed Feb 17, 2024
1 parent f494cd9 commit 4656dd8
Showing 3 changed files with 35 additions and 36 deletions.
45 changes: 19 additions & 26 deletions algobowl/controllers/competition.py
@@ -31,25 +31,21 @@

GradingTuple = recordclass(
"GradingTuple",
["rankings", "fleet", "verification", "input", "contributions", "evaluations"],
["rankings", "fleet", "verification", "input_ones", "contributions", "evaluations"],
)

GradingContributionTuple = recordclass(
"GradingContributionTuple",
["participation", "verification", "input_difficulty", "ranking"],
defaults=[0, 0, 0, 0],
["participation", "verification", "input_submitted", "input_difficulty", "ranking"],
defaults=[0, 0, 0, 0, 0],
)

GradingInputTuple = recordclass("GradingInputTuple", ["scores_l", "scores_s"])

GradingVerificationTuple = recordclass(
"GradingVerificationTuple",
["correct", "false_positives", "false_negatives"],
defaults=[0, 0, 0],
)

CompInfoTuple = recordclass("CompInfoTuple", ["inputs", "best_input_difference"])

CompetitionYearTuple = recordclass("CompetitionYearTuple", ["year", "competitions"])


@@ -303,14 +299,14 @@ def new_gt(rankings_entry):
rankings_entry,
None,
GradingVerificationTuple(),
GradingInputTuple([], set()),
0,
GradingContributionTuple(),
defaultdict(dict),
)

groups = {k: new_gt(v) for k, v in rankings["groups"].items()}

compinfo = CompInfoTuple(len(rankings["inputs"]), 0)
num_inputs = len(rankings["inputs"])
benchmark_groups = []

# in the case a group submitted an input but has no outputs
@@ -328,7 +324,7 @@ def new_gt(rankings_entry):
if group not in groups.keys():
rankings_entry = GroupEntry()
# If they submitted nothing, then everything is a "reject"
rankings_entry.reject_count = compinfo.inputs
rankings_entry.reject_count = num_inputs
groups[group] = new_gt(rankings_entry)

benchmark_groups.sort(key=lambda gt: -gt.rankings.adj_score)
@@ -350,15 +346,12 @@ def new_gt(rankings_entry):
else:
gt.verification.false_negatives += 1

# compute input unique ranks
# Compute input difficulty
for iput, st in gt.rankings.input_ranks.items():
if iput.group.incognito or iput.group.benchmark:
continue
if st.rank is None:
groups[iput.group].input.scores_s.add("R{}".format(id(st)))
else:
groups[iput.group].input.scores_s.add(st.score)
groups[iput.group].input.scores_l.append(st.score)
if st.rank == 1:
groups[iput.group].input_ones += 1

gt.fleet = 0
for bench_gt in benchmark_groups:
@@ -369,10 +362,6 @@ def new_gt(rankings_entry):
for fleet in fleets:
fleet.sort(key=lambda gt: -gt.rankings.adj_score)

compinfo.best_input_difference = max(
len(g.input.scores_s) for g in groups.values()
)

for group, gt in groups.items():
gt.contributions.ranking = compute_rankings_grade(
gt, gt.fleet, fleets[gt.fleet]
@@ -383,13 +372,17 @@ def new_gt(rankings_entry):
else 0
)
gt.contributions.participation = (
(compinfo.inputs - gt.rankings.reject_count) / compinfo.inputs * 50
)
gt.contributions.input_difficulty = (
(5 + len(gt.input.scores_s) / compinfo.best_input_difference * 10)
if gt.input.scores_l
else 0
(num_inputs - gt.rankings.reject_count) / num_inputs * 50
)
if group.input:
gt.contributions.input_submitted = 5
gt.contributions.input_difficulty = max(
10 - (gt.input_ones - 1),
0,
)
else:
gt.contributions.input_submitted = 0
gt.contributions.input_difficulty = 0

for group, gt in groups.items():
for from_member in group.users:
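
Because the change above is split across several hunks, here is a minimal self-contained sketch of the new per-group contribution logic. It is an illustration only: `score_group` is a hypothetical helper that does not exist in the codebase, while `num_inputs`, `reject_count`, and `input_ones` mirror names used in the diff.

```python
# Sketch of the new grading contributions, consolidated from the hunks above.
def score_group(num_inputs, reject_count, submitted_input, input_ones):
    """Return (participation, input_submitted, input_difficulty)."""
    # Participation: 50 points, scaled by the fraction of inputs not rejected.
    participation = (num_inputs - reject_count) / num_inputs * 50

    if submitted_input:
        # 5 points for submitting any valid input.
        input_submitted = 5
        # 10 points if exactly one group achieved rank 1 on this input,
        # minus 1 point per additional rank-1 group, floored at zero.
        input_difficulty = max(10 - (input_ones - 1), 0)
    else:
        input_submitted = 0
        input_difficulty = 0

    return participation, input_submitted, input_difficulty


# Example: 20 inputs, 2 rejected outputs, input submitted, 4 rank-1 groups.
print(score_group(20, 2, True, 4))  # (45.0, 5, 7)
```
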
9 changes: 7 additions & 2 deletions algobowl/templates/competition/grade.xhtml
@@ -18,6 +18,7 @@
<th>Group</th>
<th>Name</th>
<th>Verification</th>
<th>Input Submitted</th>
<th>Input Difficulty</th>
<th>Participation</th>
<th>Rankings</th>
@@ -36,9 +37,13 @@
${gt.verification.false_positives}FP,
${gt.verification.false_negatives}FN)
</td>
<td>
${format_percent(gt.contributions.input_submitted)}
(${"Yes" if gt.contributions.input_submitted else "No"})
</td>
<td>
${format_percent(gt.contributions.input_difficulty)}
(${len(gt.input.scores_s)})
(${gt.input_ones})
</td>
<td>
${format_percent(gt.contributions.participation)}
@@ -57,7 +62,7 @@
if max(gt.evaluations[u].values()) > 0.45
else 'bg-secondary text-light'}"
py:with="mult=sum(gt.evaluations[u].values())">
<td colspan="2">
<td colspan="3">
${u}
</td>
<td colspan="4">
17 changes: 9 additions & 8 deletions docs/rules.md
@@ -94,14 +94,15 @@ UNIX-style newlines).

* 5 points for uploading any valid input.
* 10 points will be awarded based on how "difficult" your input was.
The more unique outputs get uploaded for your input, the more
difficult we consider it. For example, if all outputs that were
uploaded for your input had a different score, you'd get a full 10
points. If everyone uploads the same output for your input (likely,
it was not difficult to find the optimal solution), you'll get no
points in this category. When designing your input, keep this in
mind. Try to design an input you think will be very hard to find
the optimal solution.
Difficulty is defined by the number of groups which find the best
solution. You will be awarded 10 points if only one group finds
the best solution, and we subtract 1 point for each additional group
which finds the best solution. For example, if 4 groups all get a
rank of 1 on your input, you will be awarded `10-(4-1) = 7` points.
If 11 or more groups get a rank of 1, you'll be awarded no points in
this category. Keep this in mind when designing your input: try to
design an input for which you expect the optimal solution to be very
hard to find.
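
As a quick illustration (a sketch, not an official part of the rules), the
rule above maps the number of rank-1 groups `n` to `max(10 - (n - 1), 0)`
points:

```python
# Points awarded for input difficulty, given n groups tied at rank 1.
for n in (1, 2, 4, 10, 11, 15):
    print(n, max(10 - (n - 1), 0))
# 1 -> 10, 2 -> 9, 4 -> 7, 10 -> 1, 11 -> 0, 15 -> 0
```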

### Default

