Skip to content

Commit

Permalink
Merge pull request #38 from symflower/refactor-metrics-to-assessments
Browse files Browse the repository at this point in the history
Move existing metrics to assessments
  • Loading branch information
zimmski authored Apr 17, 2024
2 parents cf1e2c1 + a8e7e40 commit e72de12
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 203 deletions.
8 changes: 4 additions & 4 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,12 @@ func (command *Evaluate) Execute(args []string) (err error) {

// Check that models and languages can be evaluated by executing the "plain" repositories.
log.Printf("Checking that models and languages can be used for evaluation")
metricsPerModel := map[string]metrics.Metrics{}
metricsPerModel := map[string]metrics.Assessments{}
problemsPerModel := map[string][]error{}
{
// Ensure we report metrics for every model even if they are excluded.
for _, modelID := range command.Models {
metricsPerModel[modelID] = metrics.Metrics{}
metricsPerModel[modelID] = metrics.NewAssessments()
}

for _, languageID := range command.Languages {
Expand All @@ -114,7 +114,7 @@ func (command *Evaluate) Execute(args []string) (err error) {
language := language.Languages[languageID]

metrics, ps, err := evaluate.EvaluateRepository(model, language, filepath.Join(command.TestdataPath, language.ID(), "plain"))
metricsPerModel[modelID] = metricsPerModel[modelID].Add(metrics)
metricsPerModel[modelID].Add(metrics)
if err != nil {
ps = append(ps, err)
}
Expand Down Expand Up @@ -155,7 +155,7 @@ func (command *Evaluate) Execute(args []string) (err error) {
language := language.Languages[languageID]

metrics, ps, err := evaluate.EvaluateRepository(model, language, filepath.Join(languagePath, repository.Name()))
metricsPerModel[model.ID()] = metricsPerModel[model.ID()].Add(metrics)
metricsPerModel[model.ID()].Add(metrics)
problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...)
if err != nil {
log.Printf("ERROR: Model %q encountered a hard error for language %q, repository %q: %+v", modelID, languageID, repository.Name(), err)
Expand Down
74 changes: 73 additions & 1 deletion evaluate/metrics/assessment.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
package metrics

import (
"encoding/csv"
"fmt"
"sort"
"strings"

pkgerrors "github.com/pkg/errors"
"golang.org/x/exp/maps"
)

// AssessmentKey defines a key for a numerical key-value assessment pair.
// Its underlying type is a plain string.
type AssessmentKey string

Expand All @@ -20,14 +30,22 @@ func RegisterAssessmentKey(key string) AssessmentKey {
}

var (
	// AssessmentKeyFilesExecuted holds the successfully executed files.
	AssessmentKeyFilesExecuted = RegisterAssessmentKey("files-executed")
	// AssessmentKeyFilesProblems holds the files with problems.
	AssessmentKeyFilesProblems = RegisterAssessmentKey("files-problems")

	// AssessmentKeyCoverageStatement counts the cases where 100% coverage was reached.
	AssessmentKeyCoverageStatement = RegisterAssessmentKey("coverage-statement")

	// AssessmentKeyNoExcessResponse indicates that a model did not produce more content than requested.
	AssessmentKeyNoExcessResponse = RegisterAssessmentKey("no-excess-response")
)

// Assessments holds a collection of numerical assessment metrics.
// It maps each assessment key to an unsigned integer value.
type Assessments map[AssessmentKey]uint

// NewAssessments create a new assessment collection.
// NewAssessments creates a new, empty assessment collection that is ready to be written to.
func NewAssessments() Assessments {
	return Assessments{}
}
Expand All @@ -51,3 +69,57 @@ func Merge(a Assessments, b Assessments) (c Assessments) {

return c
}

// String returns a string representation of the metrics.
// Every key of "allAssessmentKeys" is rendered as "key=value", in that slice's order, and the pairs are joined with ", ".
func (a Assessments) String() string {
	// Indexing a nil map yields the zero value, so a nil receiver renders every key as zero without special handling.
	pairs := make([]string, 0, len(allAssessmentKeys))
	for _, key := range allAssessmentKeys {
		pairs = append(pairs, fmt.Sprintf("%s=%d", key, a[key]))
	}

	return strings.Join(pairs, ", ")
}

// StringCSV returns a CSV row string representation of the metrics.
// The row holds one decimal value per key of "allAssessmentKeys", in that slice's order.
func (a Assessments) StringCSV() (row []string) {
	// Indexing a nil map yields the zero value, so a nil receiver produces a row of zeros.
	row = make([]string, 0, len(allAssessmentKeys))
	for _, key := range allAssessmentKeys {
		row = append(row, fmt.Sprintf("%d", a[key]))
	}

	return row
}

// csvHeader returns the CSV header row: the "model" column followed by all entries of "allAssessmentKeysStrings".
func csvHeader() []string {
	header := make([]string, 0, 1+len(allAssessmentKeysStrings))
	header = append(header, "model")

	return append(header, allAssessmentKeysStrings...)
}

// FormatStringCSV formats the given metrics as CSV.
// The output starts with the header row, followed by one row per model with the model name in the first column.
// Models are emitted in ascending lexical order so the output is deterministic despite random map iteration.
// A non-nil error is returned if writing or flushing the CSV data fails.
func FormatStringCSV(metricsPerModel map[string]Assessments) (string, error) {
	var out strings.Builder
	// Do not name the writer "csv", as that would shadow the imported package.
	writer := csv.NewWriter(&out)

	if err := writer.Write(csvHeader()); err != nil {
		return "", pkgerrors.WithStack(err)
	}

	models := maps.Keys(metricsPerModel)
	sort.Strings(models)
	for _, model := range models {
		row := metricsPerModel[model].StringCSV()

		if err := writer.Write(append([]string{model}, row...)); err != nil {
			return "", pkgerrors.WithStack(err)
		}
	}

	// Writes are buffered, so a failure may only surface on flush and must be queried explicitly.
	writer.Flush()
	if err := writer.Error(); err != nil {
		return "", pkgerrors.WithStack(err)
	}

	return out.String(), nil
}
96 changes: 96 additions & 0 deletions evaluate/metrics/assessment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/zimmski/osutil/bytesutil"
)

func TestAssessmentsAdd(t *testing.T) {
Expand Down Expand Up @@ -114,3 +115,98 @@ func TestMerge(t *testing.T) {
},
})
}

// TestAssessmentString checks that "Assessments.String" renders every assessment key with its value.
func TestAssessmentString(t *testing.T) {
	type testCase struct {
		Name string

		Assessment Assessments

		ExpectedString string
	}

	validate := func(t *testing.T, tc *testCase) {
		t.Run(tc.Name, func(t *testing.T) {
			actualString := tc.Assessment.String()

			assert.Equal(t, tc.ExpectedString, actualString)
		})
	}

	// A nil collection must render like an empty one.
	validate(t, &testCase{
		Name: "Nil Metrics",

		Assessment: nil,

		ExpectedString: "files-executed=0, files-problems=0, coverage-statement=0, no-excess-response=0",
	})

	validate(t, &testCase{
		Name: "Initial Metrics",

		Assessment: NewAssessments(),

		ExpectedString: "files-executed=0, files-problems=0, coverage-statement=0, no-excess-response=0",
	})

	// Distinct values per key ensure that each value is printed next to its own key.
	validate(t, &testCase{
		Name: "Populated Metrics",

		Assessment: Assessments{
			AssessmentKeyCoverageStatement: 1,
			AssessmentKeyFilesExecuted:     2,
			AssessmentKeyFilesProblems:     3,
			AssessmentKeyNoExcessResponse:  4,
		},

		ExpectedString: "files-executed=2, files-problems=3, coverage-statement=1, no-excess-response=4",
	})
}

// TestFormatStringCSV checks that "FormatStringCSV" emits the header row followed by one row per model.
func TestFormatStringCSV(t *testing.T) {
	type testCase struct {
		Name string

		AssessmentPerModel map[string]Assessments

		ExpectedString string
	}

	validate := func(t *testing.T, tc *testCase) {
		t.Run(tc.Name, func(t *testing.T) {
			actualString, err := FormatStringCSV(tc.AssessmentPerModel)
			assert.NoError(t, err)

			assert.Equal(t, bytesutil.StringTrimIndentations(tc.ExpectedString), actualString)
		})
	}

	validate(t, &testCase{
		Name: "Single Empty Model",

		AssessmentPerModel: map[string]Assessments{
			"Model": {},
		},

		ExpectedString: `
			model,files-executed,files-problems,coverage-statement,no-excess-response
			Model,0,0,0,0
		`,
	})
	// The models carry different values so that a row attached to the wrong model is detected.
	validate(t, &testCase{
		Name: "Multiple Models",

		AssessmentPerModel: map[string]Assessments{
			"ModelA": {
				AssessmentKeyCoverageStatement: 1,
				AssessmentKeyFilesExecuted:     2,
				AssessmentKeyFilesProblems:     3,
				AssessmentKeyNoExcessResponse:  4,
			},
			"ModelB": {
				AssessmentKeyCoverageStatement: 5,
				AssessmentKeyFilesExecuted:     6,
				AssessmentKeyFilesProblems:     7,
				AssessmentKeyNoExcessResponse:  8,
			},
		},

		ExpectedString: `
			model,files-executed,files-problems,coverage-statement,no-excess-response
			ModelA,2,3,1,4
			ModelB,6,7,5,8
		`,
	})
}
117 changes: 0 additions & 117 deletions evaluate/metrics/metrics.go

This file was deleted.

Loading

0 comments on commit e72de12

Please sign in to comment.