Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integration testing for the binary #46

Merged
merged 3 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/eval-dev-quality/cmd/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ type Command struct {
}

// Execute executes the root command.
func Execute() {
func Execute(arguments []string) {
var parser = flags.NewNamedParser("eval-dev-quality", flags.Default)
parser.LongDescription = "Command to manage, update and actually execute the `eval-dev-quality` evaluation benchmark."
if _, err := parser.AddGroup("Common command options", "", &Command{}); err != nil {
Expand All @@ -23,7 +23,7 @@ func Execute() {
// Print the help when there is no active command.
parser.SubcommandsOptional = true

if _, err := parser.Parse(); err != nil {
if _, err := parser.ParseArgs(arguments); err != nil {
if e, ok := err.(*flags.Error); ok && e.Type == flags.ErrHelp {
return
}
Expand Down
49 changes: 49 additions & 0 deletions cmd/eval-dev-quality/cmd/command_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package cmd

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/zimmski/osutil"
"github.com/zimmski/osutil/bytesutil"
)

// TestExecute calls "Execute" with the arguments of each test case and compares the output and error returned by "osutil.Capture" with the expected values.
func TestExecute(t *testing.T) {
// testCase describes a single invocation of "Execute" and its expected result.
type testCase struct {
// Name holds the name of the sub-test handed to "t.Run".
Name string

// Arguments holds the command line arguments handed to "Execute".
Arguments []string

// ExpectedOutput holds the exact output that the captured execution must produce.
ExpectedOutput string
// ExpectedError holds the error that capturing the execution must return.
ExpectedError error
}

// validate runs one test case as a sub-test.
validate := func(t *testing.T, tc *testCase) {
t.Run(tc.Name, func(t *testing.T) {
// "Execute" has no return values, so its result is observed through what "osutil.Capture" collects while the function runs.
actualOutput, actualError := osutil.Capture(func() {
Execute(tc.Arguments)
})

assert.Equal(t, tc.ExpectedOutput, string(actualOutput))
assert.Equal(t, tc.ExpectedError, actualError)
})
}

// No "Arguments" are set for this case, so "Execute" receives a nil slice.
validate(t, &testCase{
Name: "No arguments should show help",

// The description is quoted with backticks in the help, which cannot appear inside a raw string literal, hence the concatenation with "`".
ExpectedOutput: bytesutil.StringTrimIndentations(`
Usage:
eval-dev-quality [OPTIONS] [evaluate]

Command to manage, update and actually execute the ` + "`" + `eval-dev-quality` + "`" + `
evaluation benchmark.

Help Options:
-h, --help Show this help message

Available commands:
evaluate Run an evaluation, by default with all defined models, repositories and tasks.
`),
})
}
64 changes: 64 additions & 0 deletions cmd/eval-dev-quality/cmd/evaluate_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package cmd

import (
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/zimmski/osutil"
)

// TestEvaluateExecute runs the "evaluate" command through "Execute" and validates the captured output, the returned error and the files written to the result directory.
func TestEvaluateExecute(t *testing.T) {
	// testCase describes a single invocation of the "evaluate" command and its expected result.
	type testCase struct {
		// Name holds the name of the sub-test handed to "t.Run".
		Name string

		// Arguments holds additional arguments that are appended after the default "evaluate" arguments.
		Arguments []string

		// ExpectedOutputContains holds a text that must occur somewhere in the captured output.
		ExpectedOutputContains string
		// ExpectedError holds the error that capturing the execution must return.
		ExpectedError error
		// ExpectedResultFiles holds the slash-separated file paths, relative to the result directory, that must exist after the run.
		ExpectedResultFiles []string
	}

	// validate runs one test case as a sub-test using a fresh temporary result directory.
	validate := func(t *testing.T, tc *testCase) {
		t.Run(tc.Name, func(t *testing.T) {
			temporaryPath := t.TempDir()

			actualOutput, actualError := osutil.Capture(func() {
				Execute(append([]string{
					"evaluate",
					"--result-path", temporaryPath,
					"--testdata", "../../../testdata",
				}, tc.Arguments...))
			})

			assert.Contains(t, string(actualOutput), tc.ExpectedOutputContains)
			assert.Equal(t, tc.ExpectedError, actualError)

			actualResultFiles, err := osutil.FilesRecursive(temporaryPath)
			require.NoError(t, err)
			for i, p := range actualResultFiles {
				relativePath, err := filepath.Rel(temporaryPath, p)
				require.NoError(t, err)
				// "filepath.Rel" uses the separator of the operating system while the expected paths are written with forward slashes, so normalize to keep the comparison portable.
				actualResultFiles[i] = filepath.ToSlash(relativePath)
			}
			assert.Equal(t, tc.ExpectedResultFiles, actualResultFiles)
		})
	}

	validate(t, &testCase{
		Name: "Plain",

		Arguments: []string{
			"--language", "golang",
			"--model", "symflower/symbolic-execution",
			"--repository", "golang/plain",
		},

		ExpectedOutputContains: `Evaluation score for "symflower/symbolic-execution": score=6, coverage-statement=1, files-executed=1, response-no-error=1, response-no-excess=1, response-not-empty=1, response-with-code=1`,
		ExpectedResultFiles: []string{
			"evaluation.csv",
			"evaluation.log",
			"symflower_symbolic-execution/golang/golang/plain.log",
		},
	})
}
4 changes: 3 additions & 1 deletion cmd/eval-dev-quality/main.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package main

import (
"os"

"github.com/symflower/eval-dev-quality/cmd/eval-dev-quality/cmd"
)

func main() {
cmd.Execute()
cmd.Execute(os.Args[1:])
}
1 change: 1 addition & 0 deletions evaluate/metrics/testing/assessments.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"testing"

"github.com/stretchr/testify/assert"

"github.com/symflower/eval-dev-quality/evaluate/metrics"
)

Expand Down
6 changes: 3 additions & 3 deletions evaluate/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,14 @@ func EvaluateRepository(resultPath string, model model.Model, language language.
return nil, problems, pkgerrors.WithStack(err)
}

filePaths, err := language.Files(dataPath)
filePaths, err := language.Files(log, dataPath)
if err != nil {
return nil, problems, pkgerrors.WithStack(err)
}

repositoryAssessment = metrics.NewAssessments()
for _, filePath := range filePaths {
assessments, err := model.GenerateTestsForFile(language, temporaryRepositoryPath, filePath)
assessments, err := model.GenerateTestsForFile(log, language, temporaryRepositoryPath, filePath)
if err != nil {
problems = append(problems, pkgerrors.WithMessage(err, filePath))

Expand All @@ -64,7 +64,7 @@ func EvaluateRepository(resultPath string, model model.Model, language language.
repositoryAssessment.Add(assessments)
repositoryAssessment[metrics.AssessmentKeyResponseNoError]++

coverage, err := language.Execute(temporaryRepositoryPath)
coverage, err := language.Execute(log, temporaryRepositoryPath)
if err != nil {
problems = append(problems, pkgerrors.WithMessage(err, filePath))

Expand Down
7 changes: 4 additions & 3 deletions language/golang.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package language

import (
"errors"
"log"
"os"
"path/filepath"
"regexp"
Expand Down Expand Up @@ -34,7 +35,7 @@ func (language *LanguageGolang) Name() (id string) {
}

// Files returns a list of relative file paths of the repository that should be evaluated.
func (language *LanguageGolang) Files(repositoryPath string) (filePaths []string, err error) {
func (language *LanguageGolang) Files(log *log.Logger, repositoryPath string) (filePaths []string, err error) {
repositoryPath, err = filepath.Abs(repositoryPath)
if err != nil {
return nil, pkgerrors.WithStack(err)
Expand Down Expand Up @@ -62,8 +63,8 @@ var languageGoCoverageMatch = regexp.MustCompile(`(?m)^coverage: (\d+\.?\d+)% of
var languageGoNoCoverageMatch = regexp.MustCompile(`(?m)^coverage: \[no statements\]$`)

// Execute invokes the language specific testing on the given repository.
func (language *LanguageGolang) Execute(repositoryPath string) (coverage float64, err error) {
stdout, _, err := util.CommandWithResult(&util.Command{
func (language *LanguageGolang) Execute(log *log.Logger, repositoryPath string) (coverage float64, err error) {
stdout, _, err := util.CommandWithResult(log, &util.Command{
Command: []string{
"gotestsum",
"--format", "standard-verbose", // Keep formatting consistent.
Expand Down
20 changes: 18 additions & 2 deletions language/golang_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"github.com/stretchr/testify/require"
"github.com/zimmski/osutil"
"github.com/zimmski/osutil/bytesutil"

"github.com/symflower/eval-dev-quality/log"
)

func TestLanguageGolangFiles(t *testing.T) {
Expand All @@ -25,10 +27,17 @@ func TestLanguageGolangFiles(t *testing.T) {

validate := func(t *testing.T, tc *testCase) {
t.Run(tc.Name, func(t *testing.T) {
log, logger := log.Buffer()
defer func() {
if t.Failed() {
t.Log(log.String())
}
}()

if tc.LanguageGolang == nil {
tc.LanguageGolang = &LanguageGolang{}
}
actualFilePaths, actualError := tc.LanguageGolang.Files(tc.RepositoryPath)
actualFilePaths, actualError := tc.LanguageGolang.Files(logger, tc.RepositoryPath)

assert.Equal(t, tc.ExpectedFilePaths, actualFilePaths)
assert.Equal(t, tc.ExpectedError, actualError)
Expand Down Expand Up @@ -62,6 +71,13 @@ func TestLanguageGolangExecute(t *testing.T) {

validate := func(t *testing.T, tc *testCase) {
t.Run(tc.Name, func(t *testing.T) {
log, logger := log.Buffer()
defer func() {
if t.Failed() {
t.Log(log.String())
}
}()

temporaryPath := t.TempDir()
repositoryPath := filepath.Join(temporaryPath, filepath.Base(tc.RepositoryPath))
require.NoError(t, osutil.CopyTree(tc.RepositoryPath, repositoryPath))
Expand All @@ -73,7 +89,7 @@ func TestLanguageGolangExecute(t *testing.T) {
if tc.LanguageGolang == nil {
tc.LanguageGolang = &LanguageGolang{}
}
actualCoverage, actualError := tc.LanguageGolang.Execute(repositoryPath)
actualCoverage, actualError := tc.LanguageGolang.Execute(logger, repositoryPath)

if tc.ExpectedError != nil {
assert.ErrorIs(t, actualError, tc.ExpectedError)
Expand Down
6 changes: 4 additions & 2 deletions language/language.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package language

import (
"log"

pkgerrors "github.com/pkg/errors"
)

Expand All @@ -12,10 +14,10 @@ type Language interface {
Name() (id string)

// Files returns a list of relative file paths of the repository that should be evaluated.
Files(repositoryPath string) (filePaths []string, err error)
Files(log *log.Logger, repositoryPath string) (filePaths []string, err error)

// Execute invokes the language specific testing on the given repository.
Execute(repositoryPath string) (coverage float64, err error)
Execute(log *log.Logger, repositoryPath string) (coverage float64, err error)
}

// Languages holds a register of all languages.
Expand Down
9 changes: 9 additions & 0 deletions log/logger.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package log

import (
"bytes"
"io"
"log"
"os"
Expand All @@ -9,6 +10,14 @@ import (
pkgerrors "github.com/pkg/errors"
)

// Buffer creates a fresh in-memory buffer and a logger that writes all of its messages into that buffer.
// The logger uses no prefix and the standard date and time flags.
func Buffer() (buffer *bytes.Buffer, logger *log.Logger) {
	buffer = &bytes.Buffer{}

	return buffer, log.New(buffer, "", log.LstdFlags)
}

// File returns a logger that writes to a file.
func File(path string) (logger *log.Logger, loggerClose func(), err error) {
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion model/llm/llm.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func (m *llm) ID() (id string) {
}

// GenerateTestsForFile generates test files for the given implementation file in a repository.
func (m *llm) GenerateTestsForFile(language language.Language, repositoryPath string, filePath string) (assessment metrics.Assessments, err error) {
func (m *llm) GenerateTestsForFile(log *log.Logger, language language.Language, repositoryPath string, filePath string) (assessment metrics.Assessments, err error) {
data, err := os.ReadFile(filepath.Join(repositoryPath, filePath))
if err != nil {
return nil, pkgerrors.WithStack(err)
Expand Down
13 changes: 10 additions & 3 deletions model/llm/llm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ import (
"github.com/zimmski/osutil/bytesutil"

"github.com/symflower/eval-dev-quality/evaluate/metrics"
"github.com/symflower/eval-dev-quality/language"

metricstesting "github.com/symflower/eval-dev-quality/evaluate/metrics/testing"
"github.com/symflower/eval-dev-quality/language"
"github.com/symflower/eval-dev-quality/log"
providertesting "github.com/symflower/eval-dev-quality/provider/testing"
)

Expand All @@ -36,6 +36,13 @@ func TestModelLLMGenerateTestsForFile(t *testing.T) {

validate := func(t *testing.T, tc *testCase) {
t.Run(tc.Name, func(t *testing.T) {
log, logger := log.Buffer()
defer func() {
if t.Failed() {
t.Log(log.String())
}
}()

temporaryPath := t.TempDir()
temporaryPath = filepath.Join(temporaryPath, "native")
require.NoError(t, os.Mkdir(temporaryPath, 0755))
Expand All @@ -46,7 +53,7 @@ func TestModelLLMGenerateTestsForFile(t *testing.T) {
tc.SetupMock(mock)
llm := NewLLMModel(mock, tc.ModelID)

actualAssessment, actualError := llm.GenerateTestsForFile(tc.Language, temporaryPath, tc.SourceFilePath)
actualAssessment, actualError := llm.GenerateTestsForFile(logger, tc.Language, temporaryPath, tc.SourceFilePath)
assert.NoError(t, actualError)
metricstesting.AssertAssessmentsEqual(t, tc.ExpectedAssessment, actualAssessment)

Expand Down
3 changes: 2 additions & 1 deletion model/llm/prompt/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import (
"regexp"
"strings"

"github.com/symflower/eval-dev-quality/evaluate/metrics"
"github.com/zimmski/osutil/bytesutil"

"github.com/symflower/eval-dev-quality/evaluate/metrics"
)

var (
Expand Down
2 changes: 1 addition & 1 deletion model/llm/prompt/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/symflower/eval-dev-quality/evaluate/metrics"
"github.com/zimmski/osutil/bytesutil"

"github.com/symflower/eval-dev-quality/evaluate/metrics"
metricstesting "github.com/symflower/eval-dev-quality/evaluate/metrics/testing"
)

Expand Down
4 changes: 3 additions & 1 deletion model/model.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package model

import (
"log"

"github.com/symflower/eval-dev-quality/evaluate/metrics"
"github.com/symflower/eval-dev-quality/language"
)
Expand All @@ -11,5 +13,5 @@ type Model interface {
ID() (id string)

// GenerateTestsForFile generates test files for the given implementation file in a repository.
GenerateTestsForFile(language language.Language, repositoryPath string, filePath string) (assessments metrics.Assessments, err error)
GenerateTestsForFile(log *log.Logger, language language.Language, repositoryPath string, filePath string) (assessments metrics.Assessments, err error)
}
Loading
Loading