From 8b1e58a1c3205022a9e79b31fb1162a608f92950 Mon Sep 17 00:00:00 2001 From: Markus Zimmermann Date: Wed, 17 Apr 2024 15:03:10 +0200 Subject: [PATCH 1/3] refactor, Allow to set OS arguments for CLI commands to make them testable --- cmd/eval-dev-quality/cmd/command.go | 4 ++-- cmd/eval-dev-quality/main.go | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cmd/eval-dev-quality/cmd/command.go b/cmd/eval-dev-quality/cmd/command.go index eacfbda2..0597441b 100644 --- a/cmd/eval-dev-quality/cmd/command.go +++ b/cmd/eval-dev-quality/cmd/command.go @@ -13,7 +13,7 @@ type Command struct { } // Execute executes the root command. -func Execute() { +func Execute(arguments []string) { var parser = flags.NewNamedParser("eval-dev-quality", flags.Default) parser.LongDescription = "Command to manage, update and actually execute the `eval-dev-quality` evaluation benchmark." if _, err := parser.AddGroup("Common command options", "", &Command{}); err != nil { @@ -23,7 +23,7 @@ func Execute() { // Print the help, when there is no active command. parser.SubcommandsOptional = true - if _, err := parser.Parse(); err != nil { + if _, err := parser.ParseArgs(arguments); err != nil { if e, ok := err.(*flags.Error); ok && e.Type == flags.ErrHelp { return } diff --git a/cmd/eval-dev-quality/main.go b/cmd/eval-dev-quality/main.go index 2c6d89f3..8f604996 100644 --- a/cmd/eval-dev-quality/main.go +++ b/cmd/eval-dev-quality/main.go @@ -1,9 +1,11 @@ package main import ( + "os" + "github.com/symflower/eval-dev-quality/cmd/eval-dev-quality/cmd" ) func main() { - cmd.Execute() + cmd.Execute(os.Args[1:]) } From 3647eb99cbdd8829f1c3a23690d2e2ea991e3637 Mon Sep 17 00:00:00 2001 From: Markus Zimmermann Date: Thu, 18 Apr 2024 15:57:26 +0200 Subject: [PATCH 2/3] Integration testing for the binary --- cmd/eval-dev-quality/cmd/command_test.go | 49 +++++++++++++++++ cmd/eval-dev-quality/cmd/evaluate_test.go | 64 +++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 cmd/eval-dev-quality/cmd/command_test.go create mode 100644 cmd/eval-dev-quality/cmd/evaluate_test.go diff --git a/cmd/eval-dev-quality/cmd/command_test.go b/cmd/eval-dev-quality/cmd/command_test.go new file mode 100644 index 00000000..adb17bc5 --- /dev/null +++ b/cmd/eval-dev-quality/cmd/command_test.go @@ -0,0 +1,49 @@ +package cmd + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/zimmski/osutil" + "github.com/zimmski/osutil/bytesutil" +) + +func TestExecute(t *testing.T) { + type testCase struct { + Name string + + Arguments []string + + ExpectedOutput string + ExpectedError error + } + + validate := func(t *testing.T, tc *testCase) { + t.Run(tc.Name, func(t *testing.T) { + actualOutput, actualError := osutil.Capture(func() { + Execute(tc.Arguments) + }) + + assert.Equal(t, tc.ExpectedOutput, string(actualOutput)) + assert.Equal(t, tc.ExpectedError, actualError) + }) + } + + validate(t, &testCase{ + Name: "No arguments should show help", + + ExpectedOutput: bytesutil.StringTrimIndentations(` + Usage: + eval-dev-quality [OPTIONS] [evaluate] + + Command to manage, update and actually execute the ` + "`" + `eval-dev-quality` + "`" + ` + evaluation benchmark. + + Help Options: + -h, --help Show this help message + + Available commands: + evaluate Run an evaluation, by default with all defined models, repositories and tasks. + `), + }) +} diff --git a/cmd/eval-dev-quality/cmd/evaluate_test.go b/cmd/eval-dev-quality/cmd/evaluate_test.go new file mode 100644 index 00000000..d1e5bb4c --- /dev/null +++ b/cmd/eval-dev-quality/cmd/evaluate_test.go @@ -0,0 +1,64 @@ +package cmd + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/zimmski/osutil" +) + +func TestEvaluateExecute(t *testing.T) { + type testCase struct { + Name string + + Arguments []string + + ExpectedOutputContains string + ExpectedError error + ExpectedResultFiles []string + } + + validate := func(t *testing.T, tc *testCase) { + t.Run(tc.Name, func(t *testing.T) { + temporaryPath := t.TempDir() + + actualOutput, actualError := osutil.Capture(func() { + Execute(append([]string{ + "evaluate", + "--result-path", temporaryPath, + "--testdata", "../../../testdata", + }, tc.Arguments...)) + }) + + assert.Contains(t, string(actualOutput), tc.ExpectedOutputContains) + assert.Equal(t, tc.ExpectedError, actualError) + + actualResultFiles, err := osutil.FilesRecursive(temporaryPath) + require.NoError(t, err) + for i, p := range actualResultFiles { + actualResultFiles[i], err = filepath.Rel(temporaryPath, p) + require.NoError(t, err) + } + assert.Equal(t, tc.ExpectedResultFiles, actualResultFiles) + }) + } + + validate(t, &testCase{ + Name: "Plain", + + Arguments: []string{ + "--language", "golang", + "--model", "symflower/symbolic-execution", + "--repository", "golang/plain", + }, + + ExpectedOutputContains: `Evaluation score for "symflower/symbolic-execution": score=6, coverage-statement=1, files-executed=1, response-no-error=1, response-no-excess=1, response-not-empty=1, response-with-code=1`, + ExpectedResultFiles: []string{ + "evaluation.csv", + "evaluation.log", + "symflower_symbolic-execution/golang/golang/plain.log", + }, + }) +} From 249890b2a319324bdefbf0ae74c573e88e46d04e Mon Sep 17 00:00:00 2001 From: Markus Zimmermann Date: Thu, 18 Apr 2024 15:58:08 +0200 Subject: [PATCH 3/3] Forward logger objects to languages and models to forward their entries into the right log --- evaluate/metrics/testing/assessments.go | 1 + evaluate/repository.go | 6 +++--- language/golang.go | 7 ++++--- language/golang_test.go | 20 ++++++++++++++++++-- language/language.go | 6 ++++-- log/logger.go | 9 +++++++++ model/llm/llm.go | 2 +- model/llm/llm_test.go | 13 ++++++++++--- model/llm/prompt/parse.go | 3 ++- model/llm/prompt/parse_test.go | 2 +- model/model.go | 4 +++- model/symflower/symflower.go | 6 ++++-- model/symflower/symflower_test.go | 15 +++++++++++---- provider/provider.go | 1 + util/exec.go | 2 +- 15 files changed, 73 insertions(+), 24 deletions(-) diff --git a/evaluate/metrics/testing/assessments.go b/evaluate/metrics/testing/assessments.go index 1fa16369..7e36e257 100644 --- a/evaluate/metrics/testing/assessments.go +++ b/evaluate/metrics/testing/assessments.go @@ -4,6 +4,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/symflower/eval-dev-quality/evaluate/metrics" ) diff --git a/evaluate/repository.go b/evaluate/repository.go index cc97685a..a3c069bc 100644 --- a/evaluate/repository.go +++ b/evaluate/repository.go @@ -48,14 +48,14 @@ func EvaluateRepository(resultPath string, model model.Model, language language. return nil, problems, pkgerrors.WithStack(err) } - filePaths, err := language.Files(dataPath) + filePaths, err := language.Files(log, dataPath) if err != nil { return nil, problems, pkgerrors.WithStack(err) } repositoryAssessment = metrics.NewAssessments() for _, filePath := range filePaths { - assessments, err := model.GenerateTestsForFile(language, temporaryRepositoryPath, filePath) + assessments, err := model.GenerateTestsForFile(log, language, temporaryRepositoryPath, filePath) if err != nil { problems = append(problems, pkgerrors.WithMessage(err, filePath)) @@ -64,7 +64,7 @@ func EvaluateRepository(resultPath string, model model.Model, language language. repositoryAssessment.Add(assessments) repositoryAssessment[metrics.AssessmentKeyResponseNoError]++ - coverage, err := language.Execute(temporaryRepositoryPath) + coverage, err := language.Execute(log, temporaryRepositoryPath) if err != nil { problems = append(problems, pkgerrors.WithMessage(err, filePath)) diff --git a/language/golang.go b/language/golang.go index 23f30998..9854cbdf 100644 --- a/language/golang.go +++ b/language/golang.go @@ -2,6 +2,7 @@ package language import ( "errors" + "log" "os" "path/filepath" "regexp" @@ -34,7 +35,7 @@ func (language *LanguageGolang) Name() (id string) { } // Files returns a list of relative file paths of the repository that should be evaluated. -func (language *LanguageGolang) Files(repositoryPath string) (filePaths []string, err error) { +func (language *LanguageGolang) Files(log *log.Logger, repositoryPath string) (filePaths []string, err error) { repositoryPath, err = filepath.Abs(repositoryPath) if err != nil { return nil, pkgerrors.WithStack(err) @@ -62,8 +63,8 @@ var languageGoCoverageMatch = regexp.MustCompile(`(?m)^coverage: (\d+\.?\d+)% of var languageGoNoCoverageMatch = regexp.MustCompile(`(?m)^coverage: \[no statements\]$`) // Execute invokes the language specific testing on the given repository. -func (language *LanguageGolang) Execute(repositoryPath string) (coverage float64, err error) { - stdout, _, err := util.CommandWithResult(&util.Command{ +func (language *LanguageGolang) Execute(log *log.Logger, repositoryPath string) (coverage float64, err error) { + stdout, _, err := util.CommandWithResult(log, &util.Command{ Command: []string{ "gotestsum", "--format", "standard-verbose", // Keep formatting consistent. diff --git a/language/golang_test.go b/language/golang_test.go index dd0d6212..809b6231 100644 --- a/language/golang_test.go +++ b/language/golang_test.go @@ -9,6 +9,8 @@ import ( "github.com/stretchr/testify/require" "github.com/zimmski/osutil" "github.com/zimmski/osutil/bytesutil" + + "github.com/symflower/eval-dev-quality/log" ) func TestLanguageGolangFiles(t *testing.T) { @@ -25,10 +27,17 @@ func TestLanguageGolangFiles(t *testing.T) { validate := func(t *testing.T, tc *testCase) { t.Run(tc.Name, func(t *testing.T) { + log, logger := log.Buffer() + defer func() { + if t.Failed() { + t.Log(log.String()) + } + }() + if tc.LanguageGolang == nil { tc.LanguageGolang = &LanguageGolang{} } - actualFilePaths, actualError := tc.LanguageGolang.Files(tc.RepositoryPath) + actualFilePaths, actualError := tc.LanguageGolang.Files(logger, tc.RepositoryPath) assert.Equal(t, tc.ExpectedFilePaths, actualFilePaths) assert.Equal(t, tc.ExpectedError, actualError) @@ -62,6 +71,13 @@ func TestLanguageGolangExecute(t *testing.T) { validate := func(t *testing.T, tc *testCase) { t.Run(tc.Name, func(t *testing.T) { + log, logger := log.Buffer() + defer func() { + if t.Failed() { + t.Log(log.String()) + } + }() + temporaryPath := t.TempDir() repositoryPath := filepath.Join(temporaryPath, filepath.Base(tc.RepositoryPath)) require.NoError(t, osutil.CopyTree(tc.RepositoryPath, repositoryPath)) @@ -73,7 +89,7 @@ func TestLanguageGolangExecute(t *testing.T) { if tc.LanguageGolang == nil { tc.LanguageGolang = &LanguageGolang{} } - actualCoverage, actualError := tc.LanguageGolang.Execute(repositoryPath) + actualCoverage, actualError := tc.LanguageGolang.Execute(logger, repositoryPath) if tc.ExpectedError != nil { assert.ErrorIs(t, actualError, tc.ExpectedError) diff --git a/language/language.go b/language/language.go index 0cca2583..f1549442 100644 --- a/language/language.go +++ b/language/language.go @@ -1,6 +1,8 @@ package language import ( + "log" + pkgerrors "github.com/pkg/errors" ) @@ -12,10 +14,10 @@ type Language interface { Name() (id string) // Files returns a list of relative file paths of the repository that should be evaluated. - Files(repositoryPath string) (filePaths []string, err error) + Files(log *log.Logger, repositoryPath string) (filePaths []string, err error) // Execute invokes the language specific testing on the given repository. - Execute(repositoryPath string) (coverage float64, err error) + Execute(log *log.Logger, repositoryPath string) (coverage float64, err error) } // Languages holds a register of all languages. diff --git a/log/logger.go b/log/logger.go index 4179b158..c9dde8dc 100644 --- a/log/logger.go +++ b/log/logger.go @@ -1,6 +1,7 @@ package log import ( + "bytes" "io" "log" "os" @@ -9,6 +10,14 @@ import ( pkgerrors "github.com/pkg/errors" ) +// Buffer returns a logger that writes to a buffer. +func Buffer() (buffer *bytes.Buffer, logger *log.Logger) { + buffer = new(bytes.Buffer) + logger = log.New(buffer, "", log.LstdFlags) + + return buffer, logger +} + // File returns a logger that writes to a file. func File(path string) (logger *log.Logger, loggerClose func(), err error) { if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { diff --git a/model/llm/llm.go b/model/llm/llm.go index 70b8d67f..94f2b4f6 100644 --- a/model/llm/llm.go +++ b/model/llm/llm.go @@ -78,7 +78,7 @@ func (m *llm) ID() (id string) { } // GenerateTestsForFile generates test files for the given implementation file in a repository. -func (m *llm) GenerateTestsForFile(language language.Language, repositoryPath string, filePath string) (assessment metrics.Assessments, err error) { +func (m *llm) GenerateTestsForFile(log *log.Logger, language language.Language, repositoryPath string, filePath string) (assessment metrics.Assessments, err error) { data, err := os.ReadFile(filepath.Join(repositoryPath, filePath)) if err != nil { return nil, pkgerrors.WithStack(err) diff --git a/model/llm/llm_test.go b/model/llm/llm_test.go index 5085e824..af2a7fa1 100644 --- a/model/llm/llm_test.go +++ b/model/llm/llm_test.go @@ -12,9 +12,9 @@ import ( "github.com/zimmski/osutil/bytesutil" "github.com/symflower/eval-dev-quality/evaluate/metrics" - "github.com/symflower/eval-dev-quality/language" - metricstesting "github.com/symflower/eval-dev-quality/evaluate/metrics/testing" + "github.com/symflower/eval-dev-quality/language" + "github.com/symflower/eval-dev-quality/log" providertesting "github.com/symflower/eval-dev-quality/provider/testing" ) @@ -36,6 +36,13 @@ func TestModelLLMGenerateTestsForFile(t *testing.T) { validate := func(t *testing.T, tc *testCase) { t.Run(tc.Name, func(t *testing.T) { + log, logger := log.Buffer() + defer func() { + if t.Failed() { + t.Log(log.String()) + } + }() + temporaryPath := t.TempDir() temporaryPath = filepath.Join(temporaryPath, "native") require.NoError(t, os.Mkdir(temporaryPath, 0755)) @@ -46,7 +53,7 @@ func TestModelLLMGenerateTestsForFile(t *testing.T) { tc.SetupMock(mock) llm := NewLLMModel(mock, tc.ModelID) - actualAssessment, actualError := llm.GenerateTestsForFile(tc.Language, temporaryPath, tc.SourceFilePath) + actualAssessment, actualError := llm.GenerateTestsForFile(logger, tc.Language, temporaryPath, tc.SourceFilePath) assert.NoError(t, actualError) metricstesting.AssertAssessmentsEqual(t, tc.ExpectedAssessment, actualAssessment) diff --git a/model/llm/prompt/parse.go b/model/llm/prompt/parse.go index 85274379..afbb8501 100644 --- a/model/llm/prompt/parse.go +++ b/model/llm/prompt/parse.go @@ -4,8 +4,9 @@ import ( "regexp" "strings" - "github.com/symflower/eval-dev-quality/evaluate/metrics" "github.com/zimmski/osutil/bytesutil" + + "github.com/symflower/eval-dev-quality/evaluate/metrics" ) var ( diff --git a/model/llm/prompt/parse_test.go b/model/llm/prompt/parse_test.go index fae61ff1..1ee99af5 100644 --- a/model/llm/prompt/parse_test.go +++ b/model/llm/prompt/parse_test.go @@ -5,9 +5,9 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/symflower/eval-dev-quality/evaluate/metrics" "github.com/zimmski/osutil/bytesutil" + "github.com/symflower/eval-dev-quality/evaluate/metrics" metricstesting "github.com/symflower/eval-dev-quality/evaluate/metrics/testing" ) diff --git a/model/model.go b/model/model.go index 01210a72..c159a6e4 100644 --- a/model/model.go +++ b/model/model.go @@ -1,6 +1,8 @@ package model import ( + "log" + "github.com/symflower/eval-dev-quality/evaluate/metrics" "github.com/symflower/eval-dev-quality/language" ) @@ -11,5 +13,5 @@ type Model interface { ID() (id string) // GenerateTestsForFile generates test files for the given implementation file in a repository. - GenerateTestsForFile(language language.Language, repositoryPath string, filePath string) (assessments metrics.Assessments, err error) + GenerateTestsForFile(log *log.Logger, language language.Language, repositoryPath string, filePath string) (assessments metrics.Assessments, err error) } diff --git a/model/symflower/symflower.go b/model/symflower/symflower.go index eb706d1e..c04044bf 100644 --- a/model/symflower/symflower.go +++ b/model/symflower/symflower.go @@ -1,6 +1,8 @@ package symflower import ( + "log" + pkgerrors "github.com/pkg/errors" "github.com/symflower/eval-dev-quality/evaluate/metrics" @@ -21,8 +23,8 @@ func (m *ModelSymflower) ID() (id string) { } // GenerateTestsForFile generates test files for the given implementation file in a repository. -func (m *ModelSymflower) GenerateTestsForFile(language language.Language, repositoryPath string, filePath string) (assessment metrics.Assessments, err error) { - _, _, err = util.CommandWithResult(&util.Command{ +func (m *ModelSymflower) GenerateTestsForFile(log *log.Logger, language language.Language, repositoryPath string, filePath string) (assessment metrics.Assessments, err error) { + _, _, err = util.CommandWithResult(log, &util.Command{ Command: []string{ "symflower", "unit-tests", "--workspace", repositoryPath, diff --git a/model/symflower/symflower_test.go b/model/symflower/symflower_test.go index e3477e18..988e7e95 100644 --- a/model/symflower/symflower_test.go +++ b/model/symflower/symflower_test.go @@ -9,9 +9,9 @@ import ( "github.com/zimmski/osutil" "github.com/symflower/eval-dev-quality/evaluate/metrics" - "github.com/symflower/eval-dev-quality/language" - metricstesting "github.com/symflower/eval-dev-quality/evaluate/metrics/testing" + "github.com/symflower/eval-dev-quality/language" + "github.com/symflower/eval-dev-quality/log" ) func TestModelSymflowerGenerateTestsForFile(t *testing.T) { @@ -33,6 +33,13 @@ func TestModelSymflowerGenerateTestsForFile(t *testing.T) { validate := func(t *testing.T, tc *testCase) { t.Run(tc.Name, func(t *testing.T) { + log, logger := log.Buffer() + defer func() { + if t.Failed() { + t.Log(log.String()) + } + }() + temporaryPath := t.TempDir() repositoryPath := filepath.Join(temporaryPath, filepath.Base(tc.RepositoryPath)) require.NoError(t, osutil.CopyTree(tc.RepositoryPath, repositoryPath)) @@ -44,7 +51,7 @@ func TestModelSymflowerGenerateTestsForFile(t *testing.T) { if tc.ModelSymflower == nil { tc.ModelSymflower = &ModelSymflower{} } - actualAssessment, actualError := tc.ModelSymflower.GenerateTestsForFile(tc.Language, repositoryPath, tc.FilePath) + actualAssessment, actualError := tc.ModelSymflower.GenerateTestsForFile(logger, tc.Language, repositoryPath, tc.FilePath) if tc.ExpectedError != nil { assert.ErrorIs(t, tc.ExpectedError, actualError) @@ -53,7 +60,7 @@ func TestModelSymflowerGenerateTestsForFile(t *testing.T) { } metricstesting.AssertAssessmentsEqual(t, tc.ExpectedAssessment, actualAssessment) - actualCoverage, err := tc.Language.Execute(repositoryPath) + actualCoverage, err := tc.Language.Execute(logger, repositoryPath) require.NoError(t, err) assert.Equal(t, tc.ExpectedCoverage, actualCoverage) }) diff --git a/provider/provider.go b/provider/provider.go index 10da3345..211ea752 100644 --- a/provider/provider.go +++ b/provider/provider.go @@ -4,6 +4,7 @@ import ( "context" pkgerrors "github.com/pkg/errors" + "github.com/symflower/eval-dev-quality/model" ) diff --git a/util/exec.go b/util/exec.go index f99dc4c2..92951645 100644 --- a/util/exec.go +++ b/util/exec.go @@ -21,7 +21,7 @@ type Command struct { } // CommandWithResult executes a command, and prints and returns STDERR/STDOUT. -func CommandWithResult(command *Command) (stdout string, stderr string, err error) { +func CommandWithResult(log *log.Logger, command *Command) (stdout string, stderr string, err error) { log.Printf("$ %s", strings.Join(command.Command, " ")) var stdoutWriter bytes.Buffer