Skip to content

Commit

Permalink
Automatically install Symflower with a fixed version for the CI, deve…
Browse files Browse the repository at this point in the history
…lopment environment and evaluation benchmark

Closes #47
  • Loading branch information
zimmski committed Apr 19, 2024
1 parent 4e2f0c1 commit dab0ecb
Show file tree
Hide file tree
Showing 12 changed files with 326 additions and 18 deletions.
18 changes: 5 additions & 13 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,13 @@ jobs:
with:
go-version: '1.21'

- name: Install Symflower
shell: bash
run: |
mkdir -p $PWD/bin/
export OS=${OS:-"$(uname -s | awk '{print tolower($0)}')"}
export ARCH=${ARCH:-"$(uname -m)"}
curl -SLf -o $PWD/bin/symflower https://download.symflower.com/local/latest/symflower-$OS-$ARCH
chmod +x $PWD/bin/symflower
echo "PATH=$PWD/bin:$PATH" >> $GITHUB_ENV
- name: Install testing tools
run: make install-tools-testing

- name: Build
run: make install

- name: Install testing tools
run: |
make install-tools-testing
echo "PATH=$HOME/.eval-dev-quality/bin:$PATH" >> $GITHUB_ENV
- name: Test
run: make test
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ install: # [<Go package] - # Build and install everything, or only the specified
go install -v $(PACKAGE)
.PHONY: install

install-all: install-tools-testing install # Install everything for and of this repository.
install-all: install install-tools-testing # Install everything for and of this repository.
.PHONY: install-all

install-tools-testing: # Install tools that are used for testing.
go install -v gotest.tools/[email protected]
eval-dev-quality install-tools
.PHONY: install-tools-testing

test: # [<Go package] - # Test everything, or only the specified package.
Expand Down
3 changes: 2 additions & 1 deletion cmd/eval-dev-quality/cmd/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import (

// Command holds the root command.
type Command struct {
Evaluate `command:"evaluate" description:"Run an evaluation, by default with all defined models, repositories and tasks."`
Evaluate `command:"evaluate" description:"Run an evaluation, by default with all defined models, repositories and tasks."`
InstallTools `command:"install-tools" description:"Checks and installs all tools required for the evaluation benchmark."`
}

// Execute executes the root command.
Expand Down
5 changes: 3 additions & 2 deletions cmd/eval-dev-quality/cmd/command_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func TestExecute(t *testing.T) {

ExpectedOutput: bytesutil.StringTrimIndentations(`
Usage:
eval-dev-quality [OPTIONS] [evaluate]
eval-dev-quality [OPTIONS] [evaluate | install-tools]
Command to manage, update and actually execute the ` + "`" + `eval-dev-quality` + "`" + `
evaluation benchmark.
Expand All @@ -43,7 +43,8 @@ func TestExecute(t *testing.T) {
-h, --help Show this help message
Available commands:
evaluate Run an evaluation, by default with all defined models, repositories and tasks.
evaluate Run an evaluation, by default with all defined models, repositories and tasks.
install-tools Checks and installs all tools required for the evaluation benchmark.
`),
})
}
21 changes: 20 additions & 1 deletion cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@ import (
"github.com/symflower/eval-dev-quality/provider"
_ "github.com/symflower/eval-dev-quality/provider/openrouter"
_ "github.com/symflower/eval-dev-quality/provider/symflower"
"github.com/symflower/eval-dev-quality/tools"
)

// Evaluate holds the "evaluation" command.
type Evaluate struct {
// InstallToolsPath determines where tools for the evaluation are installed.
InstallToolsPath string `long:"install-tools-path" description:"Install tools for the evaluation into this path."`

// Languages determines which language should be used for the evaluation, or empty if all languages should be used.
Languages []string `long:"language" description:"Evaluate with this language. By default all languages are used."`
// Models determines which models should be used for the evaluation, or empty if all models should be used.
Expand All @@ -41,6 +45,7 @@ type Evaluate struct {
// repositoryPlainName holds the name of the plain repository.
const repositoryPlainName = "plain"

// Execute executes the command.
func (command *Evaluate) Execute(args []string) (err error) {
command.ResultPath = strings.ReplaceAll(command.ResultPath, "%datetime%", time.Now().Format("2006-01-02-15:04:05")) // REMARK Use a datetime format with a dash, so directories can be easily marked because they are only one group.

Expand Down Expand Up @@ -116,6 +121,20 @@ func (command *Evaluate) Execute(args []string) (err error) {
log.Fatalf("ERROR: could not resolve testdata path %q to an absolute path: %s", command.TestdataPath, err)
}

// Install required tools for the basic evaluation.
{
if command.InstallToolsPath == "" {
command.InstallToolsPath, err = tools.InstallPathDefault()
if err != nil {
log.Fatalf("ERROR: %s", err)
}
}

if err := tools.Install(log, command.InstallToolsPath); err != nil {
log.Fatalf("ERROR: %s", err)
}
}

// Check that models and languages can be evaluated by executing the "plain" repositories.
log.Printf("Checking that models and languages can be used for evaluation")
// Ensure we report metrics for every model even if they are excluded.
Expand Down Expand Up @@ -188,7 +207,7 @@ func (command *Evaluate) Execute(args []string) (err error) {
totalScore := uint(0)
// Set the total score to the number of evaluated languages if we are just checking the "plain" repositories since there is only one task to solve per language.
isOnlyPlainRepositories := true
for repository, _ := range commandRepositories {
for repository := range commandRepositories {
if filepath.Base(repository) != repositoryPlainName {
isOnlyPlainRepositories = false

Expand Down
32 changes: 32 additions & 0 deletions cmd/eval-dev-quality/cmd/install-tools.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package cmd

import (
"github.com/symflower/eval-dev-quality/log"
_ "github.com/symflower/eval-dev-quality/provider/openrouter"
_ "github.com/symflower/eval-dev-quality/provider/symflower"
"github.com/symflower/eval-dev-quality/tools"
)

// InstallTools holds the "install-tools" command.
type InstallTools struct {
// InstallToolsPath determines where tools for the evaluation are installed.
// If left empty, "Execute" replaces it with the path returned by "tools.InstallPathDefault".
InstallToolsPath string `long:"install-tools-path" description:"Install tools for the evaluation into this path."`
}

// Execute executes the command.
// It installs all tools into "InstallToolsPath", falling back to the default install path if none is set, and terminates the process via the logger on any failure.
func (command *InstallTools) Execute(args []string) (err error) {
	logger := log.STDOUT()

	// Fall back to the default installation path when none was given.
	if len(command.InstallToolsPath) == 0 {
		if command.InstallToolsPath, err = tools.InstallPathDefault(); err != nil {
			logger.Fatalf("ERROR: %s", err)
		}
	}

	err = tools.Install(logger, command.InstallToolsPath)
	if err != nil {
		logger.Fatalf("ERROR: %s", err)
	}

	return nil
}
56 changes: 56 additions & 0 deletions cmd/eval-dev-quality/cmd/install-tools_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package cmd

import (
"os"
"os/exec"
"path/filepath"
"strings"
"testing"

"github.com/stretchr/testify/require"
"github.com/zimmski/osutil"
)

// TestInstallToolsExecute validates that the "install-tools" command installs "symflower" on the first run and does not install it again on the second run.
func TestInstallToolsExecute(t *testing.T) {
	installPath := t.TempDir()

	// Limit the binary search path to the temporary install path and the directory of "chmod", which is looked up before the search path is replaced.
	chmodBinary, err := exec.LookPath("chmod")
	require.NoError(t, err)
	searchPaths := []string{installPath, filepath.Dir(chmodBinary)}
	t.Setenv("PATH", strings.Join(searchPaths, string(os.PathListSeparator)))

	// runInstallTools executes the "install-tools" command and returns its captured output.
	runInstallTools := func(t *testing.T) (output string) {
		captured, err := osutil.Capture(func() {
			Execute([]string{
				"install-tools",
				"--install-tools-path", installPath,
			})
		})
		require.NoError(t, err)

		return string(captured)
	}
	// requireSymflowerFound asserts that a "symflower" binary can be looked up.
	requireSymflowerFound := func(t *testing.T) {
		binaryPath, err := exec.LookPath("symflower")
		require.NoError(t, err)
		require.NotEmpty(t, binaryPath)
	}

	t.Run("Tools are not yet installed", func(t *testing.T) {
		binaryPath, err := exec.LookPath("symflower")
		require.Error(t, err)
		require.Empty(t, binaryPath)
	})

	t.Run("Install tools for first time which should install all tools", func(t *testing.T) {
		output := runInstallTools(t)

		require.Contains(t, output, `Install "symflower" to`)
		requireSymflowerFound(t)
	})

	t.Run("Install tools a second time which should install no new tools", func(t *testing.T) {
		output := runInstallTools(t)

		require.NotContains(t, output, `Install "symflower" to`)
		requireSymflowerFound(t)
	})
}
5 changes: 5 additions & 0 deletions log/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,8 @@ func FileAndSTDOUT(filePath string) (logger *log.Logger, loggerClose func(), err

return logger, loggerClose, nil
}

// STDOUT returns a logger that writes to STDOUT, using no prefix and the standard date and time flags.
func STDOUT() (logger *log.Logger) {
	logger = log.New(os.Stdout, "", log.LstdFlags)

	return logger
}
114 changes: 114 additions & 0 deletions tools/symflower.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package tools

import (
"errors"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"

pkgerrors "github.com/pkg/errors"
"github.com/symflower/eval-dev-quality/util"
"github.com/zimmski/osutil"
)

// SymflowerVersion holds the version of Symflower required for this revision of the evaluation.
// It is embedded into the download URL that "SymflowerInstall" uses to fetch the binary.
const SymflowerVersion = "35657"

// SymflowerInstall checks if the "symflower" binary has been installed, and if yes, updates it if necessary and possible.
//
// The install path is resolved to an absolute path and appended to the binary search path of the current process if it is not yet part of it.
// If a "symflower" binary with at least version "SymflowerVersion" is found, nothing is installed.
// An outdated binary is only replaced if it resides in the install path, otherwise an error is returned so the user can handle the update.
// In all remaining cases the binary for the current operating system and architecture is downloaded into the install path and made executable.
func SymflowerInstall(log *log.Logger, installPath string) (err error) {
	installPath, err = filepath.Abs(installPath)
	if err != nil {
		return pkgerrors.WithStack(err)
	}

	// Check if install path is already used for binaries, or add it if not.
	installPathUsed := false
	for _, p := range strings.Split(os.Getenv(osutil.EnvironmentPathIdentifier), string(os.PathListSeparator)) {
		p = filepath.Clean(p)
		if p == installPath {
			installPathUsed = true

			break
		}
	}
	if !installPathUsed {
		// Add the install path last, so we are not overwriting other binaries.
		searchPath := strings.Join([]string{os.Getenv(osutil.EnvironmentPathIdentifier), installPath}, string(os.PathListSeparator))
		if err := os.Setenv(osutil.EnvironmentPathIdentifier, searchPath); err != nil {
			return pkgerrors.WithStack(err)
		}
	}

	// Check if the "symflower" binary can already be used.
	symflowerPath, err := exec.LookPath("symflower")
	if err == nil {
		log.Printf("Checking \"symflower\" binary %s", symflowerPath)

		symflowerVersionOutput, _, err := util.CommandWithResult(log, &util.Command{
			Command: []string{symflowerPath, "version"},
		})
		if err != nil {
			return pkgerrors.WithStack(err)
		}

		m := regexp.MustCompile(`symflower v(\d+) on`).FindStringSubmatch(symflowerVersionOutput)
		if m == nil {
			return pkgerrors.WithStack(pkgerrors.WithMessage(errors.New("cannot find version"), symflowerVersionOutput))
		}

		// Currently the Symflower version is only one integer, so do a poor-man's version comparison.
		symflowerVersionInstalled, err := strconv.ParseUint(m[1], 10, 64)
		if err != nil {
			return pkgerrors.WithStack(err)
		}
		// The wanted version must come from the required version constant, not from the output of the installed binary, otherwise the comparison is always true.
		symflowerVersionWanted, err := strconv.ParseUint(SymflowerVersion, 10, 64)
		if err != nil {
			return pkgerrors.WithStack(err)
		}

		// Binary is installed in a compatible version.
		if symflowerVersionInstalled >= symflowerVersionWanted {
			return nil
		}

		// If the binary got installed by the user, let the user handle the update.
		if filepath.Dir(symflowerPath) != installPath {
			return pkgerrors.WithStack(fmt.Errorf("Found \"symflower\" binary with version %d but need at least %d", symflowerVersionInstalled, symflowerVersionWanted))
		}
	}

	// Install Symflower, as it is either outdated or not installed at all.
	symflowerInstallPath := filepath.Join(installPath, "symflower")
	osIdentifier := runtime.GOOS
	var architectureIdentifier string
	switch a := runtime.GOARCH; a {
	case "386":
		architectureIdentifier = "x86"
	case "amd64":
		architectureIdentifier = "x86_64"
	case "arm":
		architectureIdentifier = "arm"
	case "arm64":
		architectureIdentifier = "arm64"
	default:
		// Create a fresh error: the outer "err" is either nil or the unrelated lookup error at this point, and wrapping a nil error would report success.
		return pkgerrors.WithStack(fmt.Errorf("unknown architecture %s", a))
	}

	if err := os.MkdirAll(installPath, 0755); err != nil {
		return pkgerrors.WithStack(err)
	}

	log.Printf("Install \"symflower\" to %s", symflowerInstallPath)
	if err := osutil.DownloadFileWithProgress("https://download.symflower.com/local/v"+SymflowerVersion+"/symflower-"+osIdentifier+"-"+architectureIdentifier, symflowerInstallPath); err != nil {
		return pkgerrors.WithStack(pkgerrors.WithMessage(err, fmt.Sprintf("cannot download to %s", symflowerInstallPath)))
	}
	if _, _, err := util.CommandWithResult(log, &util.Command{
		Command: []string{"chmod", "+x", symflowerInstallPath},
	}); err != nil {
		return pkgerrors.WithStack(pkgerrors.WithMessage(err, fmt.Sprintf("cannot make %s executable", symflowerInstallPath)))
	}

	return nil
}
46 changes: 46 additions & 0 deletions tools/symflower_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package tools

import (
"os"
"os/exec"
"path/filepath"
"strings"
"testing"

"github.com/stretchr/testify/require"
"github.com/symflower/eval-dev-quality/log"
)

// TestSymflowerInstall validates that "SymflowerInstall" installs the binary on the first call and does not install it again on the second call.
func TestSymflowerInstall(t *testing.T) {
	installPath := t.TempDir()

	// Limit the binary search path to the temporary install path and the directory of "chmod", which "SymflowerInstall" executes to make the binary executable.
	chmodBinary, err := exec.LookPath("chmod")
	require.NoError(t, err)
	searchPaths := []string{installPath, filepath.Dir(chmodBinary)}
	t.Setenv("PATH", strings.Join(searchPaths, string(os.PathListSeparator)))

	// install runs the installation and returns everything that was logged.
	install := func(t *testing.T) (logged string) {
		buffer, logger := log.Buffer()
		require.NoError(t, SymflowerInstall(logger, installPath))

		return buffer.String()
	}
	// requireSymflowerFound asserts that a "symflower" binary can be looked up.
	requireSymflowerFound := func(t *testing.T) {
		binaryPath, err := exec.LookPath("symflower")
		require.NoError(t, err)
		require.NotEmpty(t, binaryPath)
	}

	t.Run("Tools are not yet installed", func(t *testing.T) {
		binaryPath, err := exec.LookPath("symflower")
		require.Error(t, err)
		require.Empty(t, binaryPath)
	})

	t.Run("Install tools for first time which should install all tools", func(t *testing.T) {
		logged := install(t)

		require.Contains(t, logged, `Install "symflower" to`)
		requireSymflowerFound(t)
	})

	t.Run("Install tools a second time which should install no new tools", func(t *testing.T) {
		logged := install(t)

		require.NotContains(t, logged, `Install "symflower" to`)
		requireSymflowerFound(t)
	})
}
Loading

0 comments on commit dab0ecb

Please sign in to comment.