Skip to content

Commit

Permalink
fixing promptfoo output (#876)
Browse files Browse the repository at this point in the history
* upgrade promptfoo

* test: ➕ Add simple math test for 1+1 calculation

* feat: 🚀 add vision model option to CLI commands

* refactor: improve model configuration labels ⚙️

* proper format of options

* refactor: ♻️ redirect stdout/stderr in logging module

* better passing of test options
  • Loading branch information
pelikhan authored Nov 19, 2024
1 parent 903ba97 commit e4567b6
Show file tree
Hide file tree
Showing 15 changed files with 433 additions and 375 deletions.
14 changes: 7 additions & 7 deletions THIRD_PARTY_LICENSES.md
Original file line number Diff line number Diff line change
Expand Up @@ -1244,8 +1244,8 @@ Apache License

The following npm packages may be included in this product:

- playwright-core@1.48.2
- playwright@1.48.2
- playwright-core@1.49.0
- playwright@1.49.0

These packages each contain the following license:

Expand Down Expand Up @@ -1702,7 +1702,7 @@ MIT License

The following npm package may be included in this product:

- genaiscript-vscode@1.75.3
- genaiscript-vscode@1.76.0

This package contains the following license:

Expand Down Expand Up @@ -4862,9 +4862,9 @@ The following npm packages may be included in this product:
- @tokenizer/token@0.3.0
- [email protected]
- [email protected]
- genaiscript-core-internal@1.75.3
- genaiscript-sample@1.75.3
- genaiscript@1.75.3
- genaiscript-core-internal@1.76.0
- genaiscript-sample@1.76.0
- genaiscript@1.76.0
- [email protected]
- [email protected]
- [email protected]
Expand Down Expand Up @@ -8019,7 +8019,7 @@ THE SOFTWARE.

The following npm package may be included in this product:

- [email protected].5
- [email protected].6

This package contains the following license:

Expand Down
3 changes: 2 additions & 1 deletion docs/src/content/docs/reference/cli/commands.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ Arguments:
Options:
-m, --model <string> model for the run
-sm, --small-model <string> small model for the run
-vm, --vision-model <string> 'vision' alias model
--models <models...> models to test where mode is the key
value pair list of m (model), s (small
model), t (temperature), p (top-p)
Expand All @@ -95,7 +96,7 @@ Options:
-td, --test-delay <string> delay between tests in seconds
--cache enable LLM result cache
-v, --verbose verbose output
-pv, --promptfoo-version [version] promptfoo version, default is 0.94.5
-pv, --promptfoo-version [version] promptfoo version, default is 0.97.0
-os, --out-summary <file> append output summary in file
--groups <groups...> groups to include or exclude. Use :!
prefix to exclude
Expand Down
311 changes: 151 additions & 160 deletions docs/yarn.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"node": ">=20.0.0"
},
"peerDependencies": {
"promptfoo": "0.94.5"
"promptfoo": "0.97.0"
},
"devDependencies": {
"@types/diff": "^6.0.0",
Expand Down
1 change: 1 addition & 0 deletions packages/cli/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ export async function cli() {
)
.option("-m, --model <string>", "model for the run")
.option("-sm, --small-model <string>", "small model for the run")
.option("-vm, --vision-model <string>", "'vision' alias model")
.option(
"--models <models...>",
"models to test where mode is the key value pair list of m (model), s (small model), t (temperature), p (top-p)"
Expand Down
31 changes: 18 additions & 13 deletions packages/cli/src/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ import {
CONSOLE_COLOR_DEBUG,
DOCS_CONFIGURATION_URL,
TRACE_DETAILS,
CLI_ENV_VAR_RX,
STATS_DIR_NAME,
GENAI_ANYTS_REGEX,
CONSOLE_TOKEN_COLORS,
Expand Down Expand Up @@ -83,6 +82,11 @@ import { appendFile } from "node:fs/promises"
import { parseOptionsVars } from "./vars"
import { logprobColor } from "../../core/src/logprob"
import { structuralMerge } from "../../core/src/merge"
import {
overrideStdoutWithStdErr,
stderr,
stdout,
} from "../../core/src/logging"

async function setupTraceWriting(trace: MarkdownTrace, filename: string) {
logVerbose(`trace: ${filename}`)
Expand Down Expand Up @@ -203,6 +207,7 @@ export async function runScript(
const logprobs = options.logprobs
const topLogprobs = normalizeInt(options.topLogprobs)

if (options.json || options.yaml) overrideStdoutWithStdErr()
if (options.model) host.defaultModelOptions.model = options.model
if (options.smallModel)
host.defaultModelOptions.smallModel = options.smallModel
Expand Down Expand Up @@ -330,28 +335,26 @@ export async function runScript(
logprobColor(token),
token.token
)
process.stdout.write(c)
stdout.write(c)
} else {
tokenColor =
(tokenColor + 1) % colors.length
const c = colors[tokenColor]
process.stdout.write(
wrapColor(c, token.token)
)
stdout.write(wrapColor(c, token.token))
}
}
} else {
if (!inner) process.stdout.write(responseChunk)
if (!inner) stdout.write(responseChunk)
else
process.stderr.write(
stderr.write(
wrapColor(
CONSOLE_COLOR_DEBUG,
responseChunk
)
)
}
} else if (!isQuiet)
process.stderr.write(
stderr.write(
wrapColor(CONSOLE_COLOR_DEBUG, responseChunk)
)
}
Expand Down Expand Up @@ -488,13 +491,15 @@ export async function runScript(
edits.after
)
}
} else {
if (options.json && result !== undefined)
console.log(JSON.stringify(result, null, 2))
if (options.yaml && result !== undefined)
console.log(YAMLStringify(result))
}

if (options.json && result !== undefined)
// needs to go to process.stdout
process.stdout.write(JSON.stringify(result, null, 2))
if (options.yaml && result !== undefined)
// needs to go to process.stdout
process.stdout.write(YAMLStringify(result))

let _ghInfo: GithubConnectionInfo = undefined
const resolveGitHubInfo = async () => {
if (!_ghInfo)
Expand Down
11 changes: 7 additions & 4 deletions packages/cli/src/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,6 @@ export async function runPromptScriptTests(
promptfooVersion?: string
outSummary?: string
testDelay?: string
model?: string
smallModel?: string
visionModel?: string
}
): Promise<PromptScriptTestRunResponse> {
if (options.model) host.defaultModelOptions.model = options.model
Expand All @@ -116,6 +113,10 @@ export async function runPromptScriptTests(
if (options.visionModel)
host.defaultModelOptions.visionModel = options.visionModel

logVerbose(
`model: ${host.defaultModelOptions.model}, small model: ${host.defaultModelOptions.smallModel}, vision model: ${host.defaultModelOptions.visionModel}`
)

const scripts = await listTests({ ids, ...(options || {}) })
if (!scripts.length)
return {
Expand Down Expand Up @@ -144,7 +145,9 @@ export async function runPromptScriptTests(
? join(out, `${script.id}.promptfoo.yaml`)
: script.filename.replace(GENAI_ANY_REGEX, ".promptfoo.yaml")
logInfo(` ${fn}`)
const { info } = await resolveModelConnectionInfo(script)
const { info } = await resolveModelConnectionInfo(script, {
model: host.defaultModelOptions.model,
})
if (info.error) throw new Error(info.error)
const config = generatePromptFooConfiguration(script, {
out,
Expand Down
5 changes: 3 additions & 2 deletions packages/core/src/genaiscript-api-provider.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ class GenAIScriptApiProvider {
constructor(options) {
this.config = options.config
this.providerId =
options.id || `genaiscript:${this.config.model || "default"}`
this.label = `genaiscript ${this.config.model || "default"}`
options.id ||
`genaiscript/${this.config.model || "large"}/${this.config.smallModel || "small"}/${this.config.visionModel || "vision"}`
this.label = `genaiscript ${this.config.model || "large"}, ${this.config.smallModel || "small"}, ${this.config.visionModel || "vision"}`
}

id() {
Expand Down
8 changes: 8 additions & 0 deletions packages/core/src/logging.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import inspect from "object-inspect"

/**
 * Writable stream exported for output. It starts out as `process.stdout` and
 * is declared with `let` so that overrideStdoutWithStdErr() can repoint it.
 */
export let stdout: NodeJS.WriteStream = process.stdout

/** Writable stream exported for error output; initialized to `process.stderr`. */
export let stderr: NodeJS.WriteStream = process.stderr

/**
 * Repoints the exported `stdout` binding at `stderr`, so writes made through
 * `stdout` after this call go to the same stream as `stderr`.
 * Code that writes to `process.stdout` directly is not affected.
 */
export function overrideStdoutWithStdErr() {
stdout = stderr
}

export function consoleLogFormat(...args: any[]) {
let line = ""
for (let i = 0; i < args.length; ++i) {
Expand Down
5 changes: 2 additions & 3 deletions packages/core/src/runpromptcontext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import {
parametersToVars,
promptParametersSchemaToJSONSchema,
} from "./parameters"
import { consoleLogFormat } from "./logging"
import { consoleLogFormat, stdout } from "./logging"
import { isGlobMatch } from "./glob"
import { arrayify, logError, logVerbose, logWarn } from "./util"
import { renderShellOutput } from "./chatrender"
Expand All @@ -54,7 +54,6 @@ import {
TOKEN_MISSING_INFO,
TOKEN_NO_ANSWER,
MODEL_PROVIDER_AICI,
SYSTEM_FENCE,
DOCS_DEF_FILES_IS_EMPTY_URL,
} from "./constants"
import { renderAICI } from "./aici"
Expand Down Expand Up @@ -86,7 +85,7 @@ export function createChatTurnGenerationContext(
const line = consoleLogFormat(...args)
if (line) {
trace.log(line)
process.stdout.write(line + "\n")
stdout.write(line + "\n")
}
},
debug: (...args: any[]) => {
Expand Down
9 changes: 8 additions & 1 deletion packages/core/src/server/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,19 @@ export interface ServerEnv extends RequestMessage {
type: "server.env"
}

export interface PromptScriptTestRunOptions {
export interface PromptScriptTestRunOptions
extends PromptScriptModelRunOptions {
testProvider?: string
models?: string[]
groups?: string[]
}

/**
 * Optional model selection fields; every field may be omitted.
 * NOTE(review): presumably each value is a model identifier string that
 * overrides the corresponding default — confirm against the callers.
 */
export interface PromptScriptModelRunOptions {
// main model
model?: string
// small model
smallModel?: string
// vision model
visionModel?: string
}

export interface PromptScriptTestRun extends RequestMessage {
type: "tests.run"
scripts?: string[]
Expand Down
46 changes: 32 additions & 14 deletions packages/core/src/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
LARGE_MODEL_ID,
MODEL_PROVIDER_AZURE_OPENAI,
MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI,
MODEL_PROVIDER_GITHUB,
SMALL_MODEL_ID,
VISION_MODEL_ID,
} from "./constants"
Expand All @@ -17,32 +18,47 @@ import { ModelConnectionInfo } from "./models"
*/
function resolveTestProvider(info: ModelConnectionInfo) {
const { provider, model, base } = info
const apiHost = base
.replace(HTTPS_REGEX, "")
.replace(/\/openai\/deployments$/i, "")
switch (provider) {
case MODEL_PROVIDER_AZURE_OPENAI:
case MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI:
return {
text: {
id: "azureopenai:chat:gpt-4",
config: {
apiHost: base
.replace(HTTPS_REGEX, "")
.replace(/\/openai\/deployments$/i, ""),
apiHost,
},
},
embedding: {
id: "azureopenai:embeddings:text-embedding-ada-002",
config: {
apiHost: base
.replace(HTTPS_REGEX, "")
.replace(/\/openai\/deployments$/i, ""),
apiHost,
},
},
}
case MODEL_PROVIDER_GITHUB:
return {
text: {
id: provider + ":" + model,
},
}
// openai
default:
return {
text: provider + ":chat:" + model,
embedding: provider + ":embeddings:" + base,
text: {
id: provider + ":chat:" + model,
config: {
apiHost,
},
},
embedding: {
id: provider + ":embeddings:" + model,
config: {
apiHost,
},
},
}
}
}
Expand Down Expand Up @@ -92,6 +108,9 @@ export function generatePromptFooConfiguration(
: m

const testProvider = resolveTestProvider(info)
const defaultTest = deleteUndefinedValues({
options: deleteUndefinedValues({ provider: testProvider }),
})

// Create configuration object
const res = {
Expand All @@ -117,13 +136,13 @@ export function generatePromptFooConfiguration(
id: provider,
label: [
model,
smallModel,
visionModel,
`t=${temperature}`,
`small=${smallModel}`,
`vision=${visionModel}`,
`temp=${temperature}`,
top_p !== undefined ? `p=${top_p}` : undefined,
]
.filter((v) => v !== undefined)
.join(":"),
.join(", "),
config: {
model,
smallModel,
Expand All @@ -133,8 +152,7 @@ export function generatePromptFooConfiguration(
cli,
},
})),
// Default test configuration if testProvider is present
defaultTest: testProvider ? { provider: testProvider } : undefined,
defaultTest,
// Map tests to configuration format
tests: arrayify(tests).map(
({
Expand Down
22 changes: 22 additions & 0 deletions packages/sample/genaisrc/add.test.genai.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Sample script: asks "What is 1 + 1?" and declares one inline test whose
// expected answer is "2".
script({
title: 'Simple Math Test',
description: 'Validates that the model correctly calculates 1+1.',
group: 'Basic Tests',
// temperature 0 and a 10-token cap — presumably to keep the answer short and
// repeatable (TODO confirm).
temperature: 0,
maxTokens: 10,
tests: [
{
// No input files; the case is checked by a rubric, a fact, and an
// `equals` assertion against the string '2'.
files: [],
rubrics: ['output correctly calculates 1+1 as 2'],
facts: [`The model should return "2".`],
asserts: [
{
type: 'equals',
value: '2',
},
],
},
],
});

// Tagged template holding the question text; presumably this is the prompt
// sent to the model (`$` is defined outside this file).
$`What is 1 + 1?`;
Loading

0 comments on commit e4567b6

Please sign in to comment.