Skip to content

Commit

Permalink
my tests pass
Browse files Browse the repository at this point in the history
  • Loading branch information
bracesproul committed Apr 24, 2024
1 parent 5f5571e commit 9cd5680
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 117 deletions.
231 changes: 124 additions & 107 deletions js/src/evaluation/runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,63 +27,57 @@ type EvaluatorT =
| RunEvaluator
| ((run: Run, example?: Example) => EvaluationResult);

/**
* Options bag accepted by `evaluate`. `_evaluate` takes the same shape,
* extended with an optional `experiment` field.
*
* Only `data` is required; every other field may be omitted.
*/
interface EvaluateOptions {
/**
* The dataset to evaluate on. Can be a dataset name, a list of
* examples, or a generator of examples.
*/
data: DataT;
/**
* A list of evaluators to run on each example.
* @default undefined
*/
evaluators?: Array<EvaluatorT>;
/**
* A list of summary evaluators to run on the entire dataset.
* @default undefined
*/
summaryEvaluators?: Array<SummaryEvaluatorT>;
/**
* Metadata to attach to the experiment.
* @default undefined
*/
metadata?: Record<string, any>;
/**
* A prefix to provide for your experiment name.
* @default undefined
*/
experimentPrefix?: string;
/**
* The maximum number of concurrent evaluations to run.
* @default undefined
*/
maxConcurrency?: number;
/**
* The LangSmith client to use.
* @default undefined
*/
client?: Client;
/**
* Whether to block until the evaluation is complete.
* @default true
*/
blocking?: boolean;
}

export function evaluate(
/**
* The target system or function to evaluate.
*/
target: TargetT,
/**
* The dataset to evaluate on. Can be a dataset name, a list of
* examples, or a generator of examples.
*/
data: DataT,
options?: {
/**
* A list of evaluators to run on each example.
* @default undefined
*/
evaluators?: Array<EvaluatorT>;
/**
* A list of summary evaluators to run on the entire dataset.
* @default undefined
*/
summaryEvaluator?: Array<SummaryEvaluatorT>;
/**
* Metadata to attach to the experiment.
* @default undefined
*/
metadata?: Record<string, any>;
/**
* A prefix to provide for your experiment name.
* @default undefined
*/
experimentPrefix?: string;
/**
* The maximum number of concurrent evaluations to run.
* @default undefined
*/
maxConcurrency?: number;
/**
* The LangSmith client to use.
* @default undefined
*/
client?: Client;
/**
* Whether to block until the evaluation is complete.
* @default true
*/
blocking?: boolean;
}
options: EvaluateOptions
): Promise<ExperimentResults> {
return _evaluate(target, {
data,
evaluators: options?.evaluators,
summaryEvaluators: options?.summaryEvaluator,
metadata: options?.metadata,
experimentPrefix: options?.experimentPrefix,
maxConcurrency: options?.maxConcurrency,
client: options?.client,
});
return _evaluate(target, options);
}

interface ExperimentResultRow {
Expand Down Expand Up @@ -160,14 +154,7 @@ const _isCallable = (target: TargetT | AsyncIterable<Run>): boolean =>

async function _evaluate(
target: TargetT | AsyncIterable<Run>,
fields: {
data: DataT;
evaluators?: Array<EvaluatorT>;
summaryEvaluators?: Array<SummaryEvaluatorT>;
metadata?: Record<string, any>;
experimentPrefix?: string;
maxConcurrency?: number;
client?: Client;
fields: EvaluateOptions & {
experiment?: TracerSession;
}
): Promise<ExperimentResults> {
Expand Down Expand Up @@ -362,11 +349,6 @@ class _ExperimentManager extends _ExperimentManagerMixin {
} else {
return this._examples;
}
return async function* (this: _ExperimentManager) {
for await (const example of this._examples!) {
yield example;
}
}.call(this);
}

get datasetId(): Promise<string> {
Expand All @@ -389,21 +371,19 @@ class _ExperimentManager extends _ExperimentManagerMixin {
yield { results: [] };
}
}.call(this);
} else {
return this._evaluationResults;
}
return this._evaluationResults;
}

get runs(): AsyncIterable<Run> {
if (this._runs === undefined) {
throw new Error(
"Runs not provided in this experiment. Please predict first."
);
} else {
return this._runs;
}
return async function* (this: _ExperimentManager) {
for await (const run of this._runs!) {
yield run;
}
}.call(this);
}

async start(): Promise<_ExperimentManager> {
Expand Down Expand Up @@ -431,7 +411,7 @@ class _ExperimentManager extends _ExperimentManagerMixin {
const experimentResults = this._predict(target, options);

const results: AsyncIterable<any>[] = [];
for await (const item of asyncTee(experimentResults, 2)) {
for await (const item of asyncTee(experimentResults)) {
results.push(item);
}
const [r1, r2] = results;
Expand Down Expand Up @@ -509,26 +489,50 @@ class _ExperimentManager extends _ExperimentManagerMixin {
}

async *getResults(): AsyncIterable<ExperimentResultRow> {
const runsIter = this.runs[Symbol.asyncIterator]();
const examplesIter = this.examples[Symbol.asyncIterator]();
const evaluationResultsIter =
this.evaluationResults[Symbol.asyncIterator]();
// const runsIter = this.runs[Symbol.asyncIterator]();
// const examplesIter = this.examples[Symbol.asyncIterator]();
// const evaluationResultsIter =
// this.evaluationResults[Symbol.asyncIterator]();

while (true) {
const runResult = await runsIter.next();
const exampleResult = await examplesIter.next();
const evaluationResult = await evaluationResultsIter.next();

if (runResult.done || exampleResult.done || evaluationResult.done) {
break;
}
let runs = [];
let examples = [];
let evaluationResults = [];
for await (const run of this.runs) {
runs.push(run);
}
for await (const example of this.examples) {
examples.push(example);
}
for await (const evaluationResult of this.evaluationResults) {
evaluationResults.push(evaluationResult);
}

// return an array of objects with run, example, and evaluationResults
for (let i = 0; i < runs.length; i++) {
yield {
run: runResult.value,
example: exampleResult.value,
evaluationResults: evaluationResult.value,
run: runs[i],
example: examples[i],
evaluationResults: evaluationResults[i],
};
}

// while (true) {
// const runResult = await runsIter.next();
// const exampleResult = await examplesIter.next();
// const evaluationResult = await evaluationResultsIter.next();

// console.log("Yielding")
// yield {
// run: runResult.value,
// example: exampleResult.value,
// evaluationResults: evaluationResult.value,
// };

// if (runResult.done || exampleResult.done) {
// console.log("Done")
// break;
// }
// }
}

async getSummaryScores(): Promise<EvaluationResults> {
Expand Down Expand Up @@ -558,13 +562,13 @@ class _ExperimentManager extends _ExperimentManagerMixin {
maxConcurrency?: number;
}
): AsyncGenerator<_ForwardResults> {
const fn = wrapFunctionAndEnsureTraceable(target);
// const fn = wrapFunctionAndEnsureTraceable(target, this.experimentName);
const maxConcurrency = options?.maxConcurrency ?? 0;

if (maxConcurrency === 0) {
for await (const example of this.examples) {
yield await _forward(
fn,
target,
example,
this.experimentName,
this._metadata,
Expand All @@ -582,7 +586,7 @@ class _ExperimentManager extends _ExperimentManagerMixin {
futures.push(
caller.call(
_forward,
fn,
target,
example,
this.experimentName,
this._metadata,
Expand All @@ -597,7 +601,7 @@ class _ExperimentManager extends _ExperimentManagerMixin {
}

// Close out the project.
this._end();
await this._end();
}

async _runEvaluators(
Expand Down Expand Up @@ -676,11 +680,13 @@ class _ExperimentManager extends _ExperimentManagerMixin {
const runsIterator = this.runs[Symbol.asyncIterator]();
const examplesIterator = this.examples[Symbol.asyncIterator]();

while (true) {
let shouldContinue = true;
while (shouldContinue) {
const runResult = await runsIterator.next();
const exampleResult = await examplesIterator.next();

if (runResult.done || exampleResult.done) {
shouldContinue = false;
break;
}

Expand Down Expand Up @@ -754,15 +760,16 @@ class _ExperimentManager extends _ExperimentManagerMixin {
}
const projectMetadata = await this._getExperimentMetadata();
projectMetadata["dataset_version"] = this._getDatasetVersion();
this.client.updateProject(experiment.id, {

await this.client.updateProject(experiment.id, {
endTime: new Date().toISOString(),
metadata: projectMetadata,
});
}
}

async function _forward(
fn: (...args: any[]) => Promise<any>, // TODO fix this type. What is `rh.SupportsLangsmithExtra`?
fn: (...args: any[]) => Promise<any> | any, // TODO fix this type. What is `rh.SupportsLangsmithExtra`?
example: Example,
experimentName: string,
metadata: Record<string, any>,
Expand All @@ -774,19 +781,25 @@ async function _forward(
run = r;
};

const options = {
reference_example_id: example.id,
on_end: _getRun,
project_name: experimentName,
metadata: {
...metadata,
example_version: example.modified_at
? new Date(example.modified_at).toISOString()
: new Date(example.created_at).toISOString(),
},
client,
};

const wrappedFn = wrapFunctionAndEnsureTraceable(fn, options) as ReturnType<
typeof traceable
>;

try {
await fn(example.inputs, {
reference_example_id: example.id,
on_end: _getRun,
project_name: experimentName,
metadata: {
...metadata,
example_version: example.modified_at
? new Date(example.modified_at).toISOString()
: new Date(example.created_at).toISOString(),
},
client,
});
await wrappedFn(example.inputs);
} catch (e) {
console.error(
`Error running target function: ${JSON.stringify(e, null, 2)}`
Expand Down Expand Up @@ -860,7 +873,7 @@ async function wrapSummaryEvaluators(

async function* asyncTee<T>(
iterable: AsyncIterable<T>,
n: number = 2
n = 2
): AsyncGenerator<AsyncIterable<T>, void, undefined> {
const iterators: Array<AsyncIterable<T>> = [];
const cache: T[][] = Array.from({ length: n }, () => []);
Expand Down Expand Up @@ -895,12 +908,16 @@ interface SupportsLangSmithExtra<R> {
(target: TargetT, langSmithExtra?: Partial<RunTreeConfig>): R;
}

function wrapFunctionAndEnsureTraceable(target: TargetT) {
function wrapFunctionAndEnsureTraceable(
target: TargetT,
options?: Partial<RunTreeConfig>
) {
if (typeof target === "function") {
if (isTraceableFunction(target)) {
return target as SupportsLangSmithExtra<ReturnType<typeof target>>;
} else {
return traceable(target, {
...options,
name: "target",
});
}
Expand Down
1 change: 1 addition & 0 deletions js/src/run_trees.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ export interface RunTreeConfig {
outputs?: KVMap;
reference_example_id?: string;
client?: Client;
on_end?: (runTree: RunTree) => void;
}

export interface RunnableConfigLike {
Expand Down
Loading

0 comments on commit 9cd5680

Please sign in to comment.