[MS-644][FE] feature/total prompts percentage #74

Open
wants to merge 14 commits into dev_main
57 changes: 19 additions & 38 deletions actions/createRun.ts
@@ -10,27 +10,15 @@ export async function createRun(
_: FormState<BenchmarkRunFormValues>,
formData: FormData
) {
let runAll = false;
try {
runAll = formData.get('run_all') === 'true';
} catch (error) {
return {
formStatus: 'error',
formErrors: {
error: ['Failed to parse run_all'],
},
};
}

// Dynamically create the schema based on runAll
const dynamicRunSchema = z.object({
const benchmarkRunSchema = z.object({
run_name: z.string().min(1, 'Name is required'),
description: z.string().optional(),
num_of_prompts: z.preprocess(
prompt_selection_percentage: z.preprocess(
(val) => Number(val),
runAll
? z.number()
: z.number().min(1, 'Number of prompts must be at least 1')
z
.number()
.min(0, 'Prompt selection percentage must be at least 0')
.max(100, 'Prompt selection percentage must be at most 100')
),
inputs: z.array(z.string()).min(1, 'At least one cookbook is required'),
endpoints: z.array(z.string()).min(1, 'At least one endpoint is required'),
@@ -42,28 +30,21 @@ export async function createRun(
.string()
.min(1, 'Runner processing module is required'),
system_prompt: z.string(),
run_all: z.preprocess((val) => val === 'true', z.boolean()),
});

let newRunData: z.infer<typeof dynamicRunSchema>;

try {
newRunData = dynamicRunSchema.parse({
run_name: formData.get('run_name'),
description: formData.get('description'),
num_of_prompts: formData.get('num_of_prompts'),
inputs: formData.getAll('inputs'),
endpoints: formData.getAll('endpoints'),
random_seed: formData.get('random_seed'),
runner_processing_module: formData.get('runner_processing_module'),
system_prompt: formData.get('system_prompt'),
});
} catch (error) {
return formatZodSchemaErrors(error as ZodError);
}
const result = benchmarkRunSchema.safeParse({
run_name: formData.get('run_name'),
description: formData.get('description'),
prompt_selection_percentage: formData.get('prompt_selection_percentage'),
inputs: formData.getAll('inputs'),
endpoints: formData.getAll('endpoints'),
random_seed: formData.get('random_seed'),
runner_processing_module: formData.get('runner_processing_module'),
system_prompt: formData.get('system_prompt'),
});

if (runAll) {
newRunData.num_of_prompts = 0;
if (!result.success) {
return formatZodSchemaErrors(result.error as ZodError);
}

const response = await fetch(
@@ -73,7 +54,7 @@ export async function createRun(
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(newRunData),
body: JSON.stringify(result.data),
}
);

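For context, a minimal sketch (not part of this diff) of how the reworked validation behaves, assuming zod's standard preprocess/safeParse semantics; the schema below reproduces only the prompt_selection_percentage rule introduced by this PR:

import { z } from 'zod';

// Sketch only: mirrors the prompt_selection_percentage rule from the new benchmarkRunSchema.
const promptSelectionSchema = z.object({
  prompt_selection_percentage: z.preprocess(
    (val) => Number(val),
    z
      .number()
      .min(0, 'Prompt selection percentage must be at least 0')
      .max(100, 'Prompt selection percentage must be at most 100')
  ),
});

// safeParse never throws; failures are reported on result.error instead.
promptSelectionSchema.safeParse({ prompt_selection_percentage: '5' });   // success: true (string coerced to 5)
promptSelectionSchema.safeParse({ prompt_selection_percentage: '150' }); // success: false (above 100)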
2 changes: 1 addition & 1 deletion app/api/types.ts
@@ -64,7 +64,7 @@ export type RunnerWebApiModel = {
export type RunnerDetailWebApiModel = {
run_id: number;
runner_id: string;
runner_args: CookbooksRunnerArgs | RecipesRunnerArgs;
runner_args: CookbooksRunnerArgs;
endpoints: string[];
start_time: number;
};
@@ -99,11 +99,10 @@ const mockFormState: FormState<BenchmarkRunFormValues> = {
description: '',
inputs: [],
endpoints: [],
num_of_prompts: '',
prompt_selection_percentage: '1',
system_prompt: '',
runner_processing_module: 'benchmarking',
random_seed: '0',
run_all: 'false',
};

//We are not asserting anything on the form action. In React, form action is a reference to a function (server action). There is no way to stub the action.
138 changes: 78 additions & 60 deletions app/benchmarking/components/__tests__/benchmarkRunForm.test.tsx
@@ -48,7 +48,7 @@ const mockRecipesStats: RecipeStats[] = [
},
];

// formula: total_prompt_in_cookbook * num_of_metrics + num_of_prompt_templates
// total prompts to run = total_prompt_in_cookbook * num_of_prompt_templates

// mock mockRecipesStats[0] has 0 prompt templates, so not multiplying by num_of_prompt_templates
const totalPromptsForStat0 =
@@ -64,13 +64,58 @@ const totalPromptForStat1 =

const GRAND_TOTAL_PROMPTS = totalPromptsForStat0 + totalPromptForStat1;

const USER_INPUT_NUM_OF_PROMPTS = 5;

const USER_INPUT_PERCENTAGE_OF_PROMPTS = 5;
const DECIMAL_FRACTION_OF_PROMPTS = USER_INPUT_PERCENTAGE_OF_PROMPTS / 100;
const SMALLER_SET_TOTAL_PROMPTS =
USER_INPUT_NUM_OF_PROMPTS * mockRecipesStats[0].num_of_datasets +
USER_INPUT_NUM_OF_PROMPTS *
mockRecipesStats[1].num_of_prompt_templates *
mockRecipesStats[1].num_of_datasets;
Math.floor(
DECIMAL_FRACTION_OF_PROMPTS *
mockRecipesStats[0].num_of_datasets_prompts.dataset1 //mockRecipesStats[0] has 0 prompt templates
) +
Math.floor(
DECIMAL_FRACTION_OF_PROMPTS *
mockRecipesStats[0].num_of_datasets_prompts.dataset2 //mockRecipesStats[0] has 0 prompt templates
) +
Math.floor(
DECIMAL_FRACTION_OF_PROMPTS *
mockRecipesStats[1].num_of_datasets_prompts.dataset1
) *
mockRecipesStats[1].num_of_prompt_templates +
Math.floor(
DECIMAL_FRACTION_OF_PROMPTS *
mockRecipesStats[1].num_of_datasets_prompts.dataset2
) *
mockRecipesStats[1].num_of_prompt_templates +
Math.floor(
DECIMAL_FRACTION_OF_PROMPTS *
mockRecipesStats[1].num_of_datasets_prompts.dataset3
) *
mockRecipesStats[1].num_of_prompt_templates;

const ONE_PERCENT_DECIMAL_FRACTION = 1 / 100;
const ONE_PERCENT_TOTAL_PROMPTS =
Math.floor(
ONE_PERCENT_DECIMAL_FRACTION *
mockRecipesStats[0].num_of_datasets_prompts.dataset1 //mockRecipesStats[0] has 0 prompt templates
) +
Math.floor(
ONE_PERCENT_DECIMAL_FRACTION *
mockRecipesStats[0].num_of_datasets_prompts.dataset2
) +
Math.floor(
ONE_PERCENT_DECIMAL_FRACTION *
mockRecipesStats[1].num_of_datasets_prompts.dataset1
) *
mockRecipesStats[1].num_of_prompt_templates +
Math.floor(
ONE_PERCENT_DECIMAL_FRACTION *
mockRecipesStats[1].num_of_datasets_prompts.dataset2
) *
mockRecipesStats[1].num_of_prompt_templates +
Math.floor(
ONE_PERCENT_DECIMAL_FRACTION *
mockRecipesStats[1].num_of_datasets_prompts.dataset3
) *
mockRecipesStats[1].num_of_prompt_templates;

const mockCookbooks: Cookbook[] = [
{
@@ -146,11 +191,10 @@ describe('BenchmarkRunForm', () => {
description: '',
inputs: [],
endpoints: [],
num_of_prompts: '',
prompt_selection_percentage: '1',
system_prompt: '',
runner_processing_module: 'benchmarking',
random_seed: '0',
run_all: 'false',
};

//We are not asserting anything on the form action. In React, form action is a reference to a function (server action). There is no way to stub the action.
@@ -185,17 +229,25 @@ describe('BenchmarkRunForm', () => {
);
const form = container.querySelector('form');
expect(form).toHaveFormValues({
num_of_prompts: null,
prompt_selection_percentage: Number(
mockFormState.prompt_selection_percentage
),
inputs: mockCookbooks.map((cb) => cb.id),
endpoints: mockEndpoints.map((ep) => ep.id),
random_seed: Number(mockFormState.random_seed),
runner_processing_module: mockFormState.runner_processing_module,
system_prompt: mockFormState.system_prompt,
run_all: false,
});
expect(screen.getByRole('button', { name: /Run/i })).toBeDisabled();
const runBtn = screen.getByRole('button', { name: /Run/i });
expect(runBtn).toBeDisabled();
await userEvent.type(screen.getByLabelText(/Name/i), 'Test Run');
expect(screen.getByText(/will be run: 0/i)).toBeInTheDocument();
expect(runBtn).toBeEnabled();
expect(
screen.getByText(
`Number of prompts that will be run: ${ONE_PERCENT_TOTAL_PROMPTS}`
)
).toBeInTheDocument();
expect(
screen.getByText(new RegExp(`${GRAND_TOTAL_PROMPTS}`))
).toBeInTheDocument();
@@ -208,51 +260,38 @@ describe('BenchmarkRunForm', () => {
selectedEndpoints={mockEndpoints}
/>
);
const hiddenPercentInputField = container.querySelector(
'input[name="prompt_selection_percentage"]'
) as HTMLInputElement;
hiddenPercentInputField.style.display = 'block'; // temporarily unhide percent input for testing
await userEvent.type(screen.getByLabelText(/Name/i), 'Test Run');
await userEvent.clear(hiddenPercentInputField);
await userEvent.type(
screen.getByLabelText(/Run a smaller set/i),
USER_INPUT_NUM_OF_PROMPTS.toString()
hiddenPercentInputField,
USER_INPUT_PERCENTAGE_OF_PROMPTS.toString()
);
const form = container.querySelector('form');
expect(form).toHaveFormValues({
num_of_prompts: USER_INPUT_NUM_OF_PROMPTS,
prompt_selection_percentage: USER_INPUT_PERCENTAGE_OF_PROMPTS,
inputs: mockCookbooks.map((cb) => cb.id),
endpoints: mockEndpoints.map((ep) => ep.id),
random_seed: Number(mockFormState.random_seed),
runner_processing_module: mockFormState.runner_processing_module,
system_prompt: mockFormState.system_prompt,
run_all: false,
});
hiddenPercentInputField.style.display = 'none';
expect(screen.getByRole('button', { name: /Run/i })).toBeEnabled();
expect(
screen.getByText(new RegExp(`${GRAND_TOTAL_PROMPTS}`))
).toBeInTheDocument();
expect(
screen.getByText(new RegExp(`${SMALLER_SET_TOTAL_PROMPTS}`))
screen.getByText(
`Number of prompts that will be run: ${SMALLER_SET_TOTAL_PROMPTS}`
)
).toBeInTheDocument();
});

it('should not set num_of_prompts when "Run All" is checked', async () => {
const { container } = renderWithProviders(
<BenchmarkRunForm
selectedCookbooks={mockCookbooks}
selectedEndpoints={mockEndpoints}
/>
);
await userEvent.type(screen.getByLabelText(/Name/i), 'Test Run');
await userEvent.click(screen.getByRole('toggle-switch'));
const form = container.querySelector('form');
expect(form).toHaveFormValues({
inputs: mockCookbooks.map((cb) => cb.id),
endpoints: mockEndpoints.map((ep) => ep.id),
random_seed: Number(mockFormState.random_seed),
runner_processing_module: mockFormState.runner_processing_module,
system_prompt: mockFormState.system_prompt,
run_all: true,
});
expect(screen.getByRole('button', { name: /Run/i })).toBeEnabled();
});

it('should display form errors', async () => {
const { rerender } = renderWithProviders(
<BenchmarkRunForm
@@ -278,7 +317,7 @@ describe('BenchmarkRunForm', () => {
formStatus: 'error',
formErrors: {
run_name: ['mock error 1'],
num_of_prompts: ['mock error 2'],
prompt_selection_percentage: ['mock error 2'],
description: ['mock error 3'],
},
};
Expand All @@ -298,28 +337,7 @@ describe('BenchmarkRunForm', () => {
);
});
expect(screen.getAllByText('mock error 1')).toHaveLength(2);
expect(screen.getAllByText('mock error 2')).toHaveLength(2);
expect(screen.getAllByText('mock error 2')).toHaveLength(1);
expect(screen.getAllByText('mock error 3')).toHaveLength(2);
});

it('should validate num of prompts', async () => {
(useFormStatus as jest.Mock).mockImplementation(() => ({
pending: false,
}));
renderWithProviders(
<BenchmarkRunForm
selectedCookbooks={mockCookbooks}
selectedEndpoints={mockEndpoints}
/>
);
await userEvent.type(screen.getByLabelText(/Name/i), 'Test Run');
await userEvent.type(screen.getByLabelText(/Run a smaller set/i), '0');
expect(screen.getByRole('button', { name: /Run/i })).toBeDisabled();
expect(screen.getByText(/.* must be greater than 0/i)).toBeInTheDocument();
expect(screen.getByText(/will be run: 0/i)).toBeInTheDocument();
await userEvent.type(screen.getByLabelText(/Run a smaller set/i), '1.5');
expect(screen.getByRole('button', { name: /Run/i })).toBeDisabled();
expect(screen.getByText(/.* must be an integer/i)).toBeInTheDocument();
expect(screen.getByText(/will be run: 0/i)).toBeInTheDocument();
});
});
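As a cross-check on the arithmetic behind SMALLER_SET_TOTAL_PROMPTS and ONE_PERCENT_TOTAL_PROMPTS above, a hedged sketch of the selection formula those constants appear to encode: each dataset contributes floor(percentage / 100 * dataset prompt count), multiplied by the recipe's prompt-template count when it has any. The helper name and the RecipeStatsLike shape are illustrative only, not part of this PR.

// Illustrative helper: assumes the per-dataset flooring used by the test constants above.
type RecipeStatsLike = {
  num_of_prompt_templates: number;
  num_of_datasets_prompts: Record<string, number>;
};

function expectedPromptTotal(stats: RecipeStatsLike[], percentage: number): number {
  const fraction = percentage / 100;
  return stats.reduce((total, recipe) => {
    // Recipes with 0 prompt templates still run each selected prompt once.
    const templateMultiplier = Math.max(recipe.num_of_prompt_templates, 1);
    const recipeTotal = Object.values(recipe.num_of_datasets_prompts).reduce(
      (sum, datasetPrompts) =>
        sum + Math.floor(fraction * datasetPrompts) * templateMultiplier,
      0
    );
    return total + recipeTotal;
  }, 0);
}

// e.g. one recipe with datasets of 80 and 45 prompts and 2 prompt templates, at 5%:
// floor(0.05 * 80) * 2 + floor(0.05 * 45) * 2 = 4 * 2 + 2 * 2 = 12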