Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a mock server option for the api server for dev #604

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified api-server/jobs.db
Binary file not shown.
6 changes: 5 additions & 1 deletion api-server/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ import (
// and ensures a jobs table exists.
func (srv *ILabServer) initDB() {
var err error
srv.db, err = sql.Open("sqlite3", "jobs.db")
dbName := "jobs.db"
if srv.mockServer {
dbName = "mock-jobs.db"
}
srv.db, err = sql.Open("sqlite3", dbName)
if err != nil {
srv.log.Fatalf("Failed to open SQLite database: %v", err)
}
Expand Down
48 changes: 44 additions & 4 deletions api-server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,15 @@ type ILabServer struct {
modelProcessBase *exec.Cmd
modelProcessLatest *exec.Cmd

// Base model reference
baseModel string

// Map of "pre-train"/"post-train" => jobID for VLLM serving
servedModelJobIDs map[string]string
jobIDsMutex sync.RWMutex

// Cache variables
modelCache ModelCache

mockServer bool
}

func main() {
Expand All @@ -143,6 +143,7 @@ func main() {
rootCmd.Flags().BoolVar(&srv.isCuda, "cuda", false, "Enable Cuda (default: false)")
rootCmd.Flags().BoolVar(&srv.useVllm, "vllm", false, "Enable VLLM model serving using podman containers")
rootCmd.Flags().StringVar(&srv.pipelineType, "pipeline", "", "Pipeline type (simple, accelerated, full)")
rootCmd.Flags().BoolVar(&srv.mockServer, "mock-server", false, "Enable mock mode: simulate backend jobs for development (jobs run for 30s)")
rootCmd.Flags().BoolVar(&srv.debugEnabled, "debug", false, "Enable debug logging")

// PreRun to validate flags
Expand Down Expand Up @@ -214,9 +215,15 @@ func (srv *ILabServer) runServer(cmd *cobra.Command, args []string) {
srv.ilabCmd = ilabPath
} else {
// Use ilab from virtual environment
// First attempt: baseDir/bin/ilab
srv.ilabCmd = filepath.Join(srv.baseDir, "bin", "ilab")
if _, err := os.Stat(srv.ilabCmd); os.IsNotExist(err) {
srv.log.Fatalf("ilab binary not found at %s. Please ensure the virtual environment is set up correctly.", srv.ilabCmd)
// Second attempt: baseDir/venv/bin/ilab
altCmd := filepath.Join(srv.baseDir, "venv", "bin", "ilab")
if _, err := os.Stat(altCmd); os.IsNotExist(err) {
srv.log.Fatalf("ilab binary not found at %s or %s. Please ensure the virtual environment is set up correctly.", srv.ilabCmd, altCmd)
}
srv.ilabCmd = altCmd
}
}

Expand Down Expand Up @@ -394,6 +401,15 @@ func (srv *ILabServer) reconstructServedModelJobIDs() {

// startGenerateJob launches a job to run "ilab data generate" and tracks it.
func (srv *ILabServer) startGenerateJob() (string, error) {
if srv.mockServer {
jobID, err := srv.simulateJob("generate")
if err != nil {
return "", err
}
srv.log.Infof("Started mock generate job: %s", jobID)
return jobID, nil
}

ilabPath := srv.getIlabCommand()

// Hard-coded pipeline choice for data generate, or we could use srv.pipelineType
Expand Down Expand Up @@ -470,6 +486,15 @@ func (srv *ILabServer) startGenerateJob() (string, error) {

// startTrainJob starts a training job with the given parameters.
func (srv *ILabServer) startTrainJob(modelName, branchName string, epochs *int) (string, error) {
if srv.mockServer {
jobID, err := srv.simulateJob("train")
if err != nil {
return "", err
}
srv.log.Infof("Started mock train job: %s", jobID)
return jobID, nil
}

srv.log.Infof("Starting training job for model: '%s', branch: '%s'", modelName, branchName)

jobID := fmt.Sprintf("t-%d", time.Now().UnixNano())
Expand Down Expand Up @@ -681,7 +706,15 @@ func (srv *ILabServer) generateTrainPipelineHandler(w http.ResponseWriter, r *ht

// runPipelineJob orchestrates data generate + model train steps in sequence.
func (srv *ILabServer) runPipelineJob(job *Job, modelName, branchName string, epochs *int) {
// Open the pipeline job log
if srv.mockServer {
jobID, err := srv.simulateJob("generate")
if err != nil {
return
}
srv.log.Infof("Started mock generate job: %s", jobID)
return
}

logFile, err := os.Create(job.LogFile)
if err != nil {
srv.log.Errorf("Error creating pipeline log file for job %s: %v", job.JobID, err)
Expand Down Expand Up @@ -829,6 +862,13 @@ func (srv *ILabServer) getFullModelPath(modelName string) (string, error) {

// runIlabCommand executes the ilab command with the provided arguments and returns combined output.
func (srv *ILabServer) runIlabCommand(args ...string) (string, error) {
if srv.mockServer {
if len(args) >= 2 && args[0] == "model" && args[1] == "list" {
return "Mock Model A\nMock Model B\n", nil
}
return "mock output", nil
}

cmdPath := srv.getIlabCommand()
cmd := exec.Command(cmdPath, args...)
if !srv.rhelai {
Expand Down
58 changes: 58 additions & 0 deletions api-server/mock.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// mock.go
package main

import (
"fmt"
"os"
"time"
)

// simulateJob fakes a backend job of the given type (e.g., "generate" or
// "train") for mock-server mode.
//
// It builds a job ID of the form "mock-<jobType>-<unix nanos>", creates
// logs/<jobID>.log with a single "started" line, stores a "running" job record
// via srv.createJob, and starts a goroutine that marks the job "finished"
// after a fixed 30s delay and persists that via srv.updateJob.
//
// It returns the new job ID, or an empty ID and the error if the log file
// could not be created or the job record could not be stored. A failure to
// write the initial log line is logged but does not fail the job.
func (srv *ILabServer) simulateJob(jobType string) (string, error) {
	// How long a mock job stays in the "running" state before it is finished.
	const mockJobDuration = 30 * time.Second

	// The nanosecond timestamp keeps job IDs (and log file names) distinct.
	jobID := fmt.Sprintf("mock-%s-%d", jobType, time.Now().UnixNano())
	logFilePath := fmt.Sprintf("logs/%s.log", jobID)

	// Create the log file and write an initial line to it.
	f, err := os.Create(logFilePath)
	if err != nil {
		srv.log.Errorf("Mock: failed to create log file: %v", err)
		return "", err
	}
	// The initial log line is best-effort: report problems instead of silently
	// dropping them, but keep going so the mock job is still created.
	if _, err := fmt.Fprintf(f, "Mock %s job started...\n", jobType); err != nil {
		srv.log.Errorf("Mock: failed to write log file %s: %v", logFilePath, err)
	}
	if err := f.Close(); err != nil {
		srv.log.Errorf("Mock: failed to close log file %s: %v", logFilePath, err)
	}

	// Create a new job record. PID is 0 because no process is launched.
	newJob := &Job{
		JobID:     jobID,
		Cmd:       fmt.Sprintf("mock-%s", jobType),
		Args:      []string{},
		Status:    "running",
		PID:       0,
		LogFile:   logFilePath,
		StartTime: time.Now(),
	}
	if err := srv.createJob(newJob); err != nil {
		srv.log.Errorf("Mock: failed to create job record: %v", err)
		return "", err
	}

	// Simulate the job: after mockJobDuration, mark it as finished.
	go func(j *Job) {
		srv.log.Infof("Mock job %s running (simulated %s delay)...", j.JobID, mockJobDuration)
		time.Sleep(mockJobDuration)

		j.Lock.Lock()
		defer j.Lock.Unlock()
		j.Status = "finished"
		now := time.Now()
		j.EndTime = &now
		if err := srv.updateJob(j); err != nil {
			srv.log.Errorf("Mock: failed to update job %s: %v", j.JobID, err)
			// Do not report success when the finished state was not persisted.
			return
		}
		srv.log.Infof("Mock job %s finished successfully", j.JobID)
	}(newJob)

	return jobID, nil
}
Loading