Skip to content

Commit

Permalink
Hotfix to allow re-registering of ttld machines (#810)
Browse files Browse the repository at this point in the history
  • Loading branch information
luke-lombardi authored Dec 26, 2024
1 parent b36b742 commit c789b7e
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 4 deletions.
7 changes: 6 additions & 1 deletion pkg/api/v1/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,19 @@ func (g *MachineGroup) RegisterMachine(ctx echo.Context) error {
hostName = fmt.Sprintf("%s.%s.%s", request.HostName, g.config.Tailscale.User, g.config.Tailscale.HostName)
}

poolConfig, ok := g.config.Worker.Pools[request.PoolName]
if !ok {
return HTTPInternalServerError("Invalid pool name")
}

err = g.providerRepo.RegisterMachine(request.ProviderName, request.PoolName, request.MachineID, &types.ProviderMachineState{
MachineId: request.MachineID,
Token: request.Token,
HostName: hostName,
Cpu: cpu,
Memory: memory,
GpuCount: uint32(gpuCount),
})
}, &poolConfig)
if err != nil {
return HTTPInternalServerError("Failed to register machine")
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/repository/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ type ProviderRepository interface {
RemoveMachine(providerName, poolName, machineId string) error
SetMachineKeepAlive(providerName, poolName, machineId, agentVersion string, metrics *types.ProviderMachineMetrics) error
SetLastWorkerSeen(providerName, poolName, machineId string) error
RegisterMachine(providerName, poolName, machineId string, newMachineInfo *types.ProviderMachineState) error
RegisterMachine(providerName, poolName, machineId string, newMachineInfo *types.ProviderMachineState, poolConfig *types.WorkerPoolConfig) error
WaitForMachineRegistration(providerName, poolName, machineId string) (*types.ProviderMachineState, error)
ListAllMachines(providerName, poolName string, useLock bool) ([]*types.ProviderMachine, error)
SetMachineLock(providerName, poolName, machineId string) error
Expand Down
12 changes: 10 additions & 2 deletions pkg/repository/provider_redis.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,12 +299,20 @@ func (r *ProviderRedisRepository) RemoveMachine(providerName, poolName, machineI
return nil
}

func (r *ProviderRedisRepository) RegisterMachine(providerName, poolName, machineId string, newMachineInfo *types.ProviderMachineState) error {
func (r *ProviderRedisRepository) RegisterMachine(providerName, poolName, machineId string, newMachineInfo *types.ProviderMachineState, poolConfig *types.WorkerPoolConfig) error {
stateKey := common.RedisKeys.ProviderMachineState(providerName, poolName, machineId)

machineInfo, err := r.getMachineStateFromKey(stateKey)
if err != nil {
return fmt.Errorf("failed to get machine state <%v>: %w", stateKey, err)
// TODO: This is a temporary fix to allow the machine to be registered
// without having to update the machine state. In the future, we should tie the
// registration token to the machine ID and store that somewhere else persistently.
machineInfo = &types.ProviderMachineState{}
machineInfo.Gpu = poolConfig.GPUType
machineInfo.Created = fmt.Sprintf("%d", time.Now().UTC().Unix())
machineInfo.LastKeepalive = fmt.Sprintf("%d", time.Now().UTC().Unix())
machineInfo.PoolName = newMachineInfo.PoolName
machineInfo.MachineId = newMachineInfo.MachineId
}

machineInfo.HostName = newMachineInfo.HostName
Expand Down

0 comments on commit c789b7e

Please sign in to comment.