diff --git a/swift/StableDiffusion/pipeline/EulerAncestralDiscreteScheduler.swift b/swift/StableDiffusion/pipeline/EulerAncestralDiscreteScheduler.swift new file mode 100644 index 00000000..6f4041f3 --- /dev/null +++ b/swift/StableDiffusion/pipeline/EulerAncestralDiscreteScheduler.swift @@ -0,0 +1,90 @@ +// For licensing see accompanying LICENSE.md file. +// Copyright (C) 2022 Apple Inc. and The HuggingFace Team. All Rights Reserved. + +import Accelerate +import CoreML + +/// A Scheduler used to compute a de-noised image +/// +/// This implementation matches: +/// [Hugging Face Diffusers EulerAncestralDiscreteScheduler](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py) +/// +/// It is based on the [original k-diffusion implementation by Katherine Crowson](https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L72) +/// Limitations: +/// - Only implemented for Euler A algorithm (not Euler) +/// - Assumes model predicts epsilon +@available(iOS 16.2, macOS 13.1, *) +public final class EulerAncestralDiscreteScheduler: Scheduler { + public let trainStepCount: Int + public let inferenceStepCount: Int + public let betas: [Float] + public let timeSteps: [Int] + public let alphas: [Float] + public let alphasCumProd: [Float] + public let sigmas: [Float] + public let initNoiseSigma: Float + private(set) var randomSource: RandomSource + + public init( + randomSource: RandomSource, + stepCount: Int = 50, + trainStepCount: Int = 1000, + betaSchedule: BetaSchedule = .linear, + betaStart: Float = 0.0001, + betaEnd: Float = 0.02 + ) { + self.randomSource = randomSource + self.trainStepCount = trainStepCount + self.inferenceStepCount = stepCount + + switch betaSchedule { + case .linear: + self.betas = linspace(betaStart, betaEnd, trainStepCount) + case .scaledLinear: + self.betas = linspace(pow(betaStart, 0.5), pow(betaEnd, 0.5), trainStepCount).map({ $0 * $0 }) 
+ } + + self.alphas = betas.map({ 1.0 - $0 }) + + var alphasCumProd = self.alphas + for i in 1.., timeStep t: Int, sample s: MLShapedArray) -> MLShapedArray { + let stepIndex = timeSteps.firstIndex(of: t)! + let sigma = sigmas[stepIndex] + + // compute predicted original sample (x0) from sigma-scaled predicted noise (for epsilon): + // sample - sigma * output + let predOriginalSample = weightedSum([1.0, Double(-1.0 * sigma)], [s, output]) + + let sigmaFrom = sigmas[stepIndex] + let sigmaTo = sigmas[stepIndex + 1] + let sigmaUp = sqrt(pow(sigmaTo, 2) * (pow(sigmaFrom, 2) - pow(sigmaTo, 2)) / pow(sigmaFrom, 2)) + let sigmaDown = sqrt(pow(sigmaTo, 2) - pow(sigmaUp, 2)) + + // Convert to an ODE derivative: + // derivative = (sample - predOriginalSample) / sigma + // prevSample = sample + derivative * dt + let derivative = weightedSum([Double(1 / sigma), Double(-1 / sigma)], [s, predOriginalSample]) + let dt = sigmaDown - sigma + let prevSample = weightedSum([1.0, Double(dt)], [s, derivative]) + + // Introduce noise + let noise = MLShapedArray(converting: randomSource.normalShapedArray(output.shape, mean: 0.0, stdev: Double(initNoiseSigma))) + + return weightedSum([1, Double(sigmaUp)], [prevSample, noise]) // output = prevSample + noise * sigmaUp + } +} diff --git a/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift b/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift index 6290e8a5..ee8a62cf 100644 --- a/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift +++ b/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift @@ -12,6 +12,8 @@ public enum StableDiffusionScheduler { case pndmScheduler /// Scheduler that uses a second order DPM-Solver++ algorithm case dpmSolverMultistepScheduler + /// Scheduler that uses an Euler Ancestral discrete algorithm + case eulerAncestralDiscreteScheduler } /// RNG compatible with StableDiffusionPipeline @@ -160,6 +162,7 @@ public struct StableDiffusionPipeline: ResourceManaging { switch 
config.schedulerType { case .pndmScheduler: return PNDMScheduler(stepCount: config.stepCount) case .dpmSolverMultistepScheduler: return DPMSolverMultistepScheduler(stepCount: config.stepCount) + case .eulerAncestralDiscreteScheduler: return EulerAncestralDiscreteScheduler(randomSource: randomSource(from: config.rngType, seed: config.seed), stepCount: config.stepCount) } } diff --git a/swift/StableDiffusionCLI/main.swift b/swift/StableDiffusionCLI/main.swift index e567beb0..62d916f6 100644 --- a/swift/StableDiffusionCLI/main.swift +++ b/swift/StableDiffusionCLI/main.swift @@ -67,7 +67,7 @@ struct StableDiffusionSample: ParsableCommand { @Option(help: "Compute units to load model with {all,cpuOnly,cpuAndGPU,cpuAndNeuralEngine}") var computeUnits: ComputeUnits = .all - @Option(help: "Scheduler to use, one of {pndm, dpmpp}") + @Option(help: "Scheduler to use, one of {pndm, dpmpp, ead}") var scheduler: SchedulerOption = .pndm @Option(help: "Random number generator to use, one of {numpy, torch}") @@ -279,11 +279,12 @@ enum ComputeUnits: String, ExpressibleByArgument, CaseIterable { @available(iOS 16.2, macOS 13.1, *) enum SchedulerOption: String, ExpressibleByArgument { - case pndm, dpmpp + case pndm, dpmpp, ead var stableDiffusionScheduler: StableDiffusionScheduler { switch self { case .pndm: return .pndmScheduler case .dpmpp: return .dpmSolverMultistepScheduler + case .ead: return .eulerAncestralDiscreteScheduler } } }