From dd8a24b02feccc8f1bef180e7419fb03a61ba82f Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Tue, 6 Jun 2023 17:15:32 +0100 Subject: [PATCH] Added formatting (#260) * addd formatting settings * formatting * aded formatting to workflows --- .JuliaFormatter.toml | 2 + .github/workflows/Format.yml | 38 +++ docs/make.jl | 17 +- docs/src/distributions.md | 7 +- docs/src/examples.md | 22 +- docs/src/index.md | 39 ++-- docs/src/transforms.md | 6 +- src/Bijectors.jl | 169 +++++++------- src/bijectors/corr.jl | 34 +-- src/bijectors/coupling.jl | 39 ++-- src/bijectors/exp_log.jl | 4 +- src/bijectors/logit.jl | 4 +- src/bijectors/named_bijector.jl | 47 ++-- src/bijectors/normalise.jl | 47 ++-- src/bijectors/ordered.jl | 16 +- src/bijectors/pd.jl | 6 +- src/bijectors/permute.jl | 11 +- src/bijectors/planar_layer.jl | 11 +- src/bijectors/radial_layer.jl | 21 +- src/bijectors/rational_quadratic_spline.jl | 80 +++---- src/bijectors/scale.jl | 2 +- src/bijectors/shift.jl | 2 +- src/bijectors/simplex.jl | 118 +++++----- src/bijectors/stacked.jl | 34 +-- src/bijectors/truncated.jl | 10 +- src/chainrules.jl | 26 +-- src/compat/distributionsad.jl | 52 ++--- src/compat/forwarddiff.jl | 4 +- src/compat/reversediff.jl | 73 ++++-- src/compat/tracker.jl | 142 +++++++----- src/compat/zygote.jl | 84 ++++--- src/interface.jl | 14 +- src/transformed_distribution.jl | 44 ++-- test/ad/chainrules.jl | 8 +- test/ad/utils.jl | 22 +- test/bijectors/coupling.jl | 28 +-- test/bijectors/leaky_relu.jl | 4 +- test/bijectors/named_bijector.jl | 24 +- test/bijectors/permute.jl | 14 +- test/bijectors/rational_quadratic_spline.jl | 6 +- test/bijectors/utils.jl | 23 +- test/interface.jl | 111 +++++---- test/norm_flows.jl | 26 ++- test/runtests.jl | 14 +- test/transform.jl | 241 +++++++++++--------- 45 files changed, 975 insertions(+), 771 deletions(-) create mode 100644 .JuliaFormatter.toml create mode 100644 .github/workflows/Format.yml diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 00000000..ba942b23 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1,2 @@ +style="blue" +format_markdown=true \ No newline at end of file diff --git a/.github/workflows/Format.yml b/.github/workflows/Format.yml new file mode 100644 index 00000000..6a6df765 --- /dev/null +++ b/.github/workflows/Format.yml @@ -0,0 +1,38 @@ +name: Format + +on: + push: + branches: + - master + pull_request: + branches: + - master + merge_group: + types: [checks_requested] + +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + +jobs: + format: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + version: 1 + - name: Format code + run: | + using Pkg + Pkg.add(; name="JuliaFormatter", uuid="98e50ef6-434e-11e9-1051-2b60c6c9e899") + using JuliaFormatter + format("."; verbose=true) + shell: julia --color=yes {0} + - uses: reviewdog/action-suggester@v1 + if: github.event_name == 'pull_request' + with: + tool_name: JuliaFormatter + fail_on_error: true \ No newline at end of file diff --git a/docs/make.jl b/docs/make.jl index e1138577..5f3322a8 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -4,13 +4,18 @@ using Bijectors # Doctest setup DocMeta.setdocmeta!(Bijectors, :DocTestSetup, :(using Bijectors); recursive=true) -makedocs( - sitename = "Bijectors", - format = Documenter.HTML(), - modules = [Bijectors], - pages = ["Home" => "index.md", "Transforms" => "transforms.md", "Distributions.jl integration" => "distributions.md", "Examples" => "examples.md"], +makedocs(; + sitename="Bijectors", + format=Documenter.HTML(), + modules=[Bijectors], + pages=[ + "Home" => "index.md", + "Transforms" => "transforms.md", + "Distributions.jl integration" => "distributions.md", + "Examples" => "examples.md", + ], strict=false, checkdocs=:exports, ) -deploydocs(repo = "github.com/TuringLang/Bijectors.jl.git", push_preview=true) +deploydocs(; repo="github.com/TuringLang/Bijectors.jl.git", push_preview=true) diff --git a/docs/src/distributions.md b/docs/src/distributions.md index 514fc4bf..a1135516 100644 --- a/docs/src/distributions.md +++ b/docs/src/distributions.md @@ -1,10 +1,13 @@ ## Basic usage + Other than the `logpdf_with_trans` methods, the package also provides a more composable interface through the `Bijector` types. Consider for example the one from above with `Beta(2, 2)`. ```julia -julia> using Random; Random.seed!(42); +julia> using Random; + Random.seed!(42); -julia> using Bijectors; using Bijectors: Logit +julia> using Bijectors; + using Bijectors: Logit; julia> dist = Beta(2, 2) Beta{Float64}(α=2.0, β=2.0) diff --git a/docs/src/examples.md b/docs/src/examples.md index fff55f28..a29e6875 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -3,9 +3,11 @@ using Bijectors ``` ## Univariate ADVI example + But the real utility of `TransformedDistribution` becomes more apparent when using `transformed(dist, b)` for any bijector `b`. To get the transformed distribution corresponding to the `Beta(2, 2)`, we called `transformed(dist)` before. This is simply an alias for `transformed(dist, bijector(dist))`. Remember `bijector(dist)` returns the constrained-to-constrained bijector for that particular `Distribution`. But we can of course construct a `TransformedDistribution` using different bijectors with the same `dist`. This is particularly useful in something called _Automatic Differentiation Variational Inference (ADVI)_.[2] An important part of ADVI is to approximate a constrained distribution, e.g. `Beta`, as follows: -1. Sample `x` from a `Normal` with parameters `μ` and `σ`, i.e. `x ~ Normal(μ, σ)`. -2. Transform `x` to `y` s.t. `y ∈ support(Beta)`, with the transform being a differentiable bijection with a differentiable inverse (a "bijector") + + 1. Sample `x` from a `Normal` with parameters `μ` and `σ`, i.e. `x ~ Normal(μ, σ)`. + 2. Transform `x` to `y` s.t. `y ∈ support(Beta)`, with the transform being a differentiable bijection with a differentiable inverse (a "bijector") This then defines a probability density with same _support_ as `Beta`! Of course, it's unlikely that it will be the same density, but it's an _approximation_. Creating such a distribution becomes trivial with `Bijector` and `TransformedDistribution`: @@ -16,7 +18,7 @@ dist = Beta(2, 2) b = bijector(dist) # (0, 1) → ℝ b⁻¹ = inverse(b) # ℝ → (0, 1) td = transformed(Normal(), b⁻¹) # x ∼ 𝓝(0, 1) then b(x) ∈ (0, 1) - x = rand(rng, td) # ∈ (0, 1) +x = rand(rng, td) # ∈ (0, 1) ``` It's worth noting that `support(Beta)` is the _closed_ interval `[0, 1]`, while the constrained-to-unconstrained bijection, `Logit` in this case, is only well-defined as a map `(0, 1) → ℝ` for the _open_ interval `(0, 1)`. This is of course not an implementation detail. `ℝ` is itself open, thus no continuous bijection exists from a _closed_ interval to `ℝ`. But since the boundaries of a closed interval has what's known as measure zero, this doesn't end up affecting the resulting density with support on the entire real line. In practice, this means that @@ -29,25 +31,22 @@ inverse(td.transform)(rand(rng, td)) will never result in `0` or `1` though any sample arbitrarily close to either `0` or `1` is possible. _Disclaimer: numerical accuracy is limited, so you might still see `0` and `1` if you're lucky._ ## Multivariate ADVI example + We can also do _multivariate_ ADVI using the `Stacked` bijector. `Stacked` gives us a way to combine univariate and/or multivariate bijectors into a singe multivariate bijector. Say you have a vector `x` of length 2 and you want to transform the first entry using `Exp` and the second entry using `Log`. `Stacked` gives you an easy and efficient way of representing such a bijector. ```@repl advi using Bijectors: SimplexBijector # Original distributions -dists = ( - Beta(), - InverseGamma(), - Dirichlet(2, 3) -); +dists = (Beta(), InverseGamma(), Dirichlet(2, 3)); # Construct the corresponding ranges ranges = []; idx = 1; -for i = 1:length(dists) +for i in 1:length(dists) d = dists[i] - push!(ranges, idx:idx + length(d) - 1) + push!(ranges, idx:(idx + length(d) - 1)) global idx idx += length(d) @@ -74,6 +73,7 @@ sum(y[3:4]) ≈ 1.0 ``` ## Normalizing flows + A very interesting application is that of _normalizing flows_.[1] Usually this is done by sampling from a multivariate normal distribution, and then transforming this to a target distribution using invertible neural networks. Currently there are two such transforms available in Bijectors.jl: `PlanarLayer` and `RadialLayer`. Let's create a flow with a single `PlanarLayer`: ```@setup normalizing-flows @@ -144,7 +144,7 @@ f = NLLObjective(reconstruct, MvNormal(2, 1), xs); # Train using gradient descent. ε = 1e-3; -for i = 1:100 +for i in 1:100 ∇s = Zygote.gradient(f, θs...) θs = map(θs, ∇s) do θ, ∇ θ - ε .* ∇ diff --git a/docs/src/index.md b/docs/src/index.md index 7f7ed0a4..a347da07 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,31 +1,32 @@ # Bijectors.jl This package implements a set of functions for transforming constrained random variables (e.g. simplexes, intervals) to Euclidean space. The 3 main functions implemented in this package are the `link`, `invlink` and `logpdf_with_trans` for a number of distributions. The distributions supported are: -1. `RealDistribution`: `Union{Cauchy, Gumbel, Laplace, Logistic, NoncentralT, Normal, NormalCanon, TDist}`, -2. `PositiveDistribution`: `Union{BetaPrime, Chi, Chisq, Erlang, Exponential, FDist, Frechet, Gamma, InverseGamma, InverseGaussian, Kolmogorov, LogNormal, NoncentralChisq, NoncentralF, Rayleigh, Weibull}`, -3. `UnitDistribution`: `Union{Beta, KSOneSided, NoncentralBeta}`, -4. `SimplexDistribution`: `Union{Dirichlet}`, -5. `PDMatDistribution`: `Union{InverseWishart, Wishart}`, and -6. `TransformDistribution`: `Union{T, Truncated{T}} where T<:ContinuousUnivariateDistribution`. + + 1. `RealDistribution`: `Union{Cauchy, Gumbel, Laplace, Logistic, NoncentralT, Normal, NormalCanon, TDist}`, + 2. `PositiveDistribution`: `Union{BetaPrime, Chi, Chisq, Erlang, Exponential, FDist, Frechet, Gamma, InverseGamma, InverseGaussian, Kolmogorov, LogNormal, NoncentralChisq, NoncentralF, Rayleigh, Weibull}`, + 3. `UnitDistribution`: `Union{Beta, KSOneSided, NoncentralBeta}`, + 4. `SimplexDistribution`: `Union{Dirichlet}`, + 5. `PDMatDistribution`: `Union{InverseWishart, Wishart}`, and + 6. `TransformDistribution`: `Union{T, Truncated{T}} where T<:ContinuousUnivariateDistribution`. All exported names from the [Distributions.jl](https://github.com/TuringLang/Bijectors.jl) package are reexported from `Bijectors`. Bijectors.jl also provides a nice interface for working with these maps: composition, inversion, etc. The following table lists mathematical operations for a bijector and the corresponding code in Bijectors.jl. -| Operation | Method | Automatic | -|:------------------------------------:|:-----------------:|:-----------:| -| `b ↦ b⁻¹` | `inverse(b)` | ✓ | -| `(b₁, b₂) ↦ (b₁ ∘ b₂)` | `b₁ ∘ b₂` | ✓ | -| `(b₁, b₂) ↦ [b₁, b₂]` | `stack(b₁, b₂)` | ✓ | -| `x ↦ b(x)` | `b(x)` | × | -| `y ↦ b⁻¹(y)` | `inverse(b)(y)` | × | -| `x ↦ log|det J(b, x)|` | `logabsdetjac(b, x)` | AD | -| `x ↦ b(x), log|det J(b, x)|` | `with_logabsdet_jacobian(b, x)` | ✓ | -| `p ↦ q := b_* p` | `q = transformed(p, b)` | ✓ | -| `y ∼ q` | `y = rand(q)` | ✓ | -| `p ↦ b` such that `support(b_* p) = ℝᵈ` | `bijector(p)` | ✓ | -| `(x ∼ p, b(x), log|det J(b, x)|, log q(y))` | `forward(q)` | ✓ | +| Operation | Method | Automatic | +|:-------------------------------------------:|:-------------------------------:|:---------:| +| `b ↦ b⁻¹` | `inverse(b)` | ✓ | +| `(b₁, b₂) ↦ (b₁ ∘ b₂)` | `b₁ ∘ b₂` | ✓ | +| `(b₁, b₂) ↦ [b₁, b₂]` | `stack(b₁, b₂)` | ✓ | +| `x ↦ b(x)` | `b(x)` | × | +| `y ↦ b⁻¹(y)` | `inverse(b)(y)` | × | +| `x ↦ log|det J(b, x)|` | `logabsdetjac(b, x)` | AD | +| `x ↦ b(x), log|det J(b, x)|` | `with_logabsdet_jacobian(b, x)` | ✓ | +| `p ↦ q := b_* p` | `q = transformed(p, b)` | ✓ | +| `y ∼ q` | `y = rand(q)` | ✓ | +| `p ↦ b` such that `support(b_* p) = ℝᵈ` | `bijector(p)` | ✓ | +| `(x ∼ p, b(x), log|det J(b, x)|, log q(y))` | `forward(q)` | ✓ | In this table, `b` denotes a `Bijector`, `J(b, x)` denotes the Jacobian of `b` evaluated at `x`, `b_*` denotes the [push-forward](https://www.wikiwand.com/en/Pushforward_measure) of `p` by `b`, and `x ∼ p` denotes `x` sampled from the distribution with density `p`. diff --git a/docs/src/transforms.md b/docs/src/transforms.md index cf9223aa..8617a828 100644 --- a/docs/src/transforms.md +++ b/docs/src/transforms.md @@ -1,8 +1,9 @@ ## Usage A very simple example of a "bijector"/diffeomorphism, i.e. a differentiable transformation with a differentiable inverse, is the `exp` function: -- The inverse of `exp` is `log`. -- The derivative of `exp` at an input `x` is simply `exp(x)`, hence `logabsdetjac` is simply `x`. + + - The inverse of `exp` is `log`. + - The derivative of `exp` at an input `x` is simply `exp(x)`, hence `logabsdetjac` is simply `x`. ```@repl usage using Bijectors @@ -100,4 +101,3 @@ Bijectors.OrderedBijector Bijectors.NamedTransform Bijectors.NamedCoupling ``` - diff --git a/src/Bijectors.jl b/src/Bijectors.jl index d934381f..92e00b92 100644 --- a/src/Bijectors.jl +++ b/src/Bijectors.jl @@ -40,41 +40,41 @@ using InverseFunctions: InverseFunctions import ChangesOfVariables: ChangesOfVariables, with_logabsdet_jacobian import InverseFunctions: inverse -import ChainRulesCore -import Functors -import IrrationalConstants -import LogExpFunctions -import Roots - -export TransformDistribution, - PositiveDistribution, - UnitDistribution, - SimplexDistribution, - PDMatDistribution, - link, - invlink, - logpdf_with_trans, - isclosedform, - transform, - transform!, - with_logabsdet_jacobian, - with_logabsdet_jacobian!, - inverse, - logabsdetjac, - logabsdetjac!, - logabsdetjacinv, - Bijector, - Inverse, - Stacked, - bijector, - transformed, - UnivariateTransformed, - MultivariateTransformed, - PlanarLayer, - RadialLayer, - Coupling, - InvertibleBatchNorm, - elementwise +using ChainRulesCore: ChainRulesCore +using Functors: Functors +using IrrationalConstants: IrrationalConstants +using LogExpFunctions: LogExpFunctions +using Roots: Roots + +export TransformDistribution, + PositiveDistribution, + UnitDistribution, + SimplexDistribution, + PDMatDistribution, + link, + invlink, + logpdf_with_trans, + isclosedform, + transform, + transform!, + with_logabsdet_jacobian, + with_logabsdet_jacobian!, + inverse, + logabsdetjac, + logabsdetjac!, + logabsdetjacinv, + Bijector, + Inverse, + Stacked, + bijector, + transformed, + UnivariateTransformed, + MultivariateTransformed, + PlanarLayer, + RadialLayer, + Coupling, + InvertibleBatchNorm, + elementwise if VERSION < v"1.1" using Compat: eachcol @@ -98,27 +98,27 @@ end function mapvcat(f, args...) out = map(f, args...) init = vcat(out[1]) - return reduce(vcat, drop(out, 1); init = init) + return reduce(vcat, drop(out, 1); init=init) end function maphcat(f, args...) out = map(f, args...) init = reshape(out[1], :, 1) - return reduce(hcat, drop(out, 1); init = init) + return reduce(hcat, drop(out, 1); init=init) end function eachcolmaphcat(f, x1, x2) - out = [f(x1[:,i], x2[i]) for i in 1:size(x1, 2)] + out = [f(x1[:, i], x2[i]) for i in 1:size(x1, 2)] init = reshape(out[1], :, 1) - return reduce(hcat, drop(out, 1); init = init) + return reduce(hcat, drop(out, 1); init=init) end function eachcolmaphcat(f, x) out = map(f, eachcol(x)) init = reshape(out[1], :, 1) - return reduce(hcat, drop(out, 1); init = init) + return reduce(hcat, drop(out, 1); init=init) end function sumeachcol(f, x1, x2) # Using a view below for x1 breaks Tracker - return sum(f(x1[:,i], x2[i]) for i in 1:size(x1, 2)) + return sum(f(x1[:, i], x2[i]) for i in 1:size(x1, 2)) end # Distributions @@ -129,14 +129,21 @@ invlink(d::Distribution, y) = inverse(bijector(d))(y) # To still allow `logpdf_with_trans` to work with "batches" in a similar way # as `logpdf` can. _logabsdetjac_dist(d::UnivariateDistribution, x::Real) = logabsdetjac(bijector(d), x) -_logabsdetjac_dist(d::UnivariateDistribution, x::AbstractArray) = logabsdetjac.((bijector(d),), x) +function _logabsdetjac_dist(d::UnivariateDistribution, x::AbstractArray) + return logabsdetjac.((bijector(d),), x) +end -_logabsdetjac_dist(d::MultivariateDistribution, x::AbstractVector) = logabsdetjac(bijector(d), x) -_logabsdetjac_dist(d::MultivariateDistribution, x::AbstractMatrix) = logabsdetjac.((bijector(d),), eachcol(x)) +function _logabsdetjac_dist(d::MultivariateDistribution, x::AbstractVector) + return logabsdetjac(bijector(d), x) +end +function _logabsdetjac_dist(d::MultivariateDistribution, x::AbstractMatrix) + return logabsdetjac.((bijector(d),), eachcol(x)) +end _logabsdetjac_dist(d::MatrixDistribution, x::AbstractMatrix) = logabsdetjac(bijector(d), x) -_logabsdetjac_dist(d::MatrixDistribution, x::AbstractVector{<:AbstractMatrix}) = logabsdetjac.((bijector(d),), x) - +function _logabsdetjac_dist(d::MatrixDistribution, x::AbstractVector{<:AbstractMatrix}) + return logabsdetjac.((bijector(d),), x) +end function logpdf_with_trans(d::Distribution, x, transform::Bool) if ispd(d) @@ -155,15 +162,27 @@ end ## Univariate -const TransformDistribution = Union{ - T, - Truncated{T}, -} where T <: ContinuousUnivariateDistribution +const TransformDistribution = + Union{T,Truncated{T}} where {T<:ContinuousUnivariateDistribution} const PositiveDistribution = Union{ - BetaPrime, Chi, Chisq, Erlang, Exponential, FDist, Frechet, Gamma, InverseGamma, - InverseGaussian, Kolmogorov, LogNormal, NoncentralChisq, NoncentralF, Rayleigh, Weibull, + BetaPrime, + Chi, + Chisq, + Erlang, + Exponential, + FDist, + Frechet, + Gamma, + InverseGamma, + InverseGaussian, + Kolmogorov, + LogNormal, + NoncentralChisq, + NoncentralF, + Rayleigh, + Weibull, } -const UnitDistribution = Union{Beta, KSOneSided, NoncentralBeta} +const UnitDistribution = Union{Beta,KSOneSided,NoncentralBeta} function logpdf_with_trans(d::UnivariateDistribution, x, transform::Bool) if transform @@ -183,33 +202,23 @@ isdirichlet(::Distribution) = false # ∑xᵢ = 1 # ########### -function link( - d::Dirichlet, - x::AbstractVecOrMat{<:Real}, - ::Val{proj}=Val(true), -) where {proj} +function link(d::Dirichlet, x::AbstractVecOrMat{<:Real}, ::Val{proj}=Val(true)) where {proj} return SimplexBijector{proj}()(x) end function link_jacobian( - d::Dirichlet, - x::AbstractVector{<:Real}, - ::Val{proj}=Val(true), + d::Dirichlet, x::AbstractVector{<:Real}, ::Val{proj}=Val(true) ) where {proj} return jacobian(SimplexBijector{proj}(), x) end function invlink( - d::Dirichlet, - y::AbstractVecOrMat{<:Real}, - ::Val{proj}=Val(true), + d::Dirichlet, y::AbstractVecOrMat{<:Real}, ::Val{proj}=Val(true) ) where {proj} return inverse(SimplexBijector{proj}())(y) end function invlink_jacobian( - d::Dirichlet, - y::AbstractVector{<:Real}, - ::Val{proj}=Val(true), + d::Dirichlet, y::AbstractVector{<:Real}, ::Val{proj}=Val(true) ) where {proj} return jacobian(inverse(SimplexBijector{proj}()), y) end @@ -220,14 +229,12 @@ end # Positive definite # ##################### -const PDMatDistribution = Union{MatrixBeta, InverseWishart, Wishart} +const PDMatDistribution = Union{MatrixBeta,InverseWishart,Wishart} ispd(::Distribution) = false ispd(::PDMatDistribution) = true function logpdf_with_trans( - d::MatrixDistribution, - X::AbstractArray{<:AbstractMatrix{<:Real}}, - transform::Bool, + d::MatrixDistribution, X::AbstractArray{<:AbstractMatrix{<:Real}}, transform::Bool ) return map(X) do x logpdf_with_trans(d, x, transform) @@ -235,7 +242,7 @@ function logpdf_with_trans( end function pd_logpdf_with_trans(d, X::AbstractMatrix{<:Real}, transform::Bool) T = eltype(X) - Xcf = cholesky(X, check = false) + Xcf = cholesky(X; check=false) if !issuccess(Xcf) Xcf = cholesky(X + max(eps(T), eps(T) * norm(X)) * I) end @@ -265,7 +272,7 @@ include("interface.jl") include("chainrules.jl") # Broadcasting here breaks Tracker for some reason -maporbroadcast(f, x::AbstractArray{<:Any, N}...) where {N} = map(f, x...) +maporbroadcast(f, x::AbstractArray{<:Any,N}...) where {N} = map(f, x...) maporbroadcast(f, x::AbstractArray...) = f.(x...) # optional dependencies @@ -281,11 +288,17 @@ function __init__() return copy(f.(x1, x2, x3, x...)) end end - @require ForwardDiff="f6369f11-7733-5829-9624-2563aa707210" include("compat/forwarddiff.jl") - @require Tracker="9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" include("compat/tracker.jl") - @require Zygote="e88e6eb3-aa80-5325-afca-941959d7151f" include("compat/zygote.jl") - @require ReverseDiff="37e2e3b7-166d-5795-8a7a-e32c996b4267" include("compat/reversediff.jl") - @require DistributionsAD="ced4e74d-a319-5a8a-b0ac-84af2272839c" include("compat/distributionsad.jl") + @require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" include( + "compat/forwarddiff.jl" + ) + @require Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" include("compat/tracker.jl") + @require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" include("compat/zygote.jl") + @require ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" include( + "compat/reversediff.jl" + ) + @require DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c" include( + "compat/distributionsad.jl" + ) end end # module diff --git a/src/bijectors/corr.jl b/src/bijectors/corr.jl index 252ecc68..995708dd 100644 --- a/src/bijectors/corr.jl +++ b/src/bijectors/corr.jl @@ -67,8 +67,8 @@ with_logabsdet_jacobian(b::CorrBijector, x) = transform(b, x), logabsdetjac(b, x function transform(b::CorrBijector, x::AbstractMatrix{<:Real}) w = cholesky(x).U # keep LowerTriangular until here can avoid some computation - r = _link_chol_lkj(w) - return r + zero(x) + r = _link_chol_lkj(w) + return r + zero(x) # This dense format itself is required by a test, though I can't get the point. # https://github.com/TuringLang/Bijectors.jl/blob/b0aaa98f90958a167a0b86c8e8eca9b95502c42d/test/transform.jl#L67 end @@ -80,15 +80,17 @@ end function logabsdetjac(::Inverse{CorrBijector}, y::AbstractMatrix{<:Real}) K = LinearAlgebra.checksquare(y) - + result = float(zero(eltype(y))) for j in 2:K, i in 1:(j - 1) @inbounds abs_y_i_j = abs(y[i, j]) - result += (K - i + 1) * ( - IrrationalConstants.logtwo - (abs_y_i_j + LogExpFunctions.log1pexp(-2 * abs_y_i_j)) - ) + result += + (K - i + 1) * ( + IrrationalConstants.logtwo - + (abs_y_i_j + LogExpFunctions.log1pexp(-2 * abs_y_i_j)) + ) end - + return result end function logabsdetjac(b::CorrBijector, X::AbstractMatrix{<:Real}) @@ -98,27 +100,27 @@ function logabsdetjac(b::CorrBijector, X::AbstractMatrix{<:Real}) `logabsdetjac(::Inverse{CorrBijector}, y::AbstractMatrix{<:Real})` if possible. =# - return -logabsdetjac(inverse(b), (b(X))) + return -logabsdetjac(inverse(b), (b(X))) end function _inv_link_chol_lkj(y) K = LinearAlgebra.checksquare(y) w = similar(y) - + @inbounds for j in 1:K w[1, j] = 1 for i in 2:j - z = tanh(y[i-1, j]) - tmp = w[i-1, j] - w[i-1, j] = z * tmp + z = tanh(y[i - 1, j]) + tmp = w[i - 1, j] + w[i - 1, j] = z * tmp w[i, j] = tmp * sqrt(1 - z^2) end - for i in (j+1):K + for i in (j + 1):K w[i, j] = 0 end end - + return w end @@ -163,7 +165,7 @@ function _link_chol_lkj(w) # This block can't be integrated with loop below, because w[1,1] != 0. @inbounds z[1, 1] = 0 - @inbounds for j=2:K + @inbounds for j in 2:K z[1, j] = atanh(w[1, j]) tmp = sqrt(1 - w[1, j]^2) for i in 2:(j - 1) @@ -173,6 +175,6 @@ function _link_chol_lkj(w) end z[j, j] = 0 end - + return z end diff --git a/src/bijectors/coupling.jl b/src/bijectors/coupling.jl index 9aaaf829..b907583d 100644 --- a/src/bijectors/coupling.jl +++ b/src/bijectors/coupling.jl @@ -48,13 +48,15 @@ PartitionMask{Float32,SparseArrays.SparseMatrixCSC{Float32,Int64}}( [3, 1] = 1.0) ``` """ -struct PartitionMask{T, A} +struct PartitionMask{T,A} A_1::A A_2::A A_3::A # Only make it possible to construct using matrices - PartitionMask(A_1::A, A_2::A, A_3::A) where {T<:Real, A <: AbstractMatrix{T}} = new{T, A}(A_1, A_2, A_3) + function PartitionMask(A_1::A, A_2::A, A_3::A) where {T<:Real,A<:AbstractMatrix{T}} + return new{T,A}(A_1, A_2, A_3) + end end PartitionMask(args...) = PartitionMask{Bool}(args...) @@ -63,7 +65,7 @@ function PartitionMask{T}( n::Int, indices_1::AbstractVector{Int}, indices_2::AbstractVector{Int}, - indices_3::AbstractVector{Int} + indices_3::AbstractVector{Int}, ) where {T<:Real} A_1 = sparse(indices_1, 1:length(indices_1), one(T), n, length(indices_1)) A_2 = sparse(indices_2, 1:length(indices_2), one(T), n, length(indices_2)) @@ -72,25 +74,29 @@ function PartitionMask{T}( return PartitionMask(A_1, A_2, A_3) end -PartitionMask{T}( - n::Int, - indices_1::AbstractVector{Int}, - indices_2::AbstractVector{Int}; -) where {T} = PartitionMask{T}(n, indices_1, indices_2, nothing) +function PartitionMask{T}( + n::Int, indices_1::AbstractVector{Int}, indices_2::AbstractVector{Int}; +) where {T} + return PartitionMask{T}(n, indices_1, indices_2, nothing) +end -PartitionMask{T}( +function PartitionMask{T}( n::Int, indices_1::AbstractVector{Int}, indices_2::AbstractVector{Int}, indices_3::Nothing, -) where {T} = PartitionMask{T}(n, indices_1, indices_2, setdiff(1:n, indices_1, indices_2)) +) where {T} + return PartitionMask{T}(n, indices_1, indices_2, setdiff(1:n, indices_1, indices_2)) +end -PartitionMask{T}( +function PartitionMask{T}( n::Int, indices_1::AbstractVector{Int}, indices_2::Nothing, indices_3::AbstractVector{Int}, -) where {T} = PartitionMask{T}(n, indices_1, setdiff(1:n, indices_1, indices_3), indices_3) +) where {T} + return PartitionMask{T}(n, indices_1, setdiff(1:n, indices_1, indices_3), indices_3) +end """ PartitionMask(n::Int, indices) @@ -123,8 +129,9 @@ Combines `x_1`, `x_2`, and `x_3` into a single vector. Partitions `x` into 3 disjoint subvectors. """ -@inline partition(m::PartitionMask, x) = (transpose(m.A_1) * x, transpose(m.A_2) * x, transpose(m.A_3) * x) - +@inline function partition(m::PartitionMask, x) + return (transpose(m.A_1) * x, transpose(m.A_2) * x, transpose(m.A_3) * x) +end # Coupling @@ -168,7 +175,7 @@ julia> with_logabsdet_jacobian(cl, x) # References [1] Kobyzev, I., Prince, S., & Brubaker, M. A., Normalizing flows: introduction and ideas, CoRR, (), (2019). """ -struct Coupling{F, M} <: Bijector where {F, M <: PartitionMask} +struct Coupling{F,M} <: Bijector where {F,M<:PartitionMask} θ::F mask::M end @@ -233,7 +240,7 @@ end function transform(icl::Inverse{<:Coupling}, y::AbstractVector) cl = icl.orig - + y_1, y_2, y_3 = partition(cl.mask, y) b = cl.θ(y_2) diff --git a/src/bijectors/exp_log.jl b/src/bijectors/exp_log.jl index 7236a74e..8af505b3 100644 --- a/src/bijectors/exp_log.jl +++ b/src/bijectors/exp_log.jl @@ -1,4 +1,6 @@ -transform!(b::Union{Elementwise{typeof(log)}, Elementwise{typeof(exp)}}, x, y) = broadcast!(b.x, y, x) +function transform!(b::Union{Elementwise{typeof(log)},Elementwise{typeof(exp)}}, x, y) + return broadcast!(b.x, y, x) +end logabsdetjac(b::typeof(exp), x::Real) = x logabsdetjac(b::Elementwise{typeof(exp)}, x) = sum(x) diff --git a/src/bijectors/logit.jl b/src/bijectors/logit.jl index 1df73514..946228bf 100644 --- a/src/bijectors/logit.jl +++ b/src/bijectors/logit.jl @@ -25,4 +25,6 @@ logit_logabsdetjac(x, a, b) = -log((x - a) * (b - x) / (b - a)) logabsdetjac(b::Logit, x) = sum(logit_logabsdetjac.(x, b.a, b.b)) # `with_logabsdet_jacobian` -with_logabsdet_jacobian(b::Logit, x) = _logit.(x, b.a, b.b), sum(logit_logabsdetjac.(x, b.a, b.b)) +function with_logabsdet_jacobian(b::Logit, x) + return _logit.(x, b.a, b.b), sum(logit_logabsdetjac.(x, b.a, b.b)) +end diff --git a/src/bijectors/named_bijector.jl b/src/bijectors/named_bijector.jl index d147afc6..85e53e9b 100644 --- a/src/bijectors/named_bijector.jl +++ b/src/bijectors/named_bijector.jl @@ -24,16 +24,16 @@ julia> (a = 2 * x.a, b = exp(x.b), c = x.c) (a = 2.0, b = 1.0, c = 42.0) ``` """ -struct NamedTransform{names, Bs<:NamedTuple{names}} <: AbstractNamedTransform +struct NamedTransform{names,Bs<:NamedTuple{names}} <: AbstractNamedTransform bs::Bs end # fields contain nested numerical parameters -function Functors.functor(::Type{<:NamedTransform{names}}, x) where names +function Functors.functor(::Type{<:NamedTransform{names}}, x) where {names} function reconstruct_namedbijector(xs) return NamedTransform{names,typeof(xs.bs)}(xs.bs) end - return (bs = x.bs,), reconstruct_namedbijector + return (bs=x.bs,), reconstruct_namedbijector end # TODO: Use recursion instead of `@generated`? @@ -43,9 +43,8 @@ inverse(t::NamedTransform) = NamedTransform(map(inverse, t.bs)) isinvertible(t::NamedTransform) = all(isinvertible, t.bs) @generated function transform( - b::NamedTransform{names1}, - x::NamedTuple{names2} -) where {names1, names2} + b::NamedTransform{names1}, x::NamedTuple{names2} +) where {names1,names2} exprs = [] for n in names2 if n in names1 @@ -56,7 +55,7 @@ isinvertible(t::NamedTransform) = all(isinvertible, t.bs) push!(exprs, :($n = x.$n)) end end - return :($(exprs...), ) + return :($(exprs...),) end @generated function logabsdetjac(b::NamedTransform{names}, x::NamedTuple) where {names} @@ -65,18 +64,20 @@ end end @generated function with_logabsdet_jacobian( - b::NamedTransform{names1}, - x::NamedTuple{names2} -) where {names1, names2} + b::NamedTransform{names1}, x::NamedTuple{names2} +) where {names1,names2} body_exprs = [] logjac_expr = Expr(:call, :+) - val_expr = Expr(:tuple, ) + val_expr = Expr(:tuple) for n in names2 if n in names1 val_sym = Symbol("y_$n") logjac_sym = Symbol("logjac_$n") - push!(body_exprs, :(($val_sym, $logjac_sym) = with_logabsdet_jacobian(b.bs.$n, x.$n))) + push!( + body_exprs, + :(($val_sym, $logjac_sym) = with_logabsdet_jacobian(b.bs.$n, x.$n)), + ) push!(logjac_expr.args, logjac_sym) push!(val_expr.args, :($n = $val_sym)) else @@ -115,13 +116,13 @@ julia> (a = x.a, b = (x.a + x.c) * x.b, c = x.c) (a = 1.0, b = 8.0, c = 3.0) ``` """ -struct NamedCoupling{target, deps, F} <: AbstractNamedBijector where {F, target} +struct NamedCoupling{target,deps,F} <: AbstractNamedBijector where {F,target} f::F end -NamedCoupling(target, deps, f::F) where {F} = NamedCoupling{target, deps, F}(f) -function NamedCoupling(::Val{target}, ::Val{deps}, f::F) where {target, deps, F} - return NamedCoupling{target, deps, F}(f) +NamedCoupling(target, deps, f::F) where {F} = NamedCoupling{target,deps,F}(f) +function NamedCoupling(::Val{target}, ::Val{deps}, f::F) where {target,deps,F} + return NamedCoupling{target,deps,F}(f) end isinvertible(::NamedCoupling) = true @@ -130,20 +131,24 @@ coupling(b::NamedCoupling) = b.f # For some reason trying to use the parameteric types doesn't always work # so we have to do this weird approach of extracting type and then index `parameters`. target(b::NamedCoupling{Target}) where {Target} = Target -deps(b::NamedCoupling{<:Any, Deps}) where {Deps} = Deps +deps(b::NamedCoupling{<:Any,Deps}) where {Deps} = Deps -@generated function with_logabsdet_jacobian(nc::NamedCoupling{target, deps, F}, x::NamedTuple) where {target, deps, F} +@generated function with_logabsdet_jacobian( + nc::NamedCoupling{target,deps,F}, x::NamedTuple +) where {target,deps,F} return quote b = nc.f($([:(x.$d) for d in deps]...)) x_target, logjac = with_logabsdet_jacobian(b, x.$target) - return merge(x, ($target = x_target, )), logjac + return merge(x, ($target=x_target,)), logjac end end -@generated function with_logabsdet_jacobian(ni::Inverse{<:NamedCoupling{target, deps, F}}, x::NamedTuple) where {target, deps, F} +@generated function with_logabsdet_jacobian( + ni::Inverse{<:NamedCoupling{target,deps,F}}, x::NamedTuple +) where {target,deps,F} return quote ib = inverse(ni.orig.f($([:(x.$d) for d in deps]...))) x_target, logjac = with_logabsdet_jacobian(ib, x.$target) - return merge(x, ($target = x_target, )), logjac + return merge(x, ($target=x_target,)), logjac end end diff --git a/src/bijectors/normalise.jl b/src/bijectors/normalise.jl index 6b131d27..5ff91f6d 100644 --- a/src/bijectors/normalise.jl +++ b/src/bijectors/normalise.jl @@ -7,32 +7,30 @@ using Statistics: mean istraining() = false mutable struct InvertibleBatchNorm{T1,T2,T3} <: Bijector - b :: T1 # bias - logs :: T1 # log-scale - m :: T2 # moving mean - v :: T2 # moving variance - eps :: T3 - mtm :: T3 # momentum + b::T1 # bias + logs::T1 # log-scale + m::T2 # moving mean + v::T2 # moving variance + eps::T3 + mtm::T3 # momentum end function Base.:(==)(b1::InvertibleBatchNorm, b2::InvertibleBatchNorm) - return b1.b == b2.b && - b1.logs == b2.logs && - b1.m == b2.m && - b1.v == b2.v && - b1.eps == b2.eps && - b1.mtm == b2.mtm + return b1.b == b2.b && + b1.logs == b2.logs && + b1.m == b2.m && + b1.v == b2.v && + b1.eps == b2.eps && + b1.mtm == b2.mtm end function InvertibleBatchNorm( - chs::Int; - eps::T=1f-5, - mtm::T=1f-1, + chs::Int; eps::T=1.0f-5, mtm::T=1.0f-1 ) where {T<:AbstractFloat} return InvertibleBatchNorm( zeros(T, chs), zeros(T, chs), # logs = 0 means s = 1 zeros(T, chs), - ones(T, chs), + ones(T, chs), eps, mtm, ) @@ -42,8 +40,9 @@ Functors.@functor InvertibleBatchNorm (b, logs) function with_logabsdet_jacobian(bn::InvertibleBatchNorm, x) dims = ndims(x) - size(x, dims - 1) == length(bn.b) || - error("InvertibleBatchNorm expected $(length(bn.b)) channels, got $(size(x, dims - 1))") + size(x, dims - 1) == length(bn.b) || error( + "InvertibleBatchNorm expected $(length(bn.b)) channels, got $(size(x, dims - 1))", + ) channels = size(x, dims - 1) as = ntuple(i -> i == ndims(x) - 1 ? size(x, i) : 1, dims) logs = reshape(bn.logs, as...) @@ -51,9 +50,9 @@ function with_logabsdet_jacobian(bn::InvertibleBatchNorm, x) b = reshape(bn.b, as...) if istraining() n = div(prod(size(x)), channels) - axes = [1:dims-2; dims] # axes to reduce along (all but channels axis) - m = mean(x, dims = axes) - v = sum((x .- m) .^ 2, dims = axes) ./ n + axes = [1:(dims - 2); dims] # axes to reduce along (all but channels axis) + m = mean(x; dims=axes) + v = sum((x .- m) .^ 2; dims=axes) ./ n # Update moving mean and variance mtm = bn.mtm T = eltype(bn.m) @@ -65,9 +64,7 @@ function with_logabsdet_jacobian(bn::InvertibleBatchNorm, x) end result = s .* (x .- m) ./ sqrt.(v .+ bn.eps) .+ b - logabsdetjac = ( - fill(sum(logs - log.(v .+ bn.eps) / 2), size(x, dims)) - ) + logabsdetjac = (fill(sum(logs - log.(v .+ bn.eps) / 2), size(x, dims))) return (result, logabsdetjac) end @@ -91,5 +88,5 @@ end transform(bn::Inverse{<:InvertibleBatchNorm}, y) = first(with_logabsdet_jacobian(bn, y)) function Base.show(io::IO, l::InvertibleBatchNorm) - print(io, "InvertibleBatchNorm($(join(size(l.b), ", ")))") + return print(io, "InvertibleBatchNorm($(join(size(l.b), ", ")))") end diff --git a/src/bijectors/ordered.jl b/src/bijectors/ordered.jl index 9bc172b2..d52aa34d 100644 --- a/src/bijectors/ordered.jl +++ b/src/bijectors/ordered.jl @@ -18,7 +18,11 @@ This transformation is currently only supported for otherwise unconstrained dist """ function ordered(d::ContinuousMultivariateDistribution) if bijector(d) !== identity - throw(ArgumentError("ordered transform is currently only supported for unconstrained distributions.")) + throw( + ArgumentError( + "ordered transform is currently only supported for unconstrained distributions.", + ), + ) end return transformed(d, OrderedBijector()) end @@ -32,7 +36,7 @@ function _transform_ordered(y::AbstractVector) @assert !isempty(y) @inbounds x[1] = y[1] - @inbounds for i = 2:length(x) + @inbounds for i in 2:length(x) x[i] = x[i - 1] + exp(y[i]) end @@ -43,7 +47,7 @@ function _transform_ordered(y::AbstractMatrix) x = similar(y) @assert !isempty(y) - @inbounds for j = 1:size(x, 2), i = 1:size(x, 1) + @inbounds for j in 1:size(x, 2), i in 1:size(x, 1) if i == 1 x[i, j] = y[i, j] else @@ -60,7 +64,7 @@ function _transform_inverse_ordered(x::AbstractVector) @assert !isempty(y) @inbounds y[1] = x[1] - @inbounds for i = 2:length(y) + @inbounds for i in 2:length(y) y[i] = log(x[i] - x[i - 1]) end @@ -71,7 +75,7 @@ function _transform_inverse_ordered(x::AbstractMatrix) y = similar(x) @assert !isempty(y) - @inbounds for j = 1:size(y, 2), i = 1:size(y, 1) + @inbounds for j in 1:size(y, 2), i in 1:size(y, 1) if i == 1 y[i, j] = x[i, j] else @@ -83,4 +87,4 @@ function _transform_inverse_ordered(x::AbstractMatrix) end logabsdetjac(b::OrderedBijector, x::AbstractVector) = sum(@view(x[2:end])) -logabsdetjac(b::OrderedBijector, x::AbstractMatrix) = vec(sum(@view(x[2:end, :]); dims = 1)) +logabsdetjac(b::OrderedBijector, x::AbstractMatrix) = vec(sum(@view(x[2:end, :]); dims=1)) diff --git a/src/bijectors/pd.jl b/src/bijectors/pd.jl index bed6ee9a..4a68bdd8 100644 --- a/src/bijectors/pd.jl +++ b/src/bijectors/pd.jl @@ -9,7 +9,7 @@ function replace_diag(f, X) end transform(b::PDBijector, X::AbstractMatrix{<:Real}) = pd_link(X) function pd_link(X) - Y = lower(parent(cholesky(X; check = true).L)) + Y = lower(parent(cholesky(X; check=true).L)) return replace_diag(log, Y) end lower(A::AbstractMatrix) = convert(typeof(A), LowerTriangular(A)) @@ -22,7 +22,7 @@ getpd(X) = LowerTriangular(X) * LowerTriangular(X)' function logabsdetjac(b::PDBijector, X::AbstractMatrix{<:Real}) T = eltype(X) - Xcf = cholesky(X, check = false) + Xcf = cholesky(X; check=false) if !issuccess(Xcf) Xcf = cholesky(X + max(eps(T), eps(T) * norm(X)) * I) end @@ -35,7 +35,7 @@ function logabsdetjac_pdbijector_chol(Xcf::Cholesky) UL = Xcf.UL d = size(UL, 1) z = sum(((d + 1):(-1):2) .* log.(diag(UL))) - return - (z + d * oftype(z, IrrationalConstants.logtwo)) + return -(z + d * oftype(z, IrrationalConstants.logtwo)) end # TODO: Implement explicitly. diff --git a/src/bijectors/permute.jl b/src/bijectors/permute.jl index a2b49aa7..ddfacaf8 100644 --- a/src/bijectors/permute.jl +++ b/src/bijectors/permute.jl @@ -99,7 +99,7 @@ function Permute(indices::AbstractVector{Int}) return Permute(A) end -function Permute(n::Int, indices::Pair{Int, Int}...) +function Permute(n::Int, indices::Pair{Int,Int}...) A = spdiagm(0 => ones(n)) dests = Set{Int}() @@ -111,7 +111,7 @@ function Permute(n::Int, indices::Pair{Int, Int}...) push!(dests, dst) push!(sources, src) - + A[dst, src] = 1.0 A[src, src] = 0.0 # <= remove `src => src` end @@ -122,7 +122,7 @@ function Permute(n::Int, indices::Pair{Int, Int}...) return Permute(A) end -function Permute(n::Int, indices::Pair{Vector{Int}, Vector{Int}}...) +function Permute(n::Int, indices::Pair{Vector{Int},Vector{Int}}...) A = spdiagm(0 => ones(n)) dests = Set{Int}() @@ -130,14 +130,14 @@ function Permute(n::Int, indices::Pair{Vector{Int}, Vector{Int}}...) for (srcs, dsts) in indices @argcheck length(srcs) == length(dsts) "$srcs => $dsts is not bijective" - + for (src, dst) in zip(srcs, dsts) @argcheck dst ∉ dests "$dst used more than once" @argcheck src ∉ sources "$src used more than once" push!(dests, dst) push!(sources, src) - + A[dst, src] = 1.0 A[src, src] = 0.0 # <= remove `src => src` end @@ -149,7 +149,6 @@ function Permute(n::Int, indices::Pair{Vector{Int}, Vector{Int}}...) return Permute(A) end - transform(b::Permute, x::AbstractVecOrMat) = b.A * x inverse(b::Permute) = Permute(transpose(b.A)) diff --git a/src/bijectors/planar_layer.jl b/src/bijectors/planar_layer.jl index be46de3a..4267dd61 100644 --- a/src/bijectors/planar_layer.jl +++ b/src/bijectors/planar_layer.jl @@ -10,7 +10,8 @@ # TODO: add docstring -struct PlanarLayer{T1<:AbstractVector{<:Real}, T2<:Union{Real, AbstractVector{<:Real}}} <: Bijector +struct PlanarLayer{T1<:AbstractVector{<:Real},T2<:Union{Real,AbstractVector{<:Real}}} <: + Bijector w::T1 u::T1 b::T2 @@ -29,7 +30,9 @@ end # all fields are numerical parameters Functors.@functor PlanarLayer -Base.show(io::IO, b::PlanarLayer) = print(io, "PlanarLayer(w = $(b.w), u = $(b.u), b = $(b.b))") +function Base.show(io::IO, b::PlanarLayer) + return print(io, "PlanarLayer(w = $(b.w), u = $(b.u), b = $(b.b))") +end """ get_u_hat(u::AbstractVector{<:Real}, w::AbstractVector{<:Real}) @@ -73,7 +76,7 @@ function _transform(flow::PlanarLayer, z::AbstractVecOrMat{<:Real}) û, wT_û = get_u_hat(flow.u, w) wT_z = aT_b(w, z) transformed = z .+ û .* tanh.(wT_z .+ b) - return (transformed = transformed, wT_û = wT_û, wT_z = wT_z) + return (transformed=transformed, wT_û=wT_û, wT_z=wT_z) end transform(b::PlanarLayer, z) = _transform(b, z).transformed @@ -103,7 +106,7 @@ function with_logabsdet_jacobian(flow::PlanarLayer, z::AbstractVecOrMat{<:Real}) b = first(flow.b) log_det_jacobian = log1p.(wT_û .* abs2.(sech.(_vec(wT_z) .+ b))) - return (result = transformed, logabsdetjac = log_det_jacobian) + return (result=transformed, logabsdetjac=log_det_jacobian) end function transform(ib::Inverse{<:PlanarLayer}, y::AbstractVecOrMat{<:Real}) diff --git a/src/bijectors/radial_layer.jl b/src/bijectors/radial_layer.jl index d4156f01..d2ed127f 100644 --- a/src/bijectors/radial_layer.jl +++ b/src/bijectors/radial_layer.jl @@ -8,7 +8,9 @@ # RadialLayer # ############### -mutable struct RadialLayer{T1<:Union{Real, AbstractVector{<:Real}}, T2<:AbstractVector{<:Real}} <: Bijector +mutable struct RadialLayer{ + T1<:Union{Real,AbstractVector{<:Real}},T2<:AbstractVector{<:Real} +} <: Bijector α_::T1 β::T1 z_0::T2 @@ -27,7 +29,9 @@ end # all fields are numerical parameters Functors.@functor RadialLayer -Base.show(io::IO, b::RadialLayer) = print(io, "RadialLayer(α_ = $(b.α_), β = $(b.β), z_0 = $(b.z_0))") +function Base.show(io::IO, b::RadialLayer) + return print(io, "RadialLayer(α_ = $(b.α_), β = $(b.β), z_0 = $(b.z_0))") +end h(α, r) = 1 ./ (α .+ r) # for radial flow from eq(14) #dh(α, r) = .- (1 ./ (α .+ r)) .^ 2 # for radial flow; derivative of h() @@ -42,10 +46,10 @@ function _radial_transform(α_, β, z_0, z) if z isa AbstractVector r = norm(z .- z_0) else - r = vec(sqrt.(sum(abs2, z .- z_0; dims = 1))) + r = vec(sqrt.(sum(abs2, z .- z_0; dims=1))) end transformed = z .+ β_hat ./ (α .+ r') .* (z .- z_0) # from eq(14) - return (transformed = transformed, α = α, β_hat = β_hat, r = r) + return (transformed=transformed, α=α, β_hat=β_hat, r=r) end transform(b::RadialLayer, z::AbstractVector{<:Real}) = vec(_transform(b, z).transformed) @@ -62,10 +66,9 @@ function with_logabsdet_jacobian(flow::RadialLayer, z::AbstractVecOrMat) T = typeof(vec(transformed)) end log_det_jacobian::T = @. ( - (d - 1) * log(1 + β_hat * h_) - + log(1 + β_hat * h_ + β_hat * (- h_ ^ 2) * r) + (d - 1) * log(1 + β_hat * h_) + log(1 + β_hat * h_ + β_hat * (-h_^2) * r) ) # from eq(14) - return (result = transformed, logabsdetjac = log_det_jacobian) + return (result=transformed, logabsdetjac=log_det_jacobian) end function transform(ib::Inverse{<:RadialLayer}, y::AbstractVector{<:Real}) @@ -125,4 +128,6 @@ function compute_r(y_minus_z0::AbstractVector{<:Real}, α, α_plus_β_hat) return r end -logabsdetjac(flow::RadialLayer, x::AbstractVecOrMat) = last(with_logabsdet_jacobian(flow, x)) +function logabsdetjac(flow::RadialLayer, x::AbstractVecOrMat) + return last(with_logabsdet_jacobian(flow, x)) +end diff --git a/src/bijectors/rational_quadratic_spline.jl b/src/bijectors/rational_quadratic_spline.jl index 6c0dd601..03e10a8a 100644 --- a/src/bijectors/rational_quadratic_spline.jl +++ b/src/bijectors/rational_quadratic_spline.jl @@ -78,21 +78,17 @@ struct RationalQuadraticSpline{T} <: Bijector derivatives::T # K derivatives, with endpoints being ones function RationalQuadraticSpline( - widths::T, - heights::T, - derivatives::T + widths::T, heights::T, derivatives::T ) where {T<:AbstractVector} # TODO: add a `NoArgCheck` type and argument so we can circumvent if we want @assert length(widths) == length(heights) == length(derivatives) @assert all(derivatives .> 0) "derivatives need to be positive" - + return new{T}(widths, heights, derivatives) end function RationalQuadraticSpline( - widths::T, - heights::T, - derivatives::T + widths::T, heights::T, derivatives::T ) where {T<:AbstractMatrix} @assert size(widths, 2) == size(heights, 2) == size(derivatives, 2) @assert all(derivatives .> 0) "derivatives need to be positive" @@ -101,32 +97,28 @@ struct RationalQuadraticSpline{T} <: Bijector end function RationalQuadraticSpline( - widths::A, - heights::A, - derivatives::A, - B::T2 -) where {T1, T2, A <: AbstractVector{T1}} + widths::A, heights::A, derivatives::A, B::T2 +) where {T1,T2,A<:AbstractVector{T1}} return RationalQuadraticSpline( (cumsum(vcat([zero(T1)], LogExpFunctions.softmax(widths))) .- 0.5) * 2 * B, (cumsum(vcat([zero(T1)], LogExpFunctions.softmax(heights))) .- 0.5) * 2 * B, - vcat([one(T1)], LogExpFunctions.log1pexp.(derivatives), [one(T1)]) + vcat([one(T1)], LogExpFunctions.log1pexp.(derivatives), [one(T1)]), ) end function RationalQuadraticSpline( - widths::A, - heights::A, - derivatives::A, - B::T2 -) where {T1, T2, A <: AbstractMatrix{T1}} - ws = hcat(zeros(T1, size(widths, 1)), LogExpFunctions.softmax(widths; dims = 2)) - hs = hcat(zeros(T1, size(widths, 1)), LogExpFunctions.softmax(heights; dims = 2)) - ds = hcat(ones(T1, size(widths, 1)), LogExpFunctions.log1pexp.(derivatives), ones(T1, size(widths, 1))) + widths::A, heights::A, derivatives::A, B::T2 +) where {T1,T2,A<:AbstractMatrix{T1}} + ws = hcat(zeros(T1, size(widths, 1)), LogExpFunctions.softmax(widths; dims=2)) + hs = hcat(zeros(T1, size(widths, 1)), LogExpFunctions.softmax(heights; dims=2)) + ds = hcat( + ones(T1, size(widths, 1)), + LogExpFunctions.log1pexp.(derivatives), + ones(T1, size(widths, 1)), + ) return RationalQuadraticSpline( - (2 * B) .* (cumsum(ws; dims = 2) .- 0.5), - (2 * B) .* (cumsum(hs; dims = 2) .- 0.5), - ds + (2 * B) .* (cumsum(ws; dims=2) .- 0.5), (2 * B) .* (cumsum(hs; dims=2) .- 0.5), ds ) end @@ -171,7 +163,6 @@ function rqs_univariate(widths, heights, derivatives, x::Real) return g end - # univariate function transform(b::RationalQuadraticSpline{<:AbstractVector}, x::Real) return rqs_univariate(b.widths, b.heights, b.derivatives, x) @@ -180,7 +171,10 @@ end # multivariate # TODO: Improve. function transform(b::RationalQuadraticSpline{<:AbstractMatrix}, x::AbstractVector) - return [rqs_univariate(b.widths[i, :], b.heights[i, :], b.derivatives[i, :], x[i]) for i = 1:length(x)] + return [ + rqs_univariate(b.widths[i, :], b.heights[i, :], b.derivatives[i, :], x[i]) for + i in 1:length(x) + ] end ########################## @@ -215,10 +209,10 @@ function rqs_univariate_inverse(widths, heights, derivatives, y::Real) # Eq. (26) a2 = Δy * d_k - (y - h_k) * ds # Eq. (27) - a3 = - s * (y - h_k) + a3 = -s * (y - h_k) # Eq. (24). There's a mistake in the paper; says `x` but should be `ξ` - numerator = - 2 * a3 + numerator = -2 * a3 denominator = (a2 + sqrt(a2^2 - 4 * a1 * a3)) ξ = numerator / denominator @@ -232,7 +226,10 @@ end # TODO: Improve. function transform(ib::Inverse{<:RationalQuadraticSpline}, y::AbstractVector) b = ib.orig - return [rqs_univariate_inverse(b.widths[i, :], b.heights[i, :], b.derivatives[i, :], y[i]) for i = 1:length(y)] + return [ + rqs_univariate_inverse(b.widths[i, :], b.heights[i, :], b.derivatives[i, :], y[i]) + for i in 1:length(y) + ] end ###################### @@ -241,7 +238,7 @@ end function rqs_logabsdetjac(widths, heights, derivatives, x::Real) T = promote_type(eltype(widths), eltype(heights), eltype(derivatives), eltype(y)) K = length(widths) - 1 - + # Find which bin `x` is in k = searchsortedfirst(widths, x) - 1 @@ -259,19 +256,15 @@ function rqs_logabsdetjac(widths, heights, derivatives, x::Real) s = Δy / w ξ = (x - widths[k]) / w - numerator = s^2 * (derivatives[k + 1] * ξ^2 - + 2 * s * ξ * (1 - ξ) - + derivatives[k] * (1 - ξ)^2) + numerator = + s^2 * (derivatives[k + 1] * ξ^2 + 2 * s * ξ * (1 - ξ) + derivatives[k] * (1 - ξ)^2) denominator = s + (derivatives[k + 1] + derivatives[k] - 2 * s) * ξ * (1 - ξ) return log(numerator) - 2 * log(denominator) end function rqs_logabsdetjac( - widths::AbstractVector, - heights::AbstractVector, - derivatives::AbstractVector, - x::Real + widths::AbstractVector, heights::AbstractVector, derivatives::AbstractVector, x::Real ) T = promote_type(eltype(widths), eltype(heights), eltype(derivatives), eltype(x)) @@ -310,8 +303,8 @@ end # TODO: Improve. function logabsdetjac(b::RationalQuadraticSpline{<:AbstractMatrix}, x::AbstractVector) return sum([ - rqs_logabsdetjac(b.widths[i, :], b.heights[i, :], b.derivatives[i, :], x[i]) - for i = 1:length(x) + rqs_logabsdetjac(b.widths[i, :], b.heights[i, :], b.derivatives[i, :], x[i]) for + i in 1:length(x) ]) end @@ -322,10 +315,7 @@ end # TODO: implement this for `x::AbstractVector` and similarily for 1-dimensional `b`, # and possibly inverses too? function rqs_forward( - widths::AbstractVector, - heights::AbstractVector, - derivatives::AbstractVector, - x::Real + widths::AbstractVector, heights::AbstractVector, derivatives::AbstractVector, x::Real ) T = promote_type(eltype(widths), eltype(heights), eltype(derivatives), eltype(x)) @@ -370,6 +360,8 @@ function with_logabsdet_jacobian(b::RationalQuadraticSpline{<:AbstractVector}, x return rqs_forward(b.widths, b.heights, b.derivatives, x) end -function with_logabsdet_jacobian(b::RationalQuadraticSpline{<:AbstractMatrix}, x::AbstractVector) +function with_logabsdet_jacobian( + b::RationalQuadraticSpline{<:AbstractMatrix}, x::AbstractVector +) return transform(b, x), logabsdetjac(b, x) end diff --git a/src/bijectors/scale.jl b/src/bijectors/scale.jl index bff549e5..1a277e21 100644 --- a/src/bijectors/scale.jl +++ b/src/bijectors/scale.jl @@ -18,7 +18,7 @@ transform(ib::Inverse{<:Scale{<:AbstractMatrix}}, y::AbstractVecOrMat) = ib.orig # We're going to implement custom adjoint for this logabsdetjac(b::Scale, x::Real) = _logabsdetjac_scale(b.a, x, Val(0)) -function logabsdetjac(b::Scale, x::AbstractArray{<:Real, N}) where {N} +function logabsdetjac(b::Scale, x::AbstractArray{<:Real,N}) where {N} return _logabsdetjac_scale(b.a, x, Val(N)) end diff --git a/src/bijectors/shift.jl b/src/bijectors/shift.jl index 908815a6..c02620ef 100644 --- a/src/bijectors/shift.jl +++ b/src/bijectors/shift.jl @@ -14,7 +14,7 @@ inverse(b::Shift) = Shift(-b.a) transform(b::Shift, x) = b.a .+ x # FIXME: implement custom adjoint to ensure we don't get tracking -function logabsdetjac(b::Shift, x::Union{Real, AbstractArray{<:Real}}) +function logabsdetjac(b::Shift, x::Union{Real,AbstractArray{<:Real}}) return _logabsdetjac_shift(b.a, x) end diff --git a/src/bijectors/simplex.jl b/src/bijectors/simplex.jl index 1fbc28d2..153d09c7 100644 --- a/src/bijectors/simplex.jl +++ b/src/bijectors/simplex.jl @@ -9,7 +9,9 @@ with_logabsdet_jacobian(b::SimplexBijector, x) = transform(b, x), logabsdetjac(b transform(b::SimplexBijector, x) = _simplex_bijector(x, b) transform!(b::SimplexBijector, y, x) = _simplex_bijector!(y, x, b) -_simplex_bijector(x::AbstractArray, b::SimplexBijector) = _simplex_bijector!(similar(x), x, b) +function _simplex_bijector(x::AbstractArray, b::SimplexBijector) + return _simplex_bijector!(similar(x), x, b) +end # Vector implementation. function _simplex_bijector!(y, x::AbstractVector, ::SimplexBijector{proj}) where {proj} @@ -24,7 +26,7 @@ function _simplex_bijector!(y, x::AbstractVector, ::SimplexBijector{proj}) where sum_tmp += x[k - 1] # z ∈ [ϵ, 1-ϵ] # x[k] = 0 && sum_tmp = 1 -> z ≈ 1 - z = (x[k] + ϵ)*(one(T) - 2ϵ)/((one(T) + ϵ) - sum_tmp) + z = (x[k] + ϵ) * (one(T) - 2ϵ) / ((one(T) + ϵ) - sum_tmp) y[k] = LogExpFunctions.logit(z) + log(T(K - k)) end @inbounds sum_tmp += x[K - 1] @@ -49,10 +51,10 @@ function _simplex_bijector!(Y, X::AbstractMatrix, ::SimplexBijector{proj}) where Y[1, n] = LogExpFunctions.logit(z) + log(T(K - 1)) for k in 2:(K - 1) sum_tmp += X[k - 1, n] - z = (X[k, n] + ϵ)*(one(T) - 2ϵ)/((one(T) + ϵ) - sum_tmp) + z = (X[k, n] + ϵ) * (one(T) - 2ϵ) / ((one(T) + ϵ) - sum_tmp) Y[k, n] = LogExpFunctions.logit(z) + log(T(K - k)) end - sum_tmp += X[K-1, n] + sum_tmp += X[K - 1, n] if proj Y[K, n] = zero(T) else @@ -64,11 +66,11 @@ function _simplex_bijector!(Y, X::AbstractMatrix, ::SimplexBijector{proj}) where end # Inverse. -transform(ib::Inverse{<:SimplexBijector}, y::AbstractArray) = _simplex_inv_bijector(y, ib.orig) +function transform(ib::Inverse{<:SimplexBijector}, y::AbstractArray) + return _simplex_inv_bijector(y, ib.orig) +end function transform!( - ib::Inverse{<:SimplexBijector}, - x::AbstractArray{T}, - y::AbstractArray{T}, + ib::Inverse{<:SimplexBijector}, x::AbstractArray{T}, y::AbstractArray{T} ) where {T} return _simplex_inv_bijector!(x, y, ib.orig) end @@ -83,9 +85,9 @@ function _simplex_inv_bijector!(x, y::AbstractVector, b::SimplexBijector{proj}) @inbounds z = LogExpFunctions.logistic(y[1] - log(T(K - 1))) @inbounds x[1] = _clamp((z - ϵ) / (one(T) - 2ϵ), 0, 1) sum_tmp = zero(T) - @inbounds @simd for k = 2:(K - 1) + @inbounds @simd for k in 2:(K - 1) z = LogExpFunctions.logistic(y[k] - log(T(K - k))) - sum_tmp += x[k-1] + sum_tmp += x[k - 1] x[k] = _clamp(((one(T) + ϵ) - sum_tmp) / (one(T) - 2ϵ) * z - ϵ, 0, 1) end @inbounds sum_tmp += x[K - 1] @@ -94,7 +96,7 @@ function _simplex_inv_bijector!(x, y::AbstractVector, b::SimplexBijector{proj}) else x[K] = _clamp(one(T) - sum_tmp - y[K], 0, 1) end - + return x end @@ -125,14 +127,14 @@ end function logabsdetjac(b::SimplexBijector, x::AbstractVector{T}) where {T} ϵ = _eps(T) lp = zero(T) - + K = length(x) sum_tmp = zero(eltype(x)) @inbounds z = x[1] lp += log(max(z, ϵ)) + log(max(one(T) - z, ϵ)) @inbounds @simd for k in 2:(K - 1) - sum_tmp += x[k-1] + sum_tmp += x[k - 1] z = x[k] / max(one(T) - sum_tmp, ϵ) lp += log(max(z, ϵ)) + log(max(one(T) - z, ϵ)) + log(max(one(T) - sum_tmp, ϵ)) end @@ -141,7 +143,7 @@ function logabsdetjac(b::SimplexBijector, x::AbstractVector{T}) where {T} end function simplex_logabsdetjac_gradient(x::AbstractVector) T = eltype(x) - ϵ = _eps(T) + ϵ = _eps(T) K = length(x) g = similar(x) g .= 0 @@ -151,9 +153,9 @@ function simplex_logabsdetjac_gradient(x::AbstractVector) c1 = z >= ϵ zc = one(T) - z c2 = zc >= ϵ - g[1] = ifelse(c1 & c2, -1/z + 1/zc, ifelse(c1, -1/z, 1/zc)) + g[1] = ifelse(c1 & c2, -1 / z + 1 / zc, ifelse(c1, -1 / z, 1 / zc)) @inbounds @simd for k in 2:(K - 1) - sum_tmp += x[k-1] + sum_tmp += x[k - 1] temp = 1 / (1 - sum_tmp) c0 = temp >= ϵ z = ifelse(c0, x[k] * temp, x[k] / ϵ) @@ -162,14 +164,14 @@ function simplex_logabsdetjac_gradient(x::AbstractVector) c1 = z >= ϵ zc = one(T) - z c2 = zc >= ϵ - dldz = ifelse(c1 & c2, 1/z - 1/zc, ifelse(c1, 1/z, -1/zc)) + dldz = ifelse(c1 & c2, 1 / z - 1 / zc, ifelse(c1, 1 / z, -1 / zc)) dldx = dldz * dzdx - g[k] -= dldx - for i in 1:k-1 - dzdxp = ifelse(c0, x[k] * dzdx^2, zero(T)) - dldxp = dldz * dzdxp - ifelse(c0, temp, zero(T)) - g[i] -= dldxp - end + g[k] -= dldx + for i in 1:(k - 1) + dzdxp = ifelse(c0, x[k] * dzdx^2, zero(T)) + dldxp = dldz * dzdxp - ifelse(c0, temp, zero(T)) + g[i] -= dldxp + end end return g end @@ -182,29 +184,29 @@ function simplex_logabsdetjac_gradient(x::AbstractMatrix) g .= 0 @inbounds @simd for col in 1:size(x, 2) sum_tmp = zero(eltype(x)) - z = x[1,col] + z = x[1, col] #lp += log(z + ϵ) + log((one(T) + ϵ) - z) c1 = z >= ϵ zc = one(T) - z c2 = zc >= ϵ - g[1,col] = ifelse(c1 & c2, -1/z + 1/zc, ifelse(c1, -1/z, 1/zc)) + g[1, col] = ifelse(c1 & c2, -1 / z + 1 / zc, ifelse(c1, -1 / z, 1 / zc)) for k in 2:(K - 1) - sum_tmp += x[k-1,col] + sum_tmp += x[k - 1, col] temp = 1 / (1 - sum_tmp) c0 = temp >= ϵ - z = ifelse(c0, x[k,col] * temp, x[k,col] / ϵ) + z = ifelse(c0, x[k, col] * temp, x[k, col] / ϵ) #lp += log(z + ϵ) + log((one(T) + ϵ) - z) + log(temp) dzdx = ifelse(c0, temp, one(T)) c1 = z >= ϵ zc = one(T) - z c2 = zc >= ϵ - dldz = ifelse(c1 & c2, 1/z - 1/zc, ifelse(c1, 1/z, -1/zc)) + dldz = ifelse(c1 & c2, 1 / z - 1 / zc, ifelse(c1, 1 / z, -1 / zc)) dldx = dldz * dzdx - g[k,col] -= dldx - for i in 1:k-1 - dzdxp = ifelse(c0, x[k,col] * dzdx^2, zero(T)) + g[k, col] -= dldx + for i in 1:(k - 1) + dzdxp = ifelse(c0, x[k, col] * dzdx^2, zero(T)) dldxp = dldz * dzdxp - ifelse(c0, temp, zero(T)) - g[i,col] -= dldxp + g[i, col] -= dldxp end end end @@ -212,9 +214,8 @@ function simplex_logabsdetjac_gradient(x::AbstractMatrix) end function simplex_link_jacobian( - x::AbstractVector{T}, - ::Val{proj}=Val(true), -) where {T<:Real, proj} + x::AbstractVector{T}, ::Val{proj}=Val(true) +) where {T<:Real,proj} K = length(x) @assert K > 1 "x needs to be of length greater than 1" dydxt = similar(x, length(x), length(x)) @@ -223,26 +224,28 @@ function simplex_link_jacobian( sum_tmp = zero(T) @inbounds z = x[1] * (one(T) - 2ϵ) + ϵ # z ∈ [ϵ, 1-ϵ] - @inbounds dydxt[1,1] = (1/z + 1/(1-z)) * (one(T) - 2ϵ) + @inbounds dydxt[1, 1] = (1 / z + 1 / (1 - z)) * (one(T) - 2ϵ) @inbounds @simd for k in 2:(K - 1) sum_tmp += x[k - 1] # z ∈ [ϵ, 1-ϵ] # x[k] = 0 && sum_tmp = 1 -> z ≈ 1 - z = (x[k] + ϵ)*(one(T) - 2ϵ)/((one(T) + ϵ) - sum_tmp) - dydxt[k,k] = (1/z + 1/(1-z)) * (one(T) - 2ϵ)/((one(T) + ϵ) - sum_tmp) - for i in 1:k-1 - dydxt[i,k] = (1/z + 1/(1-z)) * (x[k] + ϵ)*(one(T) - 2ϵ)/((one(T) + ϵ) - sum_tmp)^2 + z = (x[k] + ϵ) * (one(T) - 2ϵ) / ((one(T) + ϵ) - sum_tmp) + dydxt[k, k] = (1 / z + 1 / (1 - z)) * (one(T) - 2ϵ) / ((one(T) + ϵ) - sum_tmp) + for i in 1:(k - 1) + dydxt[i, k] = + (1 / z + 1 / (1 - z)) * (x[k] + ϵ) * (one(T) - 2ϵ) / + ((one(T) + ϵ) - sum_tmp)^2 end end @inbounds sum_tmp += x[K - 1] @inbounds if !proj @simd for i in 1:K - dydxt[i,K] = -1 + dydxt[i, K] = -1 end end return UpperTriangular(dydxt)' end -function jacobian(b::SimplexBijector{proj}, x::AbstractVector{T}) where {proj, T} +function jacobian(b::SimplexBijector{proj}, x::AbstractVector{T}) where {proj,T} return simplex_link_jacobian(x, Val(proj)) end @@ -313,9 +316,8 @@ end =# function simplex_invlink_jacobian( - y::AbstractVector{T}, - ::Val{proj}=Val(true), -) where {T<:Real, proj} + y::AbstractVector{T}, ::Val{proj}=Val(true) +) where {T<:Real,proj} K = length(y) @assert K > 1 "x needs to be of length greater than 1" dxdy = similar(y, length(y), length(y)) @@ -326,45 +328,45 @@ function simplex_invlink_jacobian( unclamped_x = (z - ϵ) / (one(T) - 2ϵ) clamped_x = _clamp(unclamped_x, 0, 1) @inbounds if unclamped_x == clamped_x - dxdy[1,1] = z * (1 - z) / (one(T) - 2ϵ) + dxdy[1, 1] = z * (1 - z) / (one(T) - 2ϵ) end sum_tmp = zero(T) - @inbounds for k = 2:(K - 1) + @inbounds for k in 2:(K - 1) z = LogExpFunctions.logistic(y[k] - log(T(K - k))) sum_tmp += clamped_x unclamped_x = ((one(T) + ϵ) - sum_tmp) / (one(T) - 2ϵ) * z - ϵ clamped_x = _clamp(unclamped_x, 0, 1) if unclamped_x == clamped_x - dxdy[k,k] = z * (1 - z) * ((one(T) + ϵ) - sum_tmp) / (one(T) - 2ϵ) - for i in 1:k-1 - for j in i:k-1 - dxdy[k,i] += -dxdy[j,i] * z / (one(T) - 2ϵ) + dxdy[k, k] = z * (1 - z) * ((one(T) + ϵ) - sum_tmp) / (one(T) - 2ϵ) + for i in 1:(k - 1) + for j in i:(k - 1) + dxdy[k, i] += -dxdy[j, i] * z / (one(T) - 2ϵ) end end end end @inbounds sum_tmp += clamped_x @inbounds if proj - unclamped_x = one(T) - sum_tmp + unclamped_x = one(T) - sum_tmp clamped_x = _clamp(unclamped_x, 0, 1) else - unclamped_x = one(T) - sum_tmp - y[K] + unclamped_x = one(T) - sum_tmp - y[K] clamped_x = _clamp(unclamped_x, 0, 1) if unclamped_x == clamped_x - dxdy[K,K] = -1 + dxdy[K, K] = -1 end end @inbounds if unclamped_x == clamped_x - for i in 1:K-1 - @simd for j in i:K-1 - dxdy[K,i] += -dxdy[j,i] + for i in 1:(K - 1) + @simd for j in i:(K - 1) + dxdy[K, i] += -dxdy[j, i] end end end return LowerTriangular(dxdy) end # jacobian -function jacobian(ib::Inverse{<:SimplexBijector{proj}}, y::AbstractVector{T}) where {proj, T} +function jacobian(ib::Inverse{<:SimplexBijector{proj}}, y::AbstractVector{T}) where {proj,T} return simplex_invlink_jacobian(y, Val(proj)) end diff --git a/src/bijectors/stacked.jl b/src/bijectors/stacked.jl index 9f0e73f4..4f0596cb 100644 --- a/src/bijectors/stacked.jl +++ b/src/bijectors/stacked.jl @@ -21,7 +21,7 @@ b = stack(b1, b2) b([0.0, 1.0]) == [b1(0.0), 1.0] # => true ``` """ -struct Stacked{Bs, Rs<:Union{Tuple,AbstractArray}} <: Transform +struct Stacked{Bs,Rs<:Union{Tuple,AbstractArray}} <: Transform bs::Bs ranges::Rs end @@ -48,27 +48,26 @@ isclosedform(b::Stacked) = all(isclosedform, b.bs) isinvertible(b::Stacked) = all(isinvertible, b.bs) - # For some reason `inverse.(sb.bs)` was unstable... This works though. inverse(sb::Stacked) = Stacked(map(inverse, sb.bs), sb.ranges) # map is not type stable for many stacked bijectors as a large tuple # hence the generated function -@generated function inverse(sb::Stacked{A}) where {A <: Tuple} +@generated function inverse(sb::Stacked{A}) where {A<:Tuple} exprs = [] - for i = 1:length(A.parameters) + for i in 1:length(A.parameters) push!(exprs, :(inverse(sb.bs[$i]))) end - :(Stacked(($(exprs...), ), sb.ranges)) + return :(Stacked(($(exprs...),), sb.ranges)) end -@generated function _transform(x, rs::NTuple{N, UnitRange{Int}}, bs...) where N +@generated function _transform(x, rs::NTuple{N,UnitRange{Int}}, bs...) where {N} exprs = [] - for i = 1:N + for i in 1:N push!(exprs, :(bs[$i](x[rs[$i]]))) end return :(vcat($(exprs...))) end -function _transform(x, rs::NTuple{1, UnitRange{Int}}, b) +function _transform(x, rs::NTuple{1,UnitRange{Int}}, b) @assert rs[1] == 1:length(x) return b(x) end @@ -89,10 +88,7 @@ function transform(sb::Stacked{<:AbstractArray}, x::AbstractVector{<:Real}) return y end -function logabsdetjac( - b::Stacked, - x::AbstractVector{<:Real} -) +function logabsdetjac(b::Stacked, x::AbstractVector{<:Real}) N = length(b.bs) init = sum(logabsdetjac(b.bs[1], x[b.ranges[1]])) @@ -106,8 +102,7 @@ function logabsdetjac( end function logabsdetjac( - b::Stacked{<:NTuple{N, Any}, <:NTuple{N, Any}}, - x::AbstractVector{<:Real} + b::Stacked{<:NTuple{N,Any},<:NTuple{N,Any}}, x::AbstractVector{<:Real} ) where {N} init = sum(logabsdetjac(b.bs[1], x[b.ranges[1]])) @@ -129,7 +124,9 @@ end # logjac += sum(_logjac) # return (vcat(y_1, y_2), logjac) # end -@generated function with_logabsdet_jacobian(b::Stacked{<:NTuple{N, Any}, <:NTuple{N, Any}}, x::AbstractVector) where {N} +@generated function with_logabsdet_jacobian( + b::Stacked{<:NTuple{N,Any},<:NTuple{N,Any}}, x::AbstractVector +) where {N} expr = Expr(:block) y_names = [] @@ -137,9 +134,12 @@ end # TODO: drop the `sum` when we have dimensionality push!(expr.args, :(logjac = sum(_logjac))) push!(y_names, :y_1) - for i = 2:N + for i in 2:N y_name = Symbol("y_$i") - push!(expr.args, :(($y_name, _logjac) = with_logabsdet_jacobian(b.bs[$i], x[b.ranges[$i]]))) + push!( + expr.args, + :(($y_name, _logjac) = with_logabsdet_jacobian(b.bs[$i], x[b.ranges[$i]])), + ) # TODO: drop the `sum` when we have dimensionality push!(expr.args, :(logjac += sum(_logjac))) diff --git a/src/bijectors/truncated.jl b/src/bijectors/truncated.jl index 52c09f4d..14ea0a2e 100644 --- a/src/bijectors/truncated.jl +++ b/src/bijectors/truncated.jl @@ -1,7 +1,7 @@ ####################################################### # Constrained to unconstrained distribution bijectors # ####################################################### -struct TruncatedBijector{T1, T2} <: Bijector +struct TruncatedBijector{T1,T2} <: Bijector lb::T1 ub::T2 end @@ -15,7 +15,7 @@ end function transform(b::TruncatedBijector, x) a, b = b.lb, b.ub return truncated_link.(_clamp.(x, a, b), a, b) -end +end function truncated_link(x::Real, a, b) lowerbounded, upperbounded = isfinite(a), isfinite(b) @@ -56,11 +56,11 @@ end function truncated_logabsdetjac(x, a, b) lowerbounded, upperbounded = isfinite(a), isfinite(b) if lowerbounded && upperbounded - return - log((x - a) * (b - x) / (b - a)) + return -log((x - a) * (b - x) / (b - a)) elseif lowerbounded - return - log(x - a) + return -log(x - a) elseif upperbounded - return - log(b - x) + return -log(b - x) else return zero(x) end diff --git a/src/chainrules.jl b/src/chainrules.jl index 45cacf82..26826e5a 100644 --- a/src/chainrules.jl +++ b/src/chainrules.jl @@ -1,10 +1,8 @@ # differentation rule for the iterative algorithm in the inverse of `PlanarLayer` ChainRulesCore.@scalar_rule( find_alpha(wt_y::Real, wt_u_hat::Real, b::Real), - @setup( - x = inv(1 + wt_u_hat * sech(Ω + b)^2), - ), - (x, - tanh(Ω + b) * x, x - 1), + @setup(x = inv(1 + wt_u_hat * sech(Ω + b)^2),), + (x, -tanh(Ω + b) * x, x - 1), ) function ChainRulesCore.rrule(::typeof(combine), m::PartitionMask, x_1, x_2, x_3) @@ -15,7 +13,9 @@ function ChainRulesCore.rrule(::typeof(combine), m::PartitionMask, x_1, x_2, x_3 function combine_pullback(ΔΩ) Δ = ChainRulesCore.unthunk(ΔΩ) dx_1, dx_2, dx_3 = partition(m, Δ) - return ChainRulesCore.NoTangent(), ChainRulesCore.NoTangent(), proj_x_1(dx_1), proj_x_2(dx_2), proj_x_3(dx_3) + return ChainRulesCore.NoTangent(), + ChainRulesCore.NoTangent(), proj_x_1(dx_1), proj_x_2(dx_2), + proj_x_3(dx_3) end return combine(m, x_1, x_2, x_3), combine_pullback @@ -81,7 +81,7 @@ function ChainRulesCore.rrule(::typeof(_transform_inverse_ordered), x::AbstractV project_x = ChainRulesCore.ProjectTo(x) r = similar(x) - @inbounds for i = 1:length(r) + @inbounds for i in 1:length(r) if i == 1 r[i] = 1 else @@ -95,7 +95,7 @@ function ChainRulesCore.rrule(::typeof(_transform_inverse_ordered), x::AbstractV @assert length(Δ_new) == length(Δ) n = length(Δ_new) - @inbounds for j = 1:n - 1 + @inbounds for j in 1:(n - 1) Δ_new[j] = (Δ[j] / r[j]) - (Δ[j + 1] / r[j + 1]) end @inbounds Δ_new[n] = Δ[n] / r[n] @@ -105,7 +105,7 @@ function ChainRulesCore.rrule(::typeof(_transform_inverse_ordered), x::AbstractV y = similar(x) @inbounds y[1] = x[1] - @inbounds for i = 2:length(x) + @inbounds for i in 2:length(x) y[i] = log(r[i]) end @@ -117,7 +117,7 @@ function ChainRulesCore.rrule(::typeof(_transform_inverse_ordered), x::AbstractM project_x = ChainRulesCore.ProjectTo(x) r = similar(x) - @inbounds for j = 1:size(x, 2), i = 1:size(x, 1) + @inbounds for j in 1:size(x, 2), i in 1:size(x, 1) if i == 1 r[i, j] = 1 else @@ -131,11 +131,11 @@ function ChainRulesCore.rrule(::typeof(_transform_inverse_ordered), x::AbstractM n = size(Δ, 1) @assert size(Δ) == size(Δ_new) - @inbounds for j = 1:size(Δ_new, 2), i = 1:n - 1 + @inbounds for j in 1:size(Δ_new, 2), i in 1:(n - 1) Δ_new[i, j] = (Δ[i, j] / r[i, j]) - (Δ[i + 1, j] / r[i + 1, j]) end - @inbounds for j = 1:size(Δ_new, 2) + @inbounds for j in 1:size(Δ_new, 2) Δ_new[n, j] = Δ[n, j] / r[n, j] end @@ -145,7 +145,7 @@ function ChainRulesCore.rrule(::typeof(_transform_inverse_ordered), x::AbstractM # Compute primal here so we can make use of the already # computed `r`. y = similar(x) - @inbounds for j = 1:size(x, 2), i = 1:size(x, 1) + @inbounds for j in 1:size(x, 2), i in 1:size(x, 1) if i == 1 y[i, j] = x[i, j] else @@ -157,4 +157,4 @@ function ChainRulesCore.rrule(::typeof(_transform_inverse_ordered), x::AbstractM end # Fixes Zygote's issues with `@debug` -ChainRulesCore.@non_differentiable _debug(::Any) \ No newline at end of file +ChainRulesCore.@non_differentiable _debug(::Any) diff --git a/src/compat/distributionsad.jl b/src/compat/distributionsad.jl index 99dc81d5..af5397f9 100644 --- a/src/compat/distributionsad.jl +++ b/src/compat/distributionsad.jl @@ -1,7 +1,15 @@ -using .DistributionsAD: TuringDirichlet, TuringWishart, TuringInverseWishart, - FillVectorOfUnivariate, FillMatrixOfUnivariate, - MatrixOfUnivariate, FillVectorOfMultivariate, VectorOfMultivariate, - TuringScalMvNormal, TuringDiagMvNormal, TuringDenseMvNormal +using .DistributionsAD: + TuringDirichlet, + TuringWishart, + TuringInverseWishart, + FillVectorOfUnivariate, + FillMatrixOfUnivariate, + MatrixOfUnivariate, + FillVectorOfMultivariate, + VectorOfMultivariate, + TuringScalMvNormal, + TuringDiagMvNormal, + TuringDenseMvNormal using Distributions: AbstractMvLogNormal # Bijectors @@ -20,49 +28,41 @@ bijector(d::MatrixOfUnivariate{Continuous}) = TruncatedBijector(_minmax(d.dists) bijector(d::VectorOfMultivariate{Discrete}) = identity for T in (:VectorOfMultivariate, :FillVectorOfMultivariate) @eval begin - bijector(d::$T{Continuous, <:MvNormal}) = identity - bijector(d::$T{Continuous, <:TuringScalMvNormal}) = identity - bijector(d::$T{Continuous, <:TuringDiagMvNormal}) = identity - bijector(d::$T{Continuous, <:TuringDenseMvNormal}) = identity - bijector(d::$T{Continuous, <:MvNormalCanon}) = identity - bijector(d::$T{Continuous, <:AbstractMvLogNormal}) = Log() - bijector(d::$T{Continuous, <:SimplexDistribution}) = SimplexBijector() - bijector(d::$T{Continuous, <:TuringDirichlet}) = SimplexBijector() + bijector(d::$T{Continuous,<:MvNormal}) = identity + bijector(d::$T{Continuous,<:TuringScalMvNormal}) = identity + bijector(d::$T{Continuous,<:TuringDiagMvNormal}) = identity + bijector(d::$T{Continuous,<:TuringDenseMvNormal}) = identity + bijector(d::$T{Continuous,<:MvNormalCanon}) = identity + bijector(d::$T{Continuous,<:AbstractMvLogNormal}) = Log() + bijector(d::$T{Continuous,<:SimplexDistribution}) = SimplexBijector() + bijector(d::$T{Continuous,<:TuringDirichlet}) = SimplexBijector() end end bijector(d::FillVectorOfMultivariate{Continuous}) = columnwise(bijector(d.dists.value)) -isdirichlet(::VectorOfMultivariate{Continuous, <:Dirichlet}) = true -isdirichlet(::VectorOfMultivariate{Continuous, <:TuringDirichlet}) = true +isdirichlet(::VectorOfMultivariate{Continuous,<:Dirichlet}) = true +isdirichlet(::VectorOfMultivariate{Continuous,<:TuringDirichlet}) = true isdirichlet(::TuringDirichlet) = true function link( - d::TuringDirichlet, - x::AbstractVecOrMat{<:Real}, - ::Val{proj}=Val(true), + d::TuringDirichlet, x::AbstractVecOrMat{<:Real}, ::Val{proj}=Val(true) ) where {proj} return SimplexBijector{proj}()(x) end function link_jacobian( - d::TuringDirichlet, - x::AbstractVector{<:Real}, - ::Val{proj}=Val(true), + d::TuringDirichlet, x::AbstractVector{<:Real}, ::Val{proj}=Val(true) ) where {proj} return jacobian(SimplexBijector{proj}(), x) end function invlink( - d::TuringDirichlet, - y::AbstractVecOrMat{<:Real}, - ::Val{proj}=Val(true), + d::TuringDirichlet, y::AbstractVecOrMat{<:Real}, ::Val{proj}=Val(true) ) where {proj} return inverse(SimplexBijector{proj}())(y) end function invlink_jacobian( - d::TuringDirichlet, - y::AbstractVector{<:Real}, - ::Val{proj}=Val(true), + d::TuringDirichlet, y::AbstractVector{<:Real}, ::Val{proj}=Val(true) ) where {proj} return jacobian(inverse(SimplexBijector{proj}()), y) end diff --git a/src/compat/forwarddiff.jl b/src/compat/forwarddiff.jl index a947cf18..1b51bb0b 100644 --- a/src/compat/forwarddiff.jl +++ b/src/compat/forwarddiff.jl @@ -1,7 +1,7 @@ import .ForwardDiff -_eps(::Type{<:ForwardDiff.Dual{<:Any, Real}}) = _eps(Real) -_eps(::Type{<:ForwardDiff.Dual{<:Any, <:Integer}}) = _eps(Real) +_eps(::Type{<:ForwardDiff.Dual{<:Any,Real}}) = _eps(Real) +_eps(::Type{<:ForwardDiff.Dual{<:Any,<:Integer}}) = _eps(Real) # Define forward-mode rule for ForwardDiff and don't trust support for ForwardDiff in Roots # https://github.com/JuliaMath/Roots.jl/issues/314 diff --git a/src/compat/reversediff.jl b/src/compat/reversediff.jl index c498205a..47c58054 100644 --- a/src/compat/reversediff.jl +++ b/src/compat/reversediff.jl @@ -1,17 +1,34 @@ module ReverseDiffCompat -using ..ReverseDiff: ReverseDiff, @grad, value, track, TrackedReal, TrackedVector, - TrackedMatrix +using ..ReverseDiff: + ReverseDiff, @grad, value, track, TrackedReal, TrackedVector, TrackedMatrix using Requires, LinearAlgebra -using ..Bijectors: Elementwise, SimplexBijector, maphcat, simplex_link_jacobian, - simplex_invlink_jacobian, simplex_logabsdetjac_gradient, Inverse -import ..Bijectors: _eps, logabsdetjac, _logabsdetjac_scale, _simplex_bijector, - _simplex_inv_bijector, replace_diag, jacobian, getpd, lower, - _inv_link_chol_lkj, _link_chol_lkj, _transform_ordered, _transform_inverse_ordered, +using ..Bijectors: + Elementwise, + SimplexBijector, + maphcat, + simplex_link_jacobian, + simplex_invlink_jacobian, + simplex_logabsdetjac_gradient, + Inverse +import ..Bijectors: + _eps, + logabsdetjac, + _logabsdetjac_scale, + _simplex_bijector, + _simplex_inv_bijector, + replace_diag, + jacobian, + getpd, + lower, + _inv_link_chol_lkj, + _link_chol_lkj, + _transform_ordered, + _transform_inverse_ordered, find_alpha -import ChainRulesCore +using ChainRulesCore: ChainRulesCore using Compat: eachcol using Distributions: LocationScale @@ -34,7 +51,9 @@ function Base.maximum(d::LocationScale{<:TrackedReal}) end end -logabsdetjac(b::Elementwise{typeof(log)}, x::Union{TrackedVector, TrackedMatrix}) = track(logabsdetjac, b, x) +function logabsdetjac(b::Elementwise{typeof(log)}, x::Union{TrackedVector,TrackedMatrix}) + return track(logabsdetjac, b, x) +end @grad function logabsdetjac(b::Elementwise{typeof(log)}, x::AbstractVector) return -sum(log, value(x)), Δ -> (nothing, -Δ ./ value(x)) end @@ -42,7 +61,8 @@ function _logabsdetjac_scale(a::TrackedReal, x::Real, ::Val{0}) return track(_logabsdetjac_scale, a, value(x), Val(0)) end @grad function _logabsdetjac_scale(a::Real, x::Real, v::Val{0}) - return _logabsdetjac_scale(value(a), value(x), Val(0)), Δ -> (inv(value(a)) .* Δ, nothing, nothing) + return _logabsdetjac_scale(value(a), value(x), Val(0)), + Δ -> (inv(value(a)) .* Δ, nothing, nothing) end # Need to treat `AbstractVector` and `AbstractMatrix` separately due to ambiguity errors function _logabsdetjac_scale(a::TrackedReal, x::AbstractVector, ::Val{0}) @@ -51,7 +71,8 @@ end @grad function _logabsdetjac_scale(a::Real, x::AbstractVector, v::Val{0}) da = value(a) J = fill(inv.(da), length(x)) - return _logabsdetjac_scale(da, value(x), Val(0)), Δ -> (transpose(J) * Δ, nothing, nothing) + return _logabsdetjac_scale(da, value(x), Val(0)), + Δ -> (transpose(J) * Δ, nothing, nothing) end function _logabsdetjac_scale(a::TrackedReal, x::AbstractMatrix, ::Val{0}) return track(_logabsdetjac_scale, a, value(x), Val(0)) @@ -59,7 +80,8 @@ end @grad function _logabsdetjac_scale(a::Real, x::AbstractMatrix, v::Val{0}) da = value(a) J = fill(size(x, 1) / da, size(x, 2)) - return _logabsdetjac_scale(da, value(x), Val(0)), Δ -> (transpose(J) * Δ, nothing, nothing) + return _logabsdetjac_scale(da, value(x), Val(0)), + Δ -> (transpose(J) * Δ, nothing, nothing) end # adjoints for 1-dim and 2-dim `Scale` using `AbstractVector` function _logabsdetjac_scale(a::TrackedVector, x::AbstractVector, ::Val{1}) @@ -82,7 +104,7 @@ end Jᵀ = repeat(inv.(da), 1, size(x, 2)) return _logabsdetjac_scale(da, value(x), Val(1)), Δ -> (Jᵀ * Δ, nothing, nothing) end -function _simplex_bijector(X::Union{TrackedVector, TrackedMatrix}, b::SimplexBijector) +function _simplex_bijector(X::Union{TrackedVector,TrackedMatrix}, b::SimplexBijector) return track(_simplex_bijector, X, b) end @grad function _simplex_bijector(Y::AbstractVector, b::SimplexBijector) @@ -90,7 +112,7 @@ end return _simplex_bijector(Yd, b), Δ -> (simplex_link_jacobian(Yd)' * Δ, nothing) end -function _simplex_inv_bijector(X::Union{TrackedVector, TrackedMatrix}, b::SimplexBijector) +function _simplex_inv_bijector(X::Union{TrackedVector,TrackedMatrix}, b::SimplexBijector) return track(_simplex_inv_bijector, X, b) end @grad function _simplex_inv_bijector(Y::AbstractVector, b::SimplexBijector) @@ -99,10 +121,12 @@ end end @grad function _simplex_inv_bijector(Y::AbstractMatrix, b::SimplexBijector) Yd = value(Y) - return _simplex_inv_bijector(Yd, b), Δ -> begin + return _simplex_inv_bijector(Yd, b), + Δ -> begin maphcat(eachcol(Yd), eachcol(Δ)) do c1, c2 simplex_invlink_jacobian(c1)' * c2 - end, nothing + end, + nothing end end @@ -112,7 +136,7 @@ replace_diag(::typeof(log), X::TrackedMatrix) = track(replace_diag, log, X) f(i, j) = i == j ? log(Xd[i, j]) : Xd[i, j] out = f.(1:size(Xd, 1), (1:size(Xd, 2))') out, ∇ -> begin - g(i, j) = i == j ? ∇[i, j]/Xd[i, j] : ∇[i, j] + g(i, j) = i == j ? ∇[i, j] / Xd[i, j] : ∇[i, j] return (nothing, g.(1:size(Xd, 1), (1:size(Xd, 2))')) end end @@ -123,12 +147,14 @@ replace_diag(::typeof(exp), X::TrackedMatrix) = track(replace_diag, exp, X) f(i, j) = ifelse(i == j, exp(Xd[i, j]), Xd[i, j]) out = f.(1:size(Xd, 1), (1:size(Xd, 2))') out, ∇ -> begin - g(i, j) = ifelse(i == j, ∇[i, j]*exp(Xd[i, j]), ∇[i, j]) + g(i, j) = ifelse(i == j, ∇[i, j] * exp(Xd[i, j]), ∇[i, j]) return (nothing, g.(1:size(Xd, 1), (1:size(Xd, 2))')) end end -logabsdetjac(b::SimplexBijector, x::Union{TrackedVector, TrackedMatrix}) = track(logabsdetjac, b, x) +function logabsdetjac(b::SimplexBijector, x::Union{TrackedVector,TrackedMatrix}) + return track(logabsdetjac, b, x) +end @grad function logabsdetjac(b::SimplexBijector, x::AbstractVector) xd = value(x) return logabsdetjac(b, xd), Δ -> begin @@ -139,7 +165,8 @@ end getpd(X::TrackedMatrix) = track(getpd, X) @grad function getpd(X::AbstractMatrix) Xd = value(X) - return LowerTriangular(Xd) * LowerTriangular(Xd)', Δ -> begin + return LowerTriangular(Xd) * LowerTriangular(Xd)', + Δ -> begin Xl = LowerTriangular(Xd) return (LowerTriangular(Δ' * Xl + Δ * Xl),) end @@ -157,7 +184,7 @@ end α = find_alpha(value(wt_y), value(wt_u_hat), value(b)) ∂wt_y = inv(1 + wt_u_hat * sech(α + b)^2) - ∂wt_u_hat = - tanh(α + b) * ∂wt_y + ∂wt_u_hat = -tanh(α + b) * ∂wt_y ∂b = ∂wt_y - 1 find_alpha_pullback(Δ::Real) = (Δ * ∂wt_y, Δ * ∂wt_u_hat, Δ * ∂b) @@ -165,7 +192,7 @@ end end # `OrderedBijector` -function _transform_ordered(y::Union{TrackedVector, TrackedMatrix}) +function _transform_ordered(y::Union{TrackedVector,TrackedMatrix}) return track(_transform_ordered, y) end @grad function _transform_ordered(y::AbstractVecOrMat) @@ -173,7 +200,7 @@ end return x, (wrap_chainrules_output ∘ Base.tail ∘ dx) end -function _transform_inverse_ordered(x::Union{TrackedVector, TrackedMatrix}) +function _transform_inverse_ordered(x::Union{TrackedVector,TrackedMatrix}) return track(_transform_inverse_ordered, x) end @grad function _transform_inverse_ordered(x::AbstractVecOrMat) diff --git a/src/compat/tracker.jl b/src/compat/tracker.jl index 1166a29e..efa45650 100644 --- a/src/compat/tracker.jl +++ b/src/compat/tracker.jl @@ -1,21 +1,22 @@ module TrackerCompat -using ..Tracker: Tracker, - TrackedReal, - TrackedVector, - TrackedMatrix, - TrackedArray, - TrackedVecOrMat, - @grad, - track, - data, - param +using ..Tracker: + Tracker, + TrackedReal, + TrackedVector, + TrackedMatrix, + TrackedArray, + TrackedVecOrMat, + @grad, + track, + data, + param import ..Bijectors using ..Bijectors: Elementwise, SimplexBijector, Inverse, Stacked -import ChainRulesCore -import LogExpFunctions +using ChainRulesCore: ChainRulesCore +using LogExpFunctions: LogExpFunctions using Compat: eachcol using LinearAlgebra @@ -23,11 +24,9 @@ using Distributions: LocationScale Bijectors.maporbroadcast(f, x::TrackedArray...) = f.(x...) function Bijectors.maporbroadcast( - f, - x1::TrackedArray{T, N}, - x::AbstractArray{<:TrackedReal}..., -) where {T, N} - return f.(convert(Array{TrackedReal{T}, N}, x1), x...) + f, x1::TrackedArray{T,N}, x::AbstractArray{<:TrackedReal}... +) where {T,N} + return f.(convert(Array{TrackedReal{T},N}, x1), x...) end Bijectors._eps(::Type{<:TrackedReal{T}}) where {T} = Bijectors._eps(T) @@ -56,16 +55,12 @@ function Bijectors._logabsdetjac_shift(a::TrackedReal, x::AbstractVector{<:Real} return tracker_shift_logabsdetjac(a, x, Val(0)) end function Bijectors._logabsdetjac_shift( - a::Union{TrackedReal, TrackedVector{<:Real}}, - x::AbstractVector{<:Real}, - ::Val{1} + a::Union{TrackedReal,TrackedVector{<:Real}}, x::AbstractVector{<:Real}, ::Val{1} ) return tracker_shift_logabsdetjac(a, x, Val(1)) end function Bijectors._logabsdetjac_shift( - a::Union{TrackedReal, TrackedVector{<:Real}}, - x::AbstractMatrix{<:Real}, - ::Val{1} + a::Union{TrackedReal,TrackedVector{<:Real}}, x::AbstractMatrix{<:Real}, ::Val{1} ) return tracker_shift_logabsdetjac(a, x, Val(1)) end @@ -88,7 +83,8 @@ function Bijectors._logabsdetjac_scale(a::TrackedReal, x::Real, ::Val{0}) return track(Bijectors._logabsdetjac_scale, a, data(x), Val(0)) end @grad function Bijectors._logabsdetjac_scale(a::Real, x::Real, ::Val{0}) - return Bijectors._logabsdetjac_scale(data(a), data(x), Val(0)), Δ -> (inv(data(a)) .* Δ, nothing, nothing) + return Bijectors._logabsdetjac_scale(data(a), data(x), Val(0)), + Δ -> (inv(data(a)) .* Δ, nothing, nothing) end # Need to treat `AbstractVector` and `AbstractMatrix` separately due to ambiguity errors function Bijectors._logabsdetjac_scale(a::TrackedReal, x::AbstractVector, ::Val{0}) @@ -97,7 +93,8 @@ end @grad function Bijectors._logabsdetjac_scale(a::Real, x::AbstractVector, ::Val{0}) da = data(a) J = fill(inv.(da), length(x)) - return Bijectors._logabsdetjac_scale(da, data(x), Val(0)), Δ -> (transpose(J) * Δ, nothing, nothing) + return Bijectors._logabsdetjac_scale(da, data(x), Val(0)), + Δ -> (transpose(J) * Δ, nothing, nothing) end function Bijectors._logabsdetjac_scale(a::TrackedReal, x::AbstractMatrix, ::Val{0}) return track(Bijectors._logabsdetjac_scale, a, data(x), Val(0)) @@ -105,7 +102,8 @@ end @grad function Bijectors._logabsdetjac_scale(a::Real, x::AbstractMatrix, ::Val{0}) da = data(a) J = fill(size(x, 1) / da, size(x, 2)) - return Bijectors._logabsdetjac_scale(da, data(x), Val(0)), Δ -> (transpose(J) * Δ, nothing, nothing) + return Bijectors._logabsdetjac_scale(da, data(x), Val(0)), + Δ -> (transpose(J) * Δ, nothing, nothing) end # adjoints for 1-dim and 2-dim `Scale` using `AbstractVector` function Bijectors._logabsdetjac_scale(a::TrackedVector, x::AbstractVector, ::Val{1}) @@ -118,7 +116,8 @@ end # = (1 / aᵢ) da = data(a) J = inv.(da) - return Bijectors._logabsdetjac_scale(da, data(x), Val(1)), Δ -> (J .* Δ, nothing, nothing) + return Bijectors._logabsdetjac_scale(da, data(x), Val(1)), + Δ -> (J .* Δ, nothing, nothing) end function Bijectors._logabsdetjac_scale(a::TrackedVector, x::AbstractMatrix, ::Val{1}) return track(Bijectors._logabsdetjac_scale, a, data(x), Val(1)) @@ -126,7 +125,8 @@ end @grad function Bijectors._logabsdetjac_scale(a::TrackedVector, x::AbstractMatrix, ::Val{1}) da = data(a) Jᵀ = repeat(inv.(da), 1, size(x, 2)) - return Bijectors._logabsdetjac_scale(da, data(x), Val(1)), Δ -> (Jᵀ * Δ, nothing, nothing) + return Bijectors._logabsdetjac_scale(da, data(x), Val(1)), + Δ -> (Jᵀ * Δ, nothing, nothing) end # TODO: implement analytical gradient for scaling a vector using a matrix # function _logabsdetjac_scale(a::TrackedMatrix, x::AbstractVector, ::Val{1}) @@ -145,39 +145,48 @@ function Bijectors._simplex_inv_bijector(Y::TrackedVecOrMat, b::SimplexBijector) end @grad function Bijectors._simplex_bijector(X::AbstractVector, b::SimplexBijector) Xd = data(X) - return Bijectors._simplex_bijector(Xd, b), Δ -> (Bijectors.simplex_link_jacobian(Xd)' * Δ, nothing) + return Bijectors._simplex_bijector(Xd, b), + Δ -> (Bijectors.simplex_link_jacobian(Xd)' * Δ, nothing) end @grad function Bijectors._simplex_inv_bijector(Y::AbstractVector, b::SimplexBijector) Yd = data(Y) - return Bijectors._simplex_inv_bijector(Yd, b), Δ -> (Bijectors.simplex_invlink_jacobian(Yd)' * Δ, nothing) + return Bijectors._simplex_inv_bijector(Yd, b), + Δ -> (Bijectors.simplex_invlink_jacobian(Yd)' * Δ, nothing) end -Bijectors.replace_diag(::typeof(log), X::TrackedMatrix) = track(Bijectors.replace_diag, log, X) +function Bijectors.replace_diag(::typeof(log), X::TrackedMatrix) + return track(Bijectors.replace_diag, log, X) +end @grad function Bijectors.replace_diag(::typeof(log), X) Xd = data(X) f(i, j) = i == j ? log(Xd[i, j]) : Xd[i, j] out = f.(1:size(Xd, 1), (1:size(Xd, 2))') out, ∇ -> begin - g(i, j) = i == j ? ∇[i, j]/Xd[i, j] : ∇[i, j] + g(i, j) = i == j ? ∇[i, j] / Xd[i, j] : ∇[i, j] return (nothing, g.(1:size(Xd, 1), (1:size(Xd, 2))')) end end -Bijectors.replace_diag(::typeof(exp), X::TrackedMatrix) = track(Bijectors.replace_diag, exp, X) +function Bijectors.replace_diag(::typeof(exp), X::TrackedMatrix) + return track(Bijectors.replace_diag, exp, X) +end @grad function Bijectors.replace_diag(::typeof(exp), X) Xd = data(X) f(i, j) = ifelse(i == j, exp(Xd[i, j]), Xd[i, j]) out = f.(1:size(Xd, 1), (1:size(Xd, 2))') out, ∇ -> begin - g(i, j) = ifelse(i == j, ∇[i, j]*exp(Xd[i, j]), ∇[i, j]) + g(i, j) = ifelse(i == j, ∇[i, j] * exp(Xd[i, j]), ∇[i, j]) return (nothing, g.(1:size(Xd, 1), (1:size(Xd, 2))')) end end -Bijectors.logabsdetjac(b::SimplexBijector, x::TrackedVecOrMat) = track(Bijectors.logabsdetjac, b, x) +function Bijectors.logabsdetjac(b::SimplexBijector, x::TrackedVecOrMat) + return track(Bijectors.logabsdetjac, b, x) +end @grad function Bijectors.logabsdetjac(b::SimplexBijector, x::AbstractVector) xd = data(x) - return Bijectors.logabsdetjac(b, xd), Δ -> begin + return Bijectors.logabsdetjac(b, xd), + Δ -> begin (nothing, Bijectors.simplex_logabsdetjac_gradient(xd) * Δ) end end @@ -212,7 +221,7 @@ for header in [ Tr = promote_type(eltype(z), eltype(z_0)) r::Tr = norm((z .- z_0)::TV) transformed::T = z .+ β_hat ./ (α .+ r') .* (z .- z_0) # from eq(14) - return (transformed = transformed, α = α, β_hat = β_hat, r = r) + return (transformed=transformed, α=α, β_hat=β_hat, r=r) end end end @@ -247,7 +256,7 @@ for header in [ end r::TV = eachcolnorm(z .- z_0) transformed::T = z .+ β_hat ./ (α .+ r') .* (z .- z_0) # from eq(14) - return (transformed = transformed, α = α, β_hat = β_hat, r = r) + return (transformed=transformed, α=α, β_hat=β_hat, r=r) end end end @@ -256,7 +265,7 @@ eachcolnorm(X::TrackedMatrix) = track(eachcolnorm, X) @grad function eachcolnorm(X) Xd = data(X) y = map(norm, eachcol(Xd)) - y, Δ -> begin + return y, Δ -> begin (Xd .* (Δ ./ y)',) end end @@ -284,7 +293,8 @@ end Bijectors.getpd(X::TrackedMatrix) = track(Bijectors.getpd, X) @grad function Bijectors.getpd(X::AbstractMatrix) Xd = data(X) - return Bijectors.LowerTriangular(Xd) * Bijectors.LowerTriangular(Xd)', Δ -> begin + return Bijectors.LowerTriangular(Xd) * Bijectors.LowerTriangular(Xd)', + Δ -> begin Xl = Bijectors.LowerTriangular(Xd) return (Bijectors.LowerTriangular(Δ' * Xl + Δ * Xl),) end @@ -306,20 +316,20 @@ Bijectors._inv_link_chol_lkj(y::TrackedMatrix) = track(Bijectors._inv_link_chol_ z_mat = similar(y) # cache for adjoint tmp_mat = similar(y) - + @inbounds for j in 1:K w[1, j] = 1 for i in 2:j - z = tanh(y[i-1, j]) - tmp = w[i-1, j] + z = tanh(y[i - 1, j]) + tmp = w[i - 1, j] z_mat[i, j] = z tmp_mat[i, j] = tmp - w[i-1, j] = z * tmp + w[i - 1, j] = z * tmp w[i, j] = tmp * sqrt(1 - z^2) end - for i in (j+1):K + for i in (j + 1):K w[i, j] = 0 end end @@ -330,14 +340,16 @@ Bijectors._inv_link_chol_lkj(y::TrackedMatrix) = track(Bijectors._inv_link_chol_ Δy = zero(y) @inbounds for j in 1:K - Δtmp = Δw[j,j] + Δtmp = Δw[j, j] for i in j:-1:2 - Δz = Δw[i-1, j] * tmp_mat[i, j] - Δtmp * tmp_mat[i, j] / sqrt(1 - z_mat[i, j]^2) * z_mat[i, j] - Δy[i-1, j] = Δz / cosh(y[i-1, j])^2 - Δtmp = Δw[i-1, j] * z_mat[i, j] + Δtmp * sqrt(1 - z_mat[i, j]^2) + Δz = + Δw[i - 1, j] * tmp_mat[i, j] - + Δtmp * tmp_mat[i, j] / sqrt(1 - z_mat[i, j]^2) * z_mat[i, j] + Δy[i - 1, j] = Δz / cosh(y[i - 1, j])^2 + Δtmp = Δw[i - 1, j] * z_mat[i, j] + Δtmp * sqrt(1 - z_mat[i, j]^2) end end - + return (Δy,) end @@ -349,14 +361,14 @@ Bijectors._link_chol_lkj(w::TrackedMatrix) = track(Bijectors._link_chol_lkj, w) w = data(w_tracked) K = LinearAlgebra.checksquare(w) - + z = similar(w) @inbounds z[1, 1] = 0 tmp_mat = similar(w) # cache for pullback. - @inbounds for j=2:K + @inbounds for j in 2:K z[1, j] = atanh(w[1, j]) tmp = sqrt(1 - w[1, j]^2) tmp_mat[1, j] = tmp @@ -374,22 +386,22 @@ Bijectors._link_chol_lkj(w::TrackedMatrix) = track(Bijectors._link_chol_lkj, w) Δw = similar(w) - @inbounds Δw[1,1] = zero(eltype(Δz)) + @inbounds Δw[1, 1] = zero(eltype(Δz)) - @inbounds for j=2:K + @inbounds for j in 2:K Δw[j, j] = 0 Δtmp = zero(eltype(Δz)) # Δtmp_mat[j-1,j] - for i in (j-1):-1:2 - p = w[i, j] / tmp_mat[i-1, j] + for i in (j - 1):-1:2 + p = w[i, j] / tmp_mat[i - 1, j] ftmp = sqrt(1 - p^2) d_ftmp_p = -p / ftmp - d_p_tmp = -w[i,j] / tmp_mat[i-1, j]^2 + d_p_tmp = -w[i, j] / tmp_mat[i - 1, j]^2 - Δp = Δz[i,j] / (1-p^2) + Δtmp * tmp_mat[i-1, j] * d_ftmp_p - Δw[i, j] = Δp / tmp_mat[i-1, j] + Δp = Δz[i, j] / (1 - p^2) + Δtmp * tmp_mat[i - 1, j] * d_ftmp_p + Δw[i, j] = Δp / tmp_mat[i - 1, j] Δtmp = Δp * d_p_tmp + Δtmp * ftmp # update to "previous" Δtmp end - Δw[1, j] = Δz[1, j] / (1-w[1,j]^2) - Δtmp / sqrt(1 - w[1,j]^2) * w[1,j] + Δw[1, j] = Δz[1, j] / (1 - w[1, j]^2) - Δtmp / sqrt(1 - w[1, j]^2) * w[1, j] end return (Δw,) @@ -401,11 +413,13 @@ end function Bijectors.find_alpha(wt_y::T, wt_u_hat::T, b::T) where {T<:TrackedReal} return track(Bijectors.find_alpha, wt_y, wt_u_hat, b) end -@grad function Bijectors.find_alpha(wt_y::TrackedReal, wt_u_hat::TrackedReal, b::TrackedReal) +@grad function Bijectors.find_alpha( + wt_y::TrackedReal, wt_u_hat::TrackedReal, b::TrackedReal +) α = Bijectors.find_alpha(data(wt_y), data(wt_u_hat), data(b)) ∂wt_y = inv(1 + wt_u_hat * sech(α + b)^2) - ∂wt_u_hat = - tanh(α + b) * ∂wt_y + ∂wt_u_hat = -tanh(α + b) * ∂wt_y ∂b = ∂wt_y - 1 find_alpha_pullback(Δ::Real) = (Δ * ∂wt_y, Δ * ∂wt_u_hat, Δ * ∂b) @@ -413,7 +427,9 @@ end end # `OrderedBijector` -Bijectors._transform_ordered(y::Union{TrackedVector,TrackedMatrix}) = track(Bijectors._transform_ordered, y) +function Bijectors._transform_ordered(y::Union{TrackedVector,TrackedMatrix}) + return track(Bijectors._transform_ordered, y) +end @grad function Bijectors._transform_ordered(y::AbstractVecOrMat) x, dx = ChainRulesCore.rrule(Bijectors._transform_ordered, data(y)) return x, (wrap_chainrules_output ∘ Base.tail ∘ dx) diff --git a/src/compat/zygote.jl b/src/compat/zygote.jl index eedf4b3d..8533b58f 100644 --- a/src/compat/zygote.jl +++ b/src/compat/zygote.jl @@ -11,14 +11,14 @@ end @adjoint function eachcolmaphcat(f, x1, x2) function g(f, x1, x2) init = reshape(f(view(x1, :, 1), x2[1]), :, 1) - return reduce(hcat, [f(view(x1, :, i), x2[i]) for i in 2:size(x1, 2)]; init = init) + return reduce(hcat, [f(view(x1, :, i), x2[i]) for i in 2:size(x1, 2)]; init=init) end return pullback(g, f, x1, x2) end @adjoint function eachcolmaphcat(f, x) function g(f, x) init = reshape(f(view(x, :, 1)), :, 1) - return reduce(hcat, [f(view(x, :, i)) for i in 2:size(x, 2)]; init = init) + return reduce(hcat, [f(view(x, :, i)) for i in 2:size(x, 2)]; init=init) end return pullback(g, f, x) end @@ -59,7 +59,7 @@ end @adjoint function replace_diag(::typeof(log), X) f(i, j) = i == j ? log(X[i, j]) : X[i, j] out = f.(1:size(X, 1), (1:size(X, 2))') - out, ∇ -> begin + return out, ∇ -> begin g(i, j) = i == j ? ∇[i, j] / X[i, j] : ∇[i, j] (nothing, g.(1:size(X, 1), (1:size(X, 2))')) end @@ -67,28 +67,20 @@ end @adjoint function replace_diag(::typeof(exp), X) f(i, j) = ifelse(i == j, exp(X[i, j]), X[i, j]) out = f.(1:size(X, 1), (1:size(X, 2))') - out, ∇ -> begin + return out, ∇ -> begin g(i, j) = ifelse(i == j, ∇[i, j] * exp(X[i, j]), ∇[i, j]) (nothing, g.(1:size(X, 1), (1:size(X, 2))')) end end -@adjoint function pd_logpdf_with_trans( - d, - X::AbstractMatrix{<:Real}, - transform::Bool, -) +@adjoint function pd_logpdf_with_trans(d, X::AbstractMatrix{<:Real}, transform::Bool) return pullback(pd_logpdf_with_trans_zygote, d, X, transform) end -function pd_logpdf_with_trans_zygote( - d, - X::AbstractMatrix{<:Real}, - transform::Bool, -) +function pd_logpdf_with_trans_zygote(d, X::AbstractMatrix{<:Real}, transform::Bool) T = eltype(X) - Xcf = cholesky(X, check = false) + Xcf = cholesky(X; check=false) if !issuccess(Xcf) - Xcf = cholesky(X + max(eps(T), eps(T) * norm(X)) * I, check = true) + Xcf = cholesky(X + max(eps(T), eps(T) * norm(X)) * I; check=true) end lp = getlogp(d, Xcf, X) if transform && isfinite(lp) @@ -114,17 +106,21 @@ end end @adjoint function _simplex_bijector(X::AbstractMatrix, b::SimplexBijector) - return _simplex_bijector(X, b), Δ -> begin + return _simplex_bijector(X, b), + Δ -> begin maphcat(eachcol(X), eachcol(Δ)) do c1, c2 simplex_link_jacobian(c1)' * c2 - end, nothing + end, + nothing end end @adjoint function _simplex_inv_bijector(Y::AbstractMatrix, b::SimplexBijector) - return _simplex_inv_bijector(Y, b), Δ -> begin + return _simplex_inv_bijector(Y, b), + Δ -> begin maphcat(eachcol(Y), eachcol(Δ)) do c1, c2 simplex_invlink_jacobian(c1)' * c2 - end, nothing + end, + nothing end end @@ -162,14 +158,15 @@ end return lower(A), Δ -> (lower(Δ),) end @adjoint function getpd(X::AbstractMatrix) - return LowerTriangular(X) * LowerTriangular(X)', Δ -> begin + return LowerTriangular(X) * LowerTriangular(X)', + Δ -> begin Xl = LowerTriangular(X) return (LowerTriangular(Δ' * Xl + Δ * Xl),) end end @adjoint function pd_link(X::AbstractMatrix{<:Real}) return pullback(X) do X - Y = cholesky(X; check = true).L + Y = cholesky(X; check=true).L return replace_diag(log, Y) end end @@ -181,20 +178,20 @@ end z_mat = similar(y) # cache for adjoint tmp_mat = similar(y) - + @inbounds for j in 1:K w[1, j] = 1 for i in 2:j - z = tanh(y[i-1, j]) - tmp = w[i-1, j] + z = tanh(y[i - 1, j]) + tmp = w[i - 1, j] z_mat[i, j] = z tmp_mat[i, j] = tmp - w[i-1, j] = z * tmp + w[i - 1, j] = z * tmp w[i, j] = tmp * sqrt(1 - z^2) end - for i in (j+1):K + for i in (j + 1):K w[i, j] = 0 end end @@ -205,14 +202,16 @@ end Δy = zero(y) @inbounds for j in 1:K - Δtmp = Δw[j,j] + Δtmp = Δw[j, j] for i in j:-1:2 - Δz = Δw[i-1, j] * tmp_mat[i, j] - Δtmp * tmp_mat[i, j] / sqrt(1 - z_mat[i, j]^2) * z_mat[i, j] - Δy[i-1, j] = Δz / cosh(y[i-1, j])^2 - Δtmp = Δw[i-1, j] * z_mat[i, j] + Δtmp * sqrt(1 - z_mat[i, j]^2) + Δz = + Δw[i - 1, j] * tmp_mat[i, j] - + Δtmp * tmp_mat[i, j] / sqrt(1 - z_mat[i, j]^2) * z_mat[i, j] + Δy[i - 1, j] = Δz / cosh(y[i - 1, j])^2 + Δtmp = Δw[i - 1, j] * z_mat[i, j] + Δtmp * sqrt(1 - z_mat[i, j]^2) end end - + return (Δy,) end @@ -221,14 +220,14 @@ end @adjoint function _link_chol_lkj(w) K = LinearAlgebra.checksquare(w) - + z = similar(w) @inbounds z[1, 1] = 0 tmp_mat = similar(w) # cache for pullback. - @inbounds for j=2:K + @inbounds for j in 2:K z[1, j] = atanh(w[1, j]) tmp = sqrt(1 - w[1, j]^2) tmp_mat[1, j] = tmp @@ -246,27 +245,26 @@ end Δw = similar(w) - @inbounds Δw[1,1] = zero(eltype(Δz)) + @inbounds Δw[1, 1] = zero(eltype(Δz)) - @inbounds for j=2:K + @inbounds for j in 2:K Δw[j, j] = 0 Δtmp = zero(eltype(Δz)) # Δtmp_mat[j-1,j] - for i in (j-1):-1:2 - p = w[i, j] / tmp_mat[i-1, j] + for i in (j - 1):-1:2 + p = w[i, j] / tmp_mat[i - 1, j] ftmp = sqrt(1 - p^2) d_ftmp_p = -p / ftmp - d_p_tmp = -w[i,j] / tmp_mat[i-1, j]^2 + d_p_tmp = -w[i, j] / tmp_mat[i - 1, j]^2 - Δp = Δz[i,j] / (1-p^2) + Δtmp * tmp_mat[i-1, j] * d_ftmp_p - Δw[i, j] = Δp / tmp_mat[i-1, j] + Δp = Δz[i, j] / (1 - p^2) + Δtmp * tmp_mat[i - 1, j] * d_ftmp_p + Δw[i, j] = Δp / tmp_mat[i - 1, j] Δtmp = Δp * d_p_tmp + Δtmp * ftmp # update to "previous" Δtmp end - Δw[1, j] = Δz[1, j] / (1-w[1,j]^2) - Δtmp / sqrt(1 - w[1,j]^2) * w[1,j] + Δw[1, j] = Δz[1, j] / (1 - w[1, j]^2) - Δtmp / sqrt(1 - w[1, j]^2) * w[1, j] end return (Δw,) end return z, pullback_link_chol_lkj - end diff --git a/src/interface.jl b/src/interface.jl index 2efcac98..91c9a961 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -3,7 +3,7 @@ import Base: ∘ import Random: AbstractRNG import Distributions: logpdf, rand, rand!, _rand!, _logpdf -const Elementwise{F} = Base.Fix1{<:Union{typeof(map),typeof(broadcast)}, F} +const Elementwise{F} = Base.Fix1{<:Union{typeof(map),typeof(broadcast)},F} """ elementwise(f) @@ -16,7 +16,9 @@ In the case where `f::ComposedFunction`, the result is elementwise(f) = Base.Fix1(broadcast, f) # TODO: This is makes dispatching quite a bit easier, but uncertain if this is really # the way to go. -elementwise(f::ComposedFunction) = ComposedFunction(elementwise(f.outer), elementwise(f.inner)) +function elementwise(f::ComposedFunction) + return ComposedFunction(elementwise(f.outer), elementwise(f.inner)) +end const Columnwise{F} = Base.Fix1{typeof(eachcolmaphcat),F} """ @@ -74,7 +76,9 @@ transform(f::F, x) where {F<:Function} = f(x) function transform(t::Transform, x) res = with_logabsdet_jacobian(t, x) if res isa ChangesOfVariables.NoLogAbsDetJacobian - error("`transform` not implemented for $(typeof(b)); implement `transform` and/or `with_logabsdet_jacobian`.") + error( + "`transform` not implemented for $(typeof(b)); implement `transform` and/or `with_logabsdet_jacobian`.", + ) end return first(res) @@ -98,7 +102,9 @@ Return `log(abs(det(J(b, x))))`, where `J(b, x)` is the jacobian of `b` at `x`. function logabsdetjac(b, x) res = with_logabsdet_jacobian(b, x) if res isa ChangesOfVariables.NoLogAbsDetJacobian - error("`logabsdetjac` not implemented for $(typeof(b)); implement `logabsdetjac` and/or `with_logabsdet_jacobian`.") + error( + "`logabsdetjac` not implemented for $(typeof(b)); implement `logabsdetjac` and/or `with_logabsdet_jacobian`.", + ) end return last(res) diff --git a/src/transformed_distribution.jl b/src/transformed_distribution.jl index ffa30237..5f0a0d8d 100644 --- a/src/transformed_distribution.jl +++ b/src/transformed_distribution.jl @@ -1,11 +1,18 @@ # Transformed distributions -struct TransformedDistribution{D, B, V} <: Distribution{V, Continuous} where {D<:Distribution{V, Continuous}, B} +struct TransformedDistribution{D,B,V} <: + Distribution{V,Continuous} where {D<:Distribution{V,Continuous},B} dist::D transform::B - TransformedDistribution(d::UnivariateDistribution, b) = new{typeof(d), typeof(b), Univariate}(d, b) - TransformedDistribution(d::MultivariateDistribution, b) = new{typeof(d), typeof(b), Multivariate}(d, b) - TransformedDistribution(d::MatrixDistribution, b) = new{typeof(d), typeof(b), Matrixvariate}(d, b) + function TransformedDistribution(d::UnivariateDistribution, b) + return new{typeof(d),typeof(b),Univariate}(d, b) + end + function TransformedDistribution(d::MultivariateDistribution, b) + return new{typeof(d),typeof(b),Multivariate}(d, b) + end + function TransformedDistribution(d::MatrixDistribution, b) + return new{typeof(d),typeof(b),Matrixvariate}(d, b) + end end # fields may contain nested numerical parameters @@ -17,7 +24,6 @@ const MvTransformed = MultivariateTransformed const MatrixTransformed = TransformedDistribution{<:Distribution,<:Any,Matrixvariate} const Transformed = TransformedDistribution - """ transformed(d::Distribution) transformed(d::Distribution, b::Bijector) @@ -64,14 +70,14 @@ bijector(d::KSOneSided) = Logit(zero(eltype(d)), one(eltype(d))) bijector_bounded(d, a=minimum(d), b=maximum(d)) = Logit(a, b) bijector_lowerbounded(d, a=minimum(d)) = elementwise(log) ∘ Shift(-a) -bijector_upperbounded(d, b=maximum(d)) = elementwise(log) ∘ Shift(b) ∘ Scale(- one(typeof(b))) +function bijector_upperbounded(d, b=maximum(d)) + return elementwise(log) ∘ Shift(b) ∘ Scale(-one(typeof(b))) +end -const BoundedDistribution = Union{ - Arcsine, Biweight, Cosine, Epanechnikov, Beta, NoncentralBeta -} +const BoundedDistribution = Union{Arcsine,Biweight,Cosine,Epanechnikov,Beta,NoncentralBeta} bijector(d::BoundedDistribution) = bijector_bounded(d) -const LowerboundedDistribution = Union{Pareto, Levy} +const LowerboundedDistribution = Union{Pareto,Levy} bijector(d::LowerboundedDistribution) = bijector_lowerbounded(d) bijector(d::PDMatDistribution) = PDBijector() @@ -111,7 +117,7 @@ function logpdf(td::MvTransformed{<:Dirichlet}, y::AbstractMatrix{<:Real}) ϵ = _eps(T) x, logjac = with_logabsdet_jacobian(inverse(td.transform), y) - return logpdf(td.dist, mappedarray(x->x+ϵ, x)) + logjac + return logpdf(td.dist, mappedarray(x -> x + ϵ, x)) + logjac end function _logpdf(td::MvTransformed, y::AbstractVector{<:Real}) @@ -124,7 +130,7 @@ function _logpdf(td::MvTransformed{<:Dirichlet}, y::AbstractVector{<:Real}) ϵ = _eps(T) x, logjac = with_logabsdet_jacobian(inverse(td.transform), y) - return logpdf(td.dist, mappedarray(x->x+ϵ, x)) + logjac + return logpdf(td.dist, mappedarray(x -> x + ϵ, x)) + logjac end # TODO: should eventually drop using `logpdf_with_trans` and replace with @@ -145,20 +151,23 @@ rand(rng::AbstractRNG, td::MvTransformed) = td.transform(rand(rng, td.dist)) # TODO: implement more efficiently for flows function rand(rng::AbstractRNG, td::MvTransformed, num_samples::Int) samples = rand(rng, td.dist, num_samples) - res = reduce(hcat, map(axes(samples, 2)) do i - return td.transform(view(samples, :, i)) - end) + res = reduce( + hcat, + map(axes(samples, 2)) do i + return td.transform(view(samples, :, i)) + end, + ) return res end function _rand!(rng::AbstractRNG, td::MvTransformed, x::AbstractVector{<:Real}) rand!(rng, td.dist, x) - x .= td.transform(x) + return x .= td.transform(x) end function _rand!(rng::AbstractRNG, td::MatrixTransformed, x::DenseMatrix{<:Real}) rand!(rng, td.dist, x) - x .= td.transform(x) + return x .= td.transform(x) end # utility stuff @@ -173,4 +182,3 @@ function Base.minimum(td::UnivariateTransformed) min, max = td.transform.((Base.minimum(td.dist), Base.maximum(td.dist))) return max < min ? max : min end - diff --git a/test/ad/chainrules.jl b/test/ad/chainrules.jl index b0e4dc2e..ebb461f0 100644 --- a/test/ad/chainrules.jl +++ b/test/ad/chainrules.jl @@ -5,7 +5,13 @@ test_frule(Bijectors.find_alpha, x, y, z) test_rrule(Bijectors.find_alpha, x, y, z) - test_rrule(Bijectors.combine, Bijectors.PartitionMask(3, [1], [2]) ⊢ ChainRulesTestUtils.NoTangent(), [1.0], [2.0], [3.0]) + test_rrule( + Bijectors.combine, + Bijectors.PartitionMask(3, [1], [2]) ⊢ ChainRulesTestUtils.NoTangent(), + [1.0], + [2.0], + [3.0], + ) # ordered bijector b = Bijectors.OrderedBijector() diff --git a/test/ad/utils.jl b/test/ad/utils.jl index 6bf8365f..dc7d8234 100644 --- a/test/ad/utils.jl +++ b/test/ad/utils.jl @@ -1,43 +1,45 @@ # Figure out which AD backend to test const AD = get(ENV, "AD", "All") -function test_ad(f, x, broken = (); rtol = 1e-6, atol = 1e-6) +function test_ad(f, x, broken=(); rtol=1e-6, atol=1e-6) finitediff = FiniteDifferences.grad(central_fdm(5, 1), f, x)[1] if AD == "All" || AD == "Tracker" if :Tracker in broken - @test_broken Tracker.data(Tracker.gradient(f, x)[1]) ≈ finitediff rtol=rtol atol=atol + @test_broken Tracker.data(Tracker.gradient(f, x)[1]) ≈ finitediff rtol = rtol atol = + atol else ∇tracker = Tracker.gradient(f, x)[1] - @test Tracker.data(∇tracker) ≈ finitediff rtol=rtol atol=atol + @test Tracker.data(∇tracker) ≈ finitediff rtol = rtol atol = atol @test Tracker.istracked(∇tracker) end end if AD == "All" || AD == "ForwardDiff" if :ForwardDiff in broken - @test_broken ForwardDiff.gradient(f, x) ≈ finitediff rtol=rtol atol=atol + @test_broken ForwardDiff.gradient(f, x) ≈ finitediff rtol = rtol atol = atol else - @test ForwardDiff.gradient(f, x) ≈ finitediff rtol=rtol atol=atol + @test ForwardDiff.gradient(f, x) ≈ finitediff rtol = rtol atol = atol end end if AD == "All" || AD == "Zygote" if :Zygote in broken - @test_broken Zygote.gradient(f, x)[1] ≈ finitediff rtol=rtol atol=atol + @test_broken Zygote.gradient(f, x)[1] ≈ finitediff rtol = rtol atol = atol else ∇zygote = Zygote.gradient(f, x)[1] - @test (all(finitediff .== 0) && ∇zygote === nothing) || isapprox(∇zygote, finitediff, rtol=rtol, atol=atol) + @test (all(finitediff .== 0) && ∇zygote === nothing) || + isapprox(∇zygote, finitediff; rtol=rtol, atol=atol) end end if AD == "All" || AD == "ReverseDiff" if :ReverseDiff in broken - @test_broken ReverseDiff.gradient(f, x) ≈ finitediff rtol=rtol atol=atol + @test_broken ReverseDiff.gradient(f, x) ≈ finitediff rtol = rtol atol = atol else - @test ReverseDiff.gradient(f, x) ≈ finitediff rtol=rtol atol=atol + @test ReverseDiff.gradient(f, x) ≈ finitediff rtol = rtol atol = atol end end - return + return nothing end diff --git a/test/bijectors/coupling.jl b/test/bijectors/coupling.jl index 5d6c6607..849fa6d1 100644 --- a/test/bijectors/coupling.jl +++ b/test/bijectors/coupling.jl @@ -1,12 +1,4 @@ -using Bijectors: - Coupling, - PartitionMask, - coupling, - couple, - partition, - combine, - Shift, - Scale +using Bijectors: Coupling, PartitionMask, coupling, couple, partition, combine, Shift, Scale @testset "Coupling" begin @testset "PartitionMask" begin @@ -15,9 +7,9 @@ using Bijectors: @test (m1.A_1 == m2.A_1) & (m1.A_2 == m2.A_2) & (m1.A_3 == m2.A_3) - x = [1., 2., 3.] + x = [1.0, 2.0, 3.0] x1, x2, x3 = partition(m1, x) - @test (x1 == [1.]) & (x2 == [2.]) & (x3 == [3.]) + @test (x1 == [1.0]) & (x2 == [2.0]) & (x3 == [3.0]) y = combine(m1, x1, x2, x3) @test y == x @@ -27,8 +19,8 @@ using Bijectors: m = PartitionMask(3, [1], [2]) cl1 = Coupling(x -> Shift(x[1]), m) - x = [1., 2., 3.] - @test cl1(x) == [3., 2., 3.] + x = [1.0, 2.0, 3.0] + @test cl1(x) == [3.0, 2.0, 3.0] cl2 = Coupling(θ -> Shift(θ[1]), m) @test cl2(x) == cl1(x) @@ -46,7 +38,7 @@ using Bijectors: # with_logabsdet_jacobian @test with_logabsdet_jacobian(cl1, x) == (cl1(x), logabsdetjac(cl1, x)) - @test with_logabsdet_jacobian(icl1, cl1(x)) == (x, - logabsdetjac(cl1, x)) + @test with_logabsdet_jacobian(icl1, cl1(x)) == (x, -logabsdetjac(cl1, x)) end @testset "Classic" begin @@ -54,12 +46,12 @@ using Bijectors: # With `Scale` cl = Coupling(x -> Scale(x[1]), m) - x = [-1., -2., -3.] - y = [2., -2., -3.] + x = [-1.0, -2.0, -3.0] + y = [2.0, -2.0, -3.0] test_bijector(cl, x; y=y, logjac=log(2)) - x = [1., 2., 3.] - y = [2., 2., 3.] + x = [1.0, 2.0, 3.0] + y = [2.0, 2.0, 3.0] test_bijector(cl, x; y=y, logjac=log(2)) end end diff --git a/test/bijectors/leaky_relu.jl b/test/bijectors/leaky_relu.jl index a51f33e5..f8cb1a08 100644 --- a/test/bijectors/leaky_relu.jl +++ b/test/bijectors/leaky_relu.jl @@ -18,11 +18,11 @@ using Bijectors: LeakyReLU b = LeakyReLU(Float32(b.α)) # < 0 - x = -1f0 + x = -1.0f0 test_bijector(b, x) # ≥ 0 - x = 1f0 + x = 1.0f0 test_bijector(b, x; test_not_identity=false, test_types=true) end diff --git a/test/bijectors/named_bijector.jl b/test/bijectors/named_bijector.jl index fbdce0ec..787a6b41 100644 --- a/test/bijectors/named_bijector.jl +++ b/test/bijectors/named_bijector.jl @@ -3,38 +3,38 @@ using Bijectors using Bijectors: Logit, AbstractNamedTransform, NamedTransform, NamedCoupling, Shift @testset "NamedTransform" begin - b = NamedTransform((a = elementwise(exp), b = elementwise(log))) - @test b((a = 0.0, b = exp(1.0))) == (a = 1.0, b = 1.0) + b = NamedTransform((a=elementwise(exp), b=elementwise(log))) + @test b((a=0.0, b=exp(1.0))) == (a=1.0, b=1.0) - with_logabsdet_jacobian(b, (a = 0.0, b = exp(1.0))) + with_logabsdet_jacobian(b, (a=0.0, b=exp(1.0))) end @testset "NamedCoupling" begin - nc = NamedCoupling(Val(:b), Val((:a, )), a -> Logit(zero(a), a)) - @inferred NamedCoupling(Val(:b), Val((:a, )), Shift) + nc = NamedCoupling(Val(:b), Val((:a,)), a -> Logit(zero(a), a)) + @inferred NamedCoupling(Val(:b), Val((:a,)), Shift) - nc = NamedCoupling(:b, (:a, ), a -> Logit(0., a)) # <= not type-inferrable but eh + nc = NamedCoupling(:b, (:a,), a -> Logit(0.0, a)) # <= not type-inferrable but eh @test Bijectors.target(nc) == :b - @test Bijectors.deps(nc) == (:a, ) + @test Bijectors.deps(nc) == (:a,) @inferred Bijectors.target(nc) @inferred Bijectors.deps(nc) - x = (a = 1.0, b = 0.5, c = 99999.) + x = (a=1.0, b=0.5, c=99999.0) @test Bijectors.coupling(nc)(x.a) isa Logit @test inverse(nc)(nc(x)) == x - @test logabsdetjac(nc, x) == logabsdetjac(Logit(0., 1.), x.b) + @test logabsdetjac(nc, x) == logabsdetjac(Logit(0.0, 1.0), x.b) @test logabsdetjac(inverse(nc), nc(x)) == -logabsdetjac(nc, x) - x = (a = 0.0, b = 2.0, c = 1.0) + x = (a=0.0, b=2.0, c=1.0) nc = NamedCoupling(:c, (:a, :b), (a, b) -> Logit(a, b)) @test nc(x).c == 0.0 @test inverse(nc)(nc(x)) == x - x = (a = 0.0, b = 2.0, c = 1.0) - nc = NamedCoupling(:c, (:b, ), b -> Shift(b)) + x = (a=0.0, b=2.0, c=1.0) + nc = NamedCoupling(:c, (:b,), b -> Shift(b)) @test nc(x).c == 3.0 @test inverse(nc)(nc(x)) == x end diff --git a/test/bijectors/permute.jl b/test/bijectors/permute.jl index 49e90a82..6ab0e968 100644 --- a/test/bijectors/permute.jl +++ b/test/bijectors/permute.jl @@ -8,10 +8,10 @@ using Bijectors: Permute # in the sense that the map is {1, 2} => {1} @test_throws ArgumentError Permute(2, 2 => 1) @test_throws ArgumentError Permute(2, [1, 2, 3] => [2, 1]) - + # Simplest case b1 = Permute([ - 0 1; + 0 1 1 0 ]) b2 = Permute([2, 1]) @@ -20,7 +20,7 @@ using Bijectors: Permute @test b1.A == b2.A == b3.A == b4.A - x = [1., 2.] + x = [1.0, 2.0] @test (inverse(b1) ∘ b1)(x) == x @test (inverse(b2) ∘ b2)(x) == x @test (inverse(b3) ∘ b3)(x) == x @@ -28,8 +28,8 @@ using Bijectors: Permute # Slightly more complex case; one entry is not permuted b1 = Permute([ - 0 1 0; - 1 0 0; + 0 1 0 + 1 0 0 0 0 1 ]) b2 = Permute([2, 1, 3]) @@ -37,8 +37,8 @@ using Bijectors: Permute b4 = Permute(3, [1, 2] => [2, 1]) @test b1.A == b2.A == b3.A == b4.A - - x = [1., 2., 3.] + + x = [1.0, 2.0, 3.0] @test (inverse(b1) ∘ b1)(x) == x @test (inverse(b2) ∘ b2)(x) == x @test (inverse(b3) ∘ b3)(x) == x diff --git a/test/bijectors/rational_quadratic_spline.jl b/test/bijectors/rational_quadratic_spline.jl index fe0ddc31..4b8be14b 100644 --- a/test/bijectors/rational_quadratic_spline.jl +++ b/test/bijectors/rational_quadratic_spline.jl @@ -4,7 +4,9 @@ using Bijectors: RationalQuadraticSpline @testset "RationalQuadraticSpline" begin # Monotonic spline on '[-B, B]' with `K` intermediate knots/"connection points". - d = 2; K = 3; B = 2; + d = 2 + K = 3 + B = 2 b_uv = RationalQuadraticSpline(randn(K), randn(K), randn(K - 1), B) b_mv = RationalQuadraticSpline(randn(d, K), randn(d, K), randn(d, K - 1), B) @@ -54,7 +56,7 @@ using Bijectors: RationalQuadraticSpline test_bijector(b, x) # Outside of domain - x = [-5., 5.] + x = [-5.0, 5.0] test_bijector(b, x; y=x, logjac=zero(eltype(x))) end end diff --git a/test/bijectors/utils.jl b/test/bijectors/utils.jl index dc1d3a55..b9ab1242 100644 --- a/test/bijectors/utils.jl +++ b/test/bijectors/utils.jl @@ -15,15 +15,16 @@ function test_bijector( changes_of_variables_test=true, inverse_functions_test=true, compare=isapprox, - kwargs... + kwargs..., ) # Ensure that everything is type-stable. ib = @inferred inverse(b) logjac_test = @inferred logabsdetjac(b, x) - res = @inferred with_logabsdet_jacobian(b, x) + res = @inferred with_logabsdet_jacobian(b, x) y_test = @inferred b(x) - ilogjac_test = !isnothing(y) ? @inferred(logabsdetjac(ib, y)) : @inferred(logabsdetjac(ib, y_test)) + ilogjac_test = + !isnothing(y) ? @inferred(logabsdetjac(ib, y)) : @inferred(logabsdetjac(ib, y_test)) ires = if !isnothing(y) @inferred(with_logabsdet_jacobian(inverse(b), y)) else @@ -34,18 +35,20 @@ function test_bijector( # For non-bijective transformations, these tests always fail since determinant of # the Jacobian is zero. Hence we allow the caller to disable them if necessary. if changes_of_variables_test - ChangesOfVariables.test_with_logabsdet_jacobian(b, x, getjacobian; compare=compare, kwargs...) ChangesOfVariables.test_with_logabsdet_jacobian( - ib, isnothing(y) ? y_test : y, getjacobian; - compare=compare, - kwargs... + b, x, getjacobian; compare=compare, kwargs... + ) + ChangesOfVariables.test_with_logabsdet_jacobian( + ib, isnothing(y) ? y_test : y, getjacobian; compare=compare, kwargs... ) end # InverseFunctions.jl if inverse_functions_test InverseFunctions.test_inverse(b, x; compare, kwargs...) - InverseFunctions.test_inverse(ib, isnothing(y) ? y_test : y; compare=compare, kwargs...) + InverseFunctions.test_inverse( + ib, isnothing(y) ? y_test : y; compare=compare, kwargs... + ) end # Always want the following to hold @@ -100,9 +103,9 @@ function test_functor(x, xs) @test _xs == xs end -function test_bijector_parameter_gradient(b::Bijectors.Transform, x, y = b(x)) +function test_bijector_parameter_gradient(b::Bijectors.Transform, x, y=b(x)) args, re = Functors.functor(b) - recon(k, param) = re(merge(args, NamedTuple{(k, )}((param, )))) + recon(k, param) = re(merge(args, NamedTuple{(k,)}((param,)))) # Compute the gradient wrt. one argument at the time. for (k, v) in pairs(args) diff --git a/test/interface.jl b/test/interface.jl index e1f8d0e4..7d989abf 100644 --- a/test/interface.jl +++ b/test/interface.jl @@ -7,7 +7,19 @@ using Tracker using DistributionsAD using Bijectors -using Bijectors: Shift, Scale, Logit, SimplexBijector, PDBijector, Permute, PlanarLayer, RadialLayer, Stacked, TruncatedBijector, RationalQuadraticSpline, LeakyReLU +using Bijectors: + Shift, + Scale, + Logit, + SimplexBijector, + PDBijector, + Permute, + PlanarLayer, + RadialLayer, + Stacked, + TruncatedBijector, + RationalQuadraticSpline, + LeakyReLU Random.seed!(123) @@ -19,7 +31,7 @@ contains(predicate::Function, b::Stacked) = any(contains.(predicate, b.bs)) # Tests with scalar-valued distributions. uni_dists = [ Arcsine(2, 4), - Beta(2,2), + Beta(2, 2), BetaPrime(), Biweight(), Cauchy(), @@ -44,10 +56,10 @@ contains(predicate::Function, b::Stacked) = any(contains.(predicate, b.bs)) Rayleigh(1.0), TDist(2), truncated(Normal(0, 1), -Inf, 2), - transformed(Beta(2,2)), + transformed(Beta(2, 2)), transformed(Exponential()), ] - + for dist in uni_dists @testset "$dist: dist" begin td = @inferred transformed(dist) @@ -68,7 +80,8 @@ contains(predicate::Function, b::Stacked) = any(contains.(predicate, b.bs)) b = @inferred bijector(d) x = rand(d) y = @inferred b(x) - @test logpdf(d, inverse(b)(y)) + logabsdetjacinv(b, y) ≈ logpdf_with_trans(d, x, true) + @test logpdf(d, inverse(b)(y)) + logabsdetjacinv(b, y) ≈ + logpdf_with_trans(d, x, true) @test logpdf(d, x) - logabsdetjac(b, x) ≈ logpdf_with_trans(d, x, true) # verify against AD @@ -78,8 +91,9 @@ contains(predicate::Function, b::Stacked) = any(contains.(predicate, b.bs)) y = b(x) # `ForwardDiff.derivative` can lead to some numerical inaccuracy, # so we use a slightly higher `atol` than default. - @test log(abs(ForwardDiff.derivative(b, x))) ≈ logabsdetjac(b, x) atol=1e-6 - @test log(abs(ForwardDiff.derivative(inverse(b), y))) ≈ logabsdetjac(inverse(b), y) atol=1e-6 + @test log(abs(ForwardDiff.derivative(b, x))) ≈ logabsdetjac(b, x) atol = 1e-6 + @test log(abs(ForwardDiff.derivative(inverse(b), y))) ≈ + logabsdetjac(inverse(b), y) atol = 1e-6 end end end @@ -91,7 +105,8 @@ end y = b(x) @test y ≈ link(d, x) @test inverse(b)(y) ≈ x - @test logabsdetjac(b, x) ≈ logpdf_with_trans(d, x, false) - logpdf_with_trans(d, x, true) + @test logabsdetjac(b, x) ≈ + logpdf_with_trans(d, x, false) - logpdf_with_trans(d, x, true) d = truncated(Normal(), -Inf, 1) b = bijector(d) @@ -99,7 +114,8 @@ end y = b(x) @test y ≈ link(d, x) @test inverse(b)(y) ≈ x - @test logabsdetjac(b, x) ≈ logpdf_with_trans(d, x, false) - logpdf_with_trans(d, x, true) + @test logabsdetjac(b, x) ≈ + logpdf_with_trans(d, x, false) - logpdf_with_trans(d, x, true) d = truncated(Normal(), 1, Inf) b = bijector(d) @@ -107,7 +123,8 @@ end y = b(x) @test y ≈ link(d, x) @test inverse(b)(y) ≈ x - @test logabsdetjac(b, x) ≈ logpdf_with_trans(d, x, false) - logpdf_with_trans(d, x, true) + @test logabsdetjac(b, x) ≈ + logpdf_with_trans(d, x, false) - logpdf_with_trans(d, x, true) end @testset "Multivariate" begin @@ -117,7 +134,7 @@ end Dirichlet([eps(Float64), 1000 * one(Float64)]), MvNormal(randn(10), Diagonal(exp.(randn(10)))), MvLogNormal(MvNormal(randn(10), Diagonal(exp.(randn(10))))), - Dirichlet([1000 * one(Float64), eps(Float64)]), + Dirichlet([1000 * one(Float64), eps(Float64)]), Dirichlet([eps(Float64), 1000 * one(Float64)]), transformed(MvNormal(randn(10), Diagonal(exp.(randn(10))))), transformed(MvLogNormal(MvNormal(randn(10), Diagonal(exp.(randn(10)))))), @@ -144,11 +161,16 @@ end # which in turn will lead to differences between `ForwardDiff.jacobian` # and `logabsdetjac` due to how we handle the boundary values in `SimplexBijector`. # We therefore test the realizations _on_ the boundary rather if we're near the boundary. - x = any(rand(dist) .> 0.9999) ? [0.0, 1.0][sortperm(rand(dist))] : rand(dist) + x = if any(rand(dist) .> 0.9999) + [0.0, 1.0][sortperm(rand(dist))] + else + rand(dist) + end y = b(x) @test b(param(x)) isa TrackedArray @test log(abs(det(ForwardDiff.jacobian(b, x)))) ≈ logabsdetjac(b, x) - @test log(abs(det(ForwardDiff.jacobian(inverse(b), y)))) ≈ logabsdetjac(inverse(b), y) + @test log(abs(det(ForwardDiff.jacobian(inverse(b), y)))) ≈ + logabsdetjac(inverse(b), y) else b = bijector(dist) x = rand(dist) @@ -156,8 +178,10 @@ end # `ForwardDiff.derivative` can lead to some numerical inaccuracy, # so we use a slightly higher `atol` than default. @test b(param(x)) isa TrackedArray - @test log(abs(det(ForwardDiff.jacobian(b, x)))) ≈ logabsdetjac(b, x) atol=1e-6 - @test log(abs(det(ForwardDiff.jacobian(inverse(b), y)))) ≈ logabsdetjac(inverse(b), y) atol=1e-6 + @test log(abs(det(ForwardDiff.jacobian(b, x)))) ≈ logabsdetjac(b, x) atol = + 1e-6 + @test log(abs(det(ForwardDiff.jacobian(inverse(b), y)))) ≈ + logabsdetjac(inverse(b), y) atol = 1e-6 end end end @@ -169,11 +193,11 @@ end S[1, 2] = S[2, 1] = 0.5 matrix_dists = [ - Wishart(v,S), - InverseWishart(v,S), - TuringWishart(v,S), - TuringInverseWishart(v,S), - LKJ(3, 1.), + Wishart(v, S), + InverseWishart(v, S), + TuringWishart(v, S), + TuringInverseWishart(v, S), + LKJ(3, 1.0), reshape(MvNormal(zeros(6), I), 2, 3), ] @@ -224,16 +248,16 @@ end @test sb1(param([x, x, y, y])) isa TrackedArray @test sb1([x, x, y, y]) ≈ res1[1] - @test logabsdetjac(sb1, [x, x, y, y]) ≈ 0 atol=1e-6 - @test res1[2] ≈ 0 atol=1e-6 + @test logabsdetjac(sb1, [x, x, y, y]) ≈ 0 atol = 1e-6 + @test res1[2] ≈ 0 atol = 1e-6 sb2 = Stacked([b, b, inverse(b), inverse(b)]) # <= Array res2 = with_logabsdet_jacobian(sb2, [x, x, y, y]) @test sb2(param([x, x, y, y])) isa TrackedArray @test sb2([x, x, y, y]) ≈ res2[1] - @test logabsdetjac(sb2, [x, x, y, y]) ≈ 0.0 atol=1e-12 - @test res2[2] ≈ 0.0 atol=1e-12 + @test logabsdetjac(sb2, [x, x, y, y]) ≈ 0.0 atol = 1e-12 + @test res2[2] ≈ 0.0 atol = 1e-12 # value-test x = ones(3) @@ -242,10 +266,10 @@ end @test sb(param(x)) isa TrackedArray @test sb(x) == [exp(x[1]), log(x[2]), x[3] + 5.0] @test res[1] == [exp(x[1]), log(x[2]), x[3] + 5.0] - @test logabsdetjac(sb, x) == sum([sum(logabsdetjac(sb.bs[i], x[sb.ranges[i]])) for i = 1:3]) + @test logabsdetjac(sb, x) == + sum([sum(logabsdetjac(sb.bs[i], x[sb.ranges[i]])) for i in 1:3]) @test res[2] == logabsdetjac(sb, x) - # TODO: change when we have dimensionality in the type sb = @inferred Stacked((elementwise(exp), SimplexBijector()), (1:1, 2:3)) x = ones(3) ./ 3.0 @@ -253,7 +277,8 @@ end @test sb(param(x)) isa TrackedArray @test sb(x) == [exp(x[1]), sb.bs[2](x[2:3])...] @test res[1] == [exp(x[1]), sb.bs[2](x[2:3])...] - @test logabsdetjac(sb, x) == sum([sum(logabsdetjac(sb.bs[i], x[sb.ranges[i]])) for i = 1:2]) + @test logabsdetjac(sb, x) == + sum([sum(logabsdetjac(sb.bs[i], x[sb.ranges[i]])) for i in 1:2]) @test res[2] == logabsdetjac(sb, x) x = ones(4) ./ 4.0 @@ -266,7 +291,8 @@ end @test sb(param(x)) isa TrackedArray @test sb(x) == [exp(x[1]), sb.bs[2](x[2:3])...] @test res[1] == [exp(x[1]), sb.bs[2](x[2:3])...] - @test logabsdetjac(sb, x) == sum([sum(logabsdetjac(sb.bs[i], x[sb.ranges[i]])) for i = 1:2]) + @test logabsdetjac(sb, x) == + sum([sum(logabsdetjac(sb.bs[i], x[sb.ranges[i]])) for i in 1:2]) @test res[2] == logabsdetjac(sb, x) x = ones(4) ./ 4.0 @@ -280,7 +306,8 @@ end @test sb(param(x)) isa TrackedArray @test sb(x) == [exp(x[1]), sb.bs[2](x[2:3])...] @test res[1] == [exp(x[1]), sb.bs[2](x[2:3])...] - @test logabsdetjac(sb, x) == sum([sum(logabsdetjac(sb.bs[i], x[sb.ranges[i]])) for i = 1:2]) + @test logabsdetjac(sb, x) == + sum([sum(logabsdetjac(sb.bs[i], x[sb.ranges[i]])) for i in 1:2]) @test res[2] == logabsdetjac(sb, x) x = ones(4) ./ 4.0 @@ -293,13 +320,13 @@ end @test sb(param(x)) isa TrackedArray @test sb(x) == [exp(x[1]), sb.bs[2](x[2:3])...] @test res[1] == [exp(x[1]), sb.bs[2](x[2:3])...] - @test logabsdetjac(sb, x) == sum([sum(logabsdetjac(sb.bs[i], x[sb.ranges[i]])) for i = 1:2]) + @test logabsdetjac(sb, x) == + sum([sum(logabsdetjac(sb.bs[i], x[sb.ranges[i]])) for i in 1:2]) @test res[2] == logabsdetjac(sb, x) x = ones(4) ./ 4.0 @test_throws AssertionError sb(x) - @testset "Stacked: ADVI with MvNormal" begin # MvNormal test dists = [ @@ -313,14 +340,14 @@ end InverseGamma(), Cauchy(), Gamma(), - MvNormal(zeros(2), I) + MvNormal(zeros(2), I), ] ranges = [] idx = 1 - for i = 1:length(dists) + for i in 1:length(dists) d = dists[i] - push!(ranges, idx:idx + length(d) - 1) + push!(ranges, idx:(idx + length(d) - 1)) idx += length(d) end ranges = tuple(ranges...) @@ -337,7 +364,7 @@ end @test sb isa Stacked td = transformed(d, sb) # => MultivariateTransformed <: Distribution{Multivariate, Continuous} - @test td isa Distribution{Multivariate, Continuous} + @test td isa Distribution{Multivariate,Continuous} # check that wrong ranges fails sb = Stacked(ibs) @@ -360,8 +387,8 @@ end # verification of computation x = rand(d) y = sb(x) - y_ = vcat([ibs[i](x[ranges[i]]) for i = 1:length(dists)]...) - x_ = vcat([bs[i](y[ranges[i]]) for i = 1:length(dists)]...) + y_ = vcat([ibs[i](x[ranges[i]]) for i in 1:length(dists)]...) + x_ = vcat([bs[i](y[ranges[i]]) for i in 1:length(dists)]...) @test x ≈ x_ @test y ≈ y_ @@ -371,8 +398,8 @@ end # Ensure `Stacked` works for a single bijector d = (MvNormal(zeros(2), I),) - sb = Stacked(bijector.(d), (1:2, )) - x = [.5, 1.] + sb = Stacked(bijector.(d), (1:2,)) + x = [0.5, 1.0] @test sb(x) == x @test logabsdetjac(sb, x) == 0 @test with_logabsdet_jacobian(sb, x) == (x, zero(eltype(x))) @@ -433,8 +460,8 @@ end elementwise(log), Scale(2.0), Scale(3.0), - Scale(rand(2,2)), - Scale(rand(2,2)), + Scale(rand(2, 2)), + Scale(rand(2, 2)), Shift(2.0), Shift(3.0), Shift(rand(2)), @@ -474,7 +501,7 @@ end end @testset "test_inverse and test_with_logabsdet_jacobian" begin - b = Bijectors.Scale{Float64,}(4.2) + b = Bijectors.Scale{Float64}(4.2) x = 0.3 InverseFunctions.test_inverse(b, x) diff --git a/test/norm_flows.jl b/test/norm_flows.jl index d00f632c..74fe3142 100644 --- a/test/norm_flows.jl +++ b/test/norm_flows.jl @@ -11,14 +11,15 @@ seed!(1) @test inverse(inverse(bn)) == bn @test inverse(bn)(bn(x)) ≈ x @test (inverse(bn) ∘ bn)(x) ≈ x - @test_throws ErrorException with_logabsdet_jacobian(bn, randn(10,2)) - @test logabsdetjac(inverse(bn), bn(x)) ≈ - logabsdetjac(bn, x) + @test_throws ErrorException with_logabsdet_jacobian(bn, randn(10, 2)) + @test logabsdetjac(inverse(bn), bn(x)) ≈ -logabsdetjac(bn, x) y, ladj = with_logabsdet_jacobian(bn, x) @test log(abs(det(ForwardDiff.jacobian(bn, x)))) ≈ sum(ladj) - @test log(abs(det(ForwardDiff.jacobian(inverse(bn), y)))) ≈ sum(logabsdetjac(inverse(bn), y)) + @test log(abs(det(ForwardDiff.jacobian(inverse(bn), y)))) ≈ + sum(logabsdetjac(inverse(bn), y)) - test_functor(bn, (b = bn.b, logs = bn.logs)) + test_functor(bn, (b=bn.b, logs=bn.logs)) end @testset "PlanarLayer" begin @@ -40,8 +41,8 @@ end z = ones(10, 100) @test inverse(flow)(flow(z)) ≈ z - test_functor(flow, (w = w, u = u, b = b)) - test_functor(inverse(flow), (orig = flow,)) + test_functor(flow, (w=w, u=u, b=b)) + test_functor(inverse(flow), (orig=flow,)) @testset "find_alpha" begin for wt_y in (-20.3, -3, -3//2, 0.0, 5, 29//4, 12.3) @@ -55,7 +56,8 @@ end # check if α is an approximate solution to the considered equation # have to set atol if wt_y is zero (otherwise only equality is checked) - @test wt_y ≈ α + wt_u_hat * tanh(α + b) atol=iszero(wt_y) ? 1e-14 : 0.0 + @test wt_y ≈ α + wt_u_hat * tanh(α + b) atol = + iszero(wt_y) ? 1e-14 : 0.0 end end end @@ -77,8 +79,8 @@ end our_method = sum(with_logabsdet_jacobian(flow, z)[2]) @test our_method ≈ forward_diff - @test inverse(flow)(flow(z)) ≈ z rtol=0.2 - @test (inverse(flow) ∘ flow)(z) ≈ z rtol=0.2 + @test inverse(flow)(flow(z)) ≈ z rtol = 0.2 + @test (inverse(flow) ∘ flow)(z) ≈ z rtol = 0.2 end α_ = 1.0 @@ -88,8 +90,8 @@ end flow = RadialLayer(α_, β, z_0) @test inverse(flow)(flow(z)) ≈ z - test_functor(flow, (α_ = α_, β = β, z_0 = z_0)) - test_functor(inverse(flow), (orig = flow,)) + test_functor(flow, (α_=α_, β=β, z_0=z_0)) + test_functor(inverse(flow), (orig=flow,)) end @testset "Flows" begin @@ -106,7 +108,7 @@ end lp = logpdf(d, x) - res[2] @test res[1] ≈ y - @test logpdf(flow, y) ≈ lp rtol=0.1 + @test logpdf(flow, y) ≈ lp rtol = 0.1 # flow with unconstrained-to-constrained d1 = Beta() diff --git a/test/runtests.jl b/test/runtests.jl index b48c656d..ee2401eb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -13,8 +13,17 @@ using Zygote using Random, LinearAlgebra, Test -using Bijectors: Shift, Scale, Logit, SimplexBijector, PDBijector, Permute, - PlanarLayer, RadialLayer, Stacked, TruncatedBijector +using Bijectors: + Shift, + Scale, + Logit, + SimplexBijector, + PDBijector, + Permute, + PlanarLayer, + RadialLayer, + Stacked, + TruncatedBijector using ChangesOfVariables: ChangesOfVariables using InverseFunctions: InverseFunctions @@ -43,4 +52,3 @@ if GROUP == "All" || GROUP == "AD" include("ad/chainrules.jl") include("ad/flows.jl") end - diff --git a/test/transform.jl b/test/transform.jl index 7be147d3..15cd9178 100644 --- a/test/transform.jl +++ b/test/transform.jl @@ -24,9 +24,10 @@ function single_sample_tests(dist, jacobian) # Check that the implementation of the logpdf agrees with the AD version. x = rand(dist) if dist isa SimplexDistribution - logpdf_ad = logpdf(dist, x .+ ϵ) - _logabsdet(jacobian(x->link(dist, x, false), x)) + logpdf_ad = + logpdf(dist, x .+ ϵ) - _logabsdet(jacobian(x -> link(dist, x, false), x)) else - logpdf_ad = logpdf(dist, x) - _logabsdet(jacobian(x->link(dist, x), x)) + logpdf_ad = logpdf(dist, x) - _logabsdet(jacobian(x -> link(dist, x), x)) end @test logpdf_ad ≈ logpdf_with_trans(dist, x, true) end @@ -38,7 +39,7 @@ function single_sample_tests(dist) # Check that invlink is inverse of link. x = rand(dist) - @test @inferred(invlink(dist, link(dist, copy(x)))) ≈ x atol=1e-9 + @test @inferred(invlink(dist, link(dist, copy(x)))) ≈ x atol = 1e-9 # Check that link is inverse of invlink. Hopefully this just holds given the above... y = @inferred(link(dist, x)) @@ -49,128 +50,139 @@ function single_sample_tests(dist) # 1.0 # julia> logistic(logit(0.9999999999999998)) # 0.9999999999999998 - @test @inferred(link(dist, invlink(dist, copy(y)))) ≈ y atol=0.5 + @test @inferred(link(dist, invlink(dist, copy(y)))) ≈ y atol = 0.5 else - @test @inferred(link(dist, invlink(dist, copy(y)))) ≈ y atol=1e-9 + @test @inferred(link(dist, invlink(dist, copy(y)))) ≈ y atol = 1e-9 end if dist isa SimplexDistribution # This should probably be exact. @test logpdf(dist, x .+ ϵ) == logpdf_with_trans(dist, x, false) # Check that invlink maps back to the apppropriate constrained domain. - @test all(isfinite, logpdf.(Ref(dist), [invlink(dist, _rand_real(x, 0)) .+ ϵ for _ in 1:100])) + @test all( + isfinite, + logpdf.(Ref(dist), [invlink(dist, _rand_real(x, 0)) .+ ϵ for _ in 1:100]), + ) else # This should probably be exact. @test logpdf(dist, x) == logpdf_with_trans(dist, x, false) - @test all(isfinite, logpdf.(Ref(dist), [invlink(dist, _rand_real(x)) for _ in 1:100])) + @test all( + isfinite, logpdf.(Ref(dist), [invlink(dist, _rand_real(x)) for _ in 1:100]) + ) end end # Scalar tests @testset "scalar" begin -let - # Tests with scalar-valued distributions. - uni_dists = [ - Arcsine(2, 4), - Beta(2,2), - BetaPrime(), - Biweight(), - Cauchy(), - Chi(3), - Chisq(2), - Cosine(), - Epanechnikov(), - Erlang(), - Exponential(), - FDist(1, 1), - Frechet(), - Gamma(), - InverseGamma(), - InverseGaussian(), - # Kolmogorov(), - Laplace(), - Levy(), - Logistic(), - LogNormal(1.0, 2.5), - Normal(0.1, 2.5), - Pareto(), - Rayleigh(1.0), - TDist(2), - truncated(Normal(0, 1), -Inf, 2), - ] - for dist in uni_dists - - single_sample_tests(dist, ForwardDiff.derivative) + let + # Tests with scalar-valued distributions. + uni_dists = [ + Arcsine(2, 4), + Beta(2, 2), + BetaPrime(), + Biweight(), + Cauchy(), + Chi(3), + Chisq(2), + Cosine(), + Epanechnikov(), + Erlang(), + Exponential(), + FDist(1, 1), + Frechet(), + Gamma(), + InverseGamma(), + InverseGaussian(), + # Kolmogorov(), + Laplace(), + Levy(), + Logistic(), + LogNormal(1.0, 2.5), + Normal(0.1, 2.5), + Pareto(), + Rayleigh(1.0), + TDist(2), + truncated(Normal(0, 1), -Inf, 2), + ] + for dist in uni_dists + single_sample_tests(dist, ForwardDiff.derivative) + end end end -end # Tests with vector-valued distributions. @testset "vector" begin -let ϵ = eps(Float64) - vector_dists = [ - Dirichlet(2, 3), - Dirichlet([1000 * one(Float64), eps(Float64)]), - Dirichlet([eps(Float64), 1000 * one(Float64)]), - MvNormal(randn(10), Diagonal(exp.(randn(10)))), - MvLogNormal(MvNormal(randn(10), Diagonal(exp.(randn(10))))), - Dirichlet([1000 * one(Float64), eps(Float64)]), - Dirichlet([eps(Float64), 1000 * one(Float64)]), - ] - for dist in vector_dists - - if dist isa Dirichlet - single_sample_tests(dist) - - # This should fail at the minute. Not sure what the correct way to test this is. - - # Workaround for intermittent test failures, result of `logpdf_with_trans(dist, x, true)` - # is incorrect for `x == [0.9999999999999998, 0.0]`: - x = if params(dist) == params(Dirichlet([1000 * one(Float64), eps(Float64)])) - [1.0, 0.0] + let ϵ = eps(Float64) + vector_dists = [ + Dirichlet(2, 3), + Dirichlet([1000 * one(Float64), eps(Float64)]), + Dirichlet([eps(Float64), 1000 * one(Float64)]), + MvNormal(randn(10), Diagonal(exp.(randn(10)))), + MvLogNormal(MvNormal(randn(10), Diagonal(exp.(randn(10))))), + Dirichlet([1000 * one(Float64), eps(Float64)]), + Dirichlet([eps(Float64), 1000 * one(Float64)]), + ] + for dist in vector_dists + if dist isa Dirichlet + single_sample_tests(dist) + + # This should fail at the minute. Not sure what the correct way to test this is. + + # Workaround for intermittent test failures, result of `logpdf_with_trans(dist, x, true)` + # is incorrect for `x == [0.9999999999999998, 0.0]`: + x = + if params(dist) == + params(Dirichlet([1000 * one(Float64), eps(Float64)])) + [1.0, 0.0] + else + rand(dist) + end + + logpdf_turing = logpdf_with_trans(dist, x, true) + J = ForwardDiff.jacobian(x -> link(dist, x, Val(false)), x) + @test logpdf(dist, x .+ ϵ) - _logabsdet(J) ≈ logpdf_turing + + # Issue #12 + stepsize = 1e10 + dim = length(dist) + x = [ + logpdf_with_trans( + dist, + invlink(dist, link(dist, rand(dist)) .+ randn(dim) .* stepsize), + true, + ) for _ in 1:1_000 + ] + @test !any(isinf, x) && !any(isnan, x) else - rand(dist) + single_sample_tests(dist, ForwardDiff.jacobian) end - - logpdf_turing = logpdf_with_trans(dist, x, true) - J = ForwardDiff.jacobian(x->link(dist, x, Val(false)), x) - @test logpdf(dist, x .+ ϵ) - _logabsdet(J) ≈ logpdf_turing - - # Issue #12 - stepsize = 1e10 - dim = length(dist) - x = [logpdf_with_trans(dist, invlink(dist, link(dist, rand(dist)) .+ randn(dim) .* stepsize), true) for _ in 1:1_000] - @test !any(isinf, x) && !any(isnan, x) - else - single_sample_tests(dist, ForwardDiff.jacobian) end end end -end # Tests with matrix-valued distributions. @testset "matrix" begin -let - matrix_dists = [ - Wishart(7, [1 0.5; 0.5 1]), - InverseWishart(2, [1 0.5; 0.5 1]), - ] - for dist in matrix_dists - - single_sample_tests(dist) + let + matrix_dists = [Wishart(7, [1 0.5; 0.5 1]), InverseWishart(2, [1 0.5; 0.5 1])] + for dist in matrix_dists + single_sample_tests(dist) - x = rand(dist); x = x + x' + 2I - lowerinds = [LinearIndices(size(x))[I] for I in CartesianIndices(size(x)) if I[1] >= I[2]] - upperinds = [LinearIndices(size(x))[I] for I in CartesianIndices(size(x)) if I[2] >= I[1]] - logpdf_turing = logpdf_with_trans(dist, x, true) - J = ForwardDiff.jacobian(x->link(dist, x), x) - J = J[lowerinds, upperinds] - @test logpdf(dist, x) - _logabsdet(J) ≈ logpdf_turing + x = rand(dist) + x = x + x' + 2I + lowerinds = [ + LinearIndices(size(x))[I] for I in CartesianIndices(size(x)) if I[1] >= I[2] + ] + upperinds = [ + LinearIndices(size(x))[I] for I in CartesianIndices(size(x)) if I[2] >= I[1] + ] + logpdf_turing = logpdf_with_trans(dist, x, true) + J = ForwardDiff.jacobian(x -> link(dist, x), x) + J = J[lowerinds, upperinds] + @test logpdf(dist, x) - _logabsdet(J) ≈ logpdf_turing + end end end -end @testset "correlation matrix" begin - dist = LKJ(2, 1) single_sample_tests(dist) @@ -178,10 +190,12 @@ end x = rand(dist) x = x + x' + 2I d = 1 ./ sqrt.(diag(x)) - x = d .* x .* d' + x = d .* x .* d' - upperinds = [LinearIndices(size(x))[I] for I in CartesianIndices(size(x)) if I[2] > I[1]] - J = ForwardDiff.jacobian(x->link(dist, x), x) + upperinds = [ + LinearIndices(size(x))[I] for I in CartesianIndices(size(x)) if I[2] > I[1] + ] + J = ForwardDiff.jacobian(x -> link(dist, x), x) J = J[upperinds, upperinds] logpdf_turing = logpdf_with_trans(dist, x, true) @test logpdf(dist, x) - _logabsdet(J) ≈ logpdf_turing @@ -198,24 +212,26 @@ end # -3.006450206744678 # julia> logpdf_with_trans(Dirichlet([1., 1., 1.]), [-1., -2., -3.], true, true) # -3.006450206744678 -d = Dirichlet([1., 1., 1.]) -r = [-1000., -1000., 0.0] -r2 = [-1., -2., 0.0] +d = Dirichlet([1.0, 1.0, 1.0]) +r = [-1000.0, -1000.0, 0.0] +r2 = [-1.0, -2.0, 0.0] # test vector invlink dist = Dirichlet(ones(5)) -x = [[-2.72689, -2.92751, 1.63114, -1.62054, 0.0] [-1.24249, 2.58902, -3.73043, -3.53685, 0.0]] -@test all(sum(Bijectors.invlink(dist, x), dims = 1) .== 1) +x = [[-2.72689, -2.92751, 1.63114, -1.62054, 0.0] [ + -1.24249, 2.58902, -3.73043, -3.53685, 0.0 +]] +@test all(sum(Bijectors.invlink(dist, x); dims=1) .== 1) # test link #link(d, r) # test invlink -@test invlink(d, r) ≈ [0., 0., 1.] atol=1e-9 +@test invlink(d, r) ≈ [0.0, 0.0, 1.0] atol = 1e-9 # test logpdf_with_trans #@test logpdf_with_trans(d, invlink(d, r), true) -1999.30685281944 1e-9 ≈ # atol=NaN -@test logpdf_with_trans(d, invlink(d, r2), true) ≈ -3.760398892580863 atol=1e-9 +@test logpdf_with_trans(d, invlink(d, r2), true) ≈ -3.760398892580863 atol = 1e-9 macro aeq(x, y) return quote @@ -237,11 +253,20 @@ end g1 = y -> invlink(dist, y, Val(true)) g2 = y -> invlink(dist, y, Val(false)) - @test @aeq ForwardDiff.jacobian(f1, x) @inferred(Bijectors.simplex_link_jacobian(x, Val(true))) - @test @aeq ForwardDiff.jacobian(f2, x) @inferred(Bijectors.simplex_link_jacobian(x, Val(false))) - @test @aeq ForwardDiff.jacobian(g1, y) @inferred(Bijectors.simplex_invlink_jacobian(y, Val(true))) - @test @aeq ForwardDiff.jacobian(g2, y) @inferred(Bijectors.simplex_invlink_jacobian(y, Val(false))) - @test @aeq Bijectors.simplex_link_jacobian(x, Val(false)) * Bijectors.simplex_invlink_jacobian(y, Val(false)) I + @test @aeq ForwardDiff.jacobian(f1, x) @inferred( + Bijectors.simplex_link_jacobian(x, Val(true)) + ) + @test @aeq ForwardDiff.jacobian(f2, x) @inferred( + Bijectors.simplex_link_jacobian(x, Val(false)) + ) + @test @aeq ForwardDiff.jacobian(g1, y) @inferred( + Bijectors.simplex_invlink_jacobian(y, Val(true)) + ) + @test @aeq ForwardDiff.jacobian(g2, y) @inferred( + Bijectors.simplex_invlink_jacobian(y, Val(false)) + ) + @test @aeq Bijectors.simplex_link_jacobian(x, Val(false)) * + Bijectors.simplex_invlink_jacobian(y, Val(false)) I end for i in 1:4 test_link_and_invlink()