From 3fb0262f4599905cd220d3e067bfdcb741c1f75c Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Fri, 15 Nov 2024 18:08:53 +0000 Subject: [PATCH 1/6] Add `import Pkg` for ease of copy-pasting (#552) --- tutorials/docs-01-contributing-guide/index.qmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tutorials/docs-01-contributing-guide/index.qmd b/tutorials/docs-01-contributing-guide/index.qmd index 3ee296ef8..78d240708 100755 --- a/tutorials/docs-01-contributing-guide/index.qmd +++ b/tutorials/docs-01-contributing-guide/index.qmd @@ -27,16 +27,16 @@ Other sections of the website (anything that isn't a package, or a tutorial) – ### Tests -Turing, like most software libraries, has a test suite. You can run the whole suite the usual Julia way with +Turing, like most software libraries, has a test suite. You can run the whole suite by running `julia --project=.` from the root of the Turing repository, and then running ```julia -Pkg.test("Turing") +import Pkg; Pkg.test("Turing") ``` The test suite subdivides into files in the `test` folder, and you can run only some of them using commands like ```julia -Pkg.test("Turing"; test_args=["optim", "hmc", "--skip", "ext"]) +import Pkg; Pkg.test("Turing"; test_args=["optim", "hmc", "--skip", "ext"]) ``` This one would run all files with "optim" or "hmc" in their path, such as `test/optimisation/Optimisation.jl`, but not files with "ext" in their path. Alternatively, you can set these arguments as command line arguments when you run Julia From d70694527f5dd81fdce3ea9940404cf39db64288 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 17 Nov 2024 13:17:10 +0000 Subject: [PATCH 2/6] Port #539 to master (#553) * Uncomment asserts * Bump Quarto version in CI * Mention Quarto min version in README --- README.md | 4 +++- tutorials/01-gaussian-mixture-model/index.qmd | 9 +++------ tutorials/09-variational-inference/index.qmd | 5 ++--- tutorials/11-probabilistic-pca/index.qmd | 19 ++++++++----------- 4 files changed, 16 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 9a7c1a20d..bf7a0078c 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,10 @@ This repository is part of [Turing.jl's](https://turinglang.org/) website (i.e. To get started with the docs website locally, you'll need to have [Quarto](https://quarto.org/docs/download/) installed. Make sure you have at least version 1.5 of Quarto installed, as this is required to correctly run [the native Julia engine](https://quarto.org/docs/computations/julia.html#using-the-julia-engine). +Ideally, you should use Quarto 1.6.31 or later as this version fixes [a bug which causes random number generation between different cells to not be deterministic](https://github.com/TuringLang/docs/issues/533). +Note that as of October 2024, Quarto 1.6 is a pre-release version, so you may need to install it from source rather than via a package manager like Homebrew. -Once you have the prerequisite installed, you can follow these steps: +Once you have Quarto installed, you can follow these steps: 1. Clone this repository: diff --git a/tutorials/01-gaussian-mixture-model/index.qmd b/tutorials/01-gaussian-mixture-model/index.qmd index 09ea373d7..ffa3c4dbd 100755 --- a/tutorials/01-gaussian-mixture-model/index.qmd +++ b/tutorials/01-gaussian-mixture-model/index.qmd @@ -142,8 +142,7 @@ let # μ[1] and μ[2] can switch places, so we sort the values first. 
chain = Array(chains[:, ["μ[1]", "μ[2]"], i]) μ_mean = vec(mean(chain; dims=1)) - # TODO: https://github.com/TuringLang/docs/issues/533 - # @assert isapprox(sort(μ_mean), μ; rtol=0.1) "Difference between estimated mean of μ ($(sort(μ_mean))) and data-generating μ ($μ) unexpectedly large!" + @assert isapprox(sort(μ_mean), μ; rtol=0.1) "Difference between estimated mean of μ ($(sort(μ_mean))) and data-generating μ ($μ) unexpectedly large!" end end ``` @@ -208,8 +207,7 @@ let # μ[1] and μ[2] can no longer switch places. Check that they've found the mean chain = Array(chains[:, ["μ[1]", "μ[2]"], i]) μ_mean = vec(mean(chain; dims=1)) - # TODO: https://github.com/TuringLang/docs/issues/533 - # @assert isapprox(sort(μ_mean), μ; rtol=0.4) "Difference between estimated mean of μ ($(sort(μ_mean))) and data-generating μ ($μ) unexpectedly large!" + @assert isapprox(sort(μ_mean), μ; rtol=0.4) "Difference between estimated mean of μ ($(sort(μ_mean))) and data-generating μ ($μ) unexpectedly large!" end end ``` @@ -349,8 +347,7 @@ let # μ[1] and μ[2] can no longer switch places. Check that they've found the mean chain = Array(chains[:, ["μ[1]", "μ[2]"], i]) μ_mean = vec(mean(chain; dims=1)) - # TODO: https://github.com/TuringLang/docs/issues/533 - # @assert isapprox(sort(μ_mean), μ; rtol=0.4) "Difference between estimated mean of μ ($(sort(μ_mean))) and data-generating μ ($μ) unexpectedly large!" + @assert isapprox(sort(μ_mean), μ; rtol=0.4) "Difference between estimated mean of μ ($(sort(μ_mean))) and data-generating μ ($μ) unexpectedly large!" end end ``` diff --git a/tutorials/09-variational-inference/index.qmd b/tutorials/09-variational-inference/index.qmd index ef9643756..eb7f16c0e 100755 --- a/tutorials/09-variational-inference/index.qmd +++ b/tutorials/09-variational-inference/index.qmd @@ -155,9 +155,8 @@ var(x), mean(x) #| echo: false let v, m = (mean(rand(q, 2000); dims=2)...,) - # TODO: Fix these as they randomly fail https://github.com/TuringLang/docs/issues/533 - # @assert isapprox(v, 1.022; atol=0.1) "Mean of s (VI posterior, 1000 samples): $v" - # @assert isapprox(m, -0.027; atol=0.03) "Mean of m (VI posterior, 1000 samples): $m" + @assert isapprox(v, 1.022; atol=0.1) "Mean of s (VI posterior, 1000 samples): $v" + @assert isapprox(m, -0.027; atol=0.03) "Mean of m (VI posterior, 1000 samples): $m" end ``` diff --git a/tutorials/11-probabilistic-pca/index.qmd b/tutorials/11-probabilistic-pca/index.qmd index d46541271..cb25bc93c 100755 --- a/tutorials/11-probabilistic-pca/index.qmd +++ b/tutorials/11-probabilistic-pca/index.qmd @@ -246,13 +246,10 @@ heatmap( We can quantitatively check the absolute magnitudes of the column average of the gap between `mat_exp` and `mat_rec`: ```{julia} -#| echo: false -# let -# diff_matrix = mat_exp .- mat_rec -# @assert abs(mean(diff_matrix[:, 4])) <= 0.5 #0.327 -# @assert abs(mean(diff_matrix[:, 5])) <= 0.5 #0.390 -# @assert abs(mean(diff_matrix[:, 6])) <= 0.5 #0.326 -# end +diff_matrix = mat_exp .- mat_rec +for col in 4:6 + @assert abs(mean(diff_matrix[:, col])) <= 0.5 +end ``` We observe that, using posterior mean, the recovered data matrix `mat_rec` has values align with the original data matrix - particularly the same pattern in the first and last 3 gene features are captured, which implies the inference and p-PCA decomposition are successful. 
@@ -281,12 +278,12 @@ Another way to put it: 2 dimensions is enough to capture the main structure of t
A direct question arises from above practice is: how many principal components do we want to keep, in order to sufficiently represent the latent structure in the data?
This is a very central question for all latent factor models, i.e. how many dimensions are needed to represent that data in the latent space.
In the case of PCA, there exist a lot of heuristics to make that choice.
-For example, We can tune the number of principal components using empirical methods such as cross-validation based some criteria such as MSE between the posterior predicted (e.g. mean predictions) data matrix and the original data matrix or the percentage of variation explained [3].
+For example, we can tune the number of principal components using empirical methods such as cross-validation, based on criteria such as the MSE between the posterior predicted (e.g. mean predictions) data matrix and the original data matrix, or the percentage of variation explained [^3].

For p-PCA, this can be done in an elegant and principled way, using a technique called *Automatic Relevance Determination* (ARD).
-ARD can help pick the correct number of principal directions by regularizing the solution space using a parameterized, data-dependent prior distribution that effectively prunes away redundant or superfluous features [4].
+ARD can help pick the correct number of principal directions by regularizing the solution space using a parameterized, data-dependent prior distribution that effectively prunes away redundant or superfluous features [^4].
Essentially, we are using a specific prior over the factor loadings $\mathbf{W}$ that allows us to prune away dimensions in the latent space. The prior is determined by a precision hyperparameter $\alpha$. Here, smaller values of $\alpha$ correspond to more important components.
-You can find more details about this in e.g. [5].
+You can find more details about this in, for example, Bishop (2006) [^5].

```{julia}
@model function pPCA_ARD(X)
@@ -383,4 +380,4 @@ It can also thought as a matrix factorisation method, in which $\mathbf{X}=(\mat
[^2]: Probabilistic PCA by TensorFlow, "https://www.tensorflow.org/probability/examples/Probabilistic_PCA".
[^3]: Gareth M. James, Daniela Witten, Trevor Hastie, Robert Tibshirani, *An Introduction to Statistical Learning*, Springer, 2013.
[^4]: David Wipf, Srikantan Nagarajan, *A New View of Automatic Relevance Determination*, NIPS 2007.
-[^5]: Christopher Bishop, *Pattern Recognition and Machine Learning*, Springer, 2006.
\ No newline at end of file
+[^5]: Christopher Bishop, *Pattern Recognition and Machine Learning*, Springer, 2006.
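The ARD prior over the factor loadings $\mathbf{W}$ that the p-PCA hunk above alludes to typically takes the form sketched below, following Bishop (2006) [^5]. This is a sketch only: the exact parameterization used in the tutorial's `pPCA_ARD` model may differ, and the symbols $q$, $a_0$ and $b_0$ are generic notation rather than names from the tutorial. Each latent dimension $j$ gets its own precision $\alpha_j$; a large inferred $\alpha_j$ shrinks the $j$-th column $\mathbf{w}_j$ of $\mathbf{W}$ towards zero, which is what effectively prunes that dimension (and why smaller $\alpha_j$ marks a more important component).

```latex
% ARD prior (sketch): one zero-mean Gaussian per column w_j of W, with a
% per-dimension precision alpha_j; a Gamma hyperprior with generic shape a_0
% and rate b_0 lets the data decide which dimensions to switch off.
p(\mathbf{W} \mid \boldsymbol{\alpha})
  = \prod_{j=1}^{q} \mathcal{N}\!\left(\mathbf{w}_j \mid \mathbf{0},\, \alpha_j^{-1} \mathbf{I}\right),
\qquad
\alpha_j \sim \mathrm{Gamma}(a_0, b_0).
```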
From 7fa921e356086bbf955d39dc905885e1b919e37a Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 19 Nov 2024 16:36:17 +0000 Subject: [PATCH 3/6] Fix links (#555) --- _quarto.yml | 10 +++++----- tutorials/16-contexts/index.qmd | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/_quarto.yml b/_quarto.yml index 9530e4655..f271599ca 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -181,13 +181,13 @@ probabilistic-pca: tutorials/11-probabilistic-pca gplvm: tutorials/12-gplvm seasonal-time-series: tutorials/13-seasonal-time-series contexts: tutorials/16-contexts -minituring: tutorial/14-minituring +minituring: tutorials/14-minituring contributing-guide: tutorials/docs-01-contributing-guide using-turing-abstractmcmc: tutorials/docs-04-for-developers-abstractmc-turing using-turing-compiler: tutorials/docs-05-for-developers-compiler using-turing-interface: tutorials/docs-06-for-developers-interface using-turing-variational-inference: tutorials/docs-07-for-developers-variational-inference -using-turing-advanced: tutorials/tutorials/docs-09-using-turing-advanced +using-turing-advanced: tutorials/docs-09-using-turing-advanced using-turing-autodiff: tutorials/docs-10-using-turing-autodiff using-turing-dynamichmc: tutorials/docs-11-using-turing-dynamichmc using-turing: tutorials/docs-12-using-turing-guide @@ -197,7 +197,7 @@ using-turing-external-samplers: tutorials/docs-16-using-turing-external-samplers using-turing-implementing-samplers: tutorials/docs-17-implementing-samplers using-turing-mode-estimation: tutorials/docs-17-mode-estimation usage-probability-interface: tutorials/usage-probability-interface -usage-custom-distribution: tutorials/tutorials/usage-custom-distribution -usage-generated-quantities: tutorials/tutorials/usage-generated-quantities -usage-modifying-logprob: tutorials/tutorials/usage-modifying-logprob +usage-custom-distribution: tutorials/usage-custom-distribution +usage-generated-quantities: tutorials/usage-generated-quantities +usage-modifying-logprob: tutorials/usage-modifying-logprob dev-model-manual: tutorials/dev-model-manual diff --git a/tutorials/16-contexts/index.qmd b/tutorials/16-contexts/index.qmd index 5600577e9..01430f241 100755 --- a/tutorials/16-contexts/index.qmd +++ b/tutorials/16-contexts/index.qmd @@ -14,7 +14,7 @@ In the [Mini Turing]({{< meta minituring >}}) tutorial we developed a miniature # Mini Turing expanded, now with more contexts -If you haven't read [Mini Turing]({{< meta minituring >}}t) yet, you should do that first. We start by repeating verbatim much of the code from there. Define the type for holding values for variables: +If you haven't read [Mini Turing]({{< meta minituring >}}) yet, you should do that first. We start by repeating verbatim much of the code from there. 
Define the type for holding values for variables: ```{julia} import MacroTools, Random, AbstractMCMC From dad1fa780fc8927505c9cce8d2f397e462d2ad0d Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 19 Nov 2024 17:16:50 +0000 Subject: [PATCH 4/6] Update publish workflow (#558) --- .github/workflows/publish.yml | 37 +++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 5b8e35a6f..cc917d0a9 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -11,6 +11,9 @@ concurrency: group: docs cancel-in-progress: true +permissions: + contents: write + jobs: build-and-deploy: runs-on: ubuntu-latest @@ -105,18 +108,26 @@ jobs: run: | jq -s '.[0] + .[1]' _site/search_original.json fixed_main_search.json > _site/search.json - - name: Deploy versioned docs - uses: JamesIves/github-pages-deploy-action@v4 + - name: Checkout gh-pages branch + uses: actions/checkout@v4 with: - branch: gh-pages - folder: _site - target-folder: versions/${{ env.version }} - clean: false + ref: gh-pages + path: ../gh-pages - - name: Deploy latest docs to root - if: env.version == env.LATEST - uses: JamesIves/github-pages-deploy-action@v4 - with: - branch: gh-pages - folder: _site - clean: false + - name: Update gh-pages branch + run: | + # Copy to versions/ subdirectory + mkdir -p ../gh-pages/versions/${{ env.version }} + cp -r _site/* ../gh-pages/versions/${{ env.version }} + + # Find the latest version of the docs and copy that to the root + cd ../gh-pages/versions + LATEST_DOCS=$(ls -d * | sort -V | tail -n 1) + cp -r $LATEST_DOCS/* ../ + + # Commit and push + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git add . 
+ git commit -m "Publish docs @ ${GITHUB_REPOSITORY}@${GITHUB_SHA}" + git push From cd27f7dc1e23622c41a26d6709fcd698e189edff Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 19 Nov 2024 17:24:39 +0000 Subject: [PATCH 5/6] Fix checkout path --- .github/workflows/publish.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index cc917d0a9..862b8deba 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -112,16 +112,16 @@ jobs: uses: actions/checkout@v4 with: ref: gh-pages - path: ../gh-pages + path: gh-pages - name: Update gh-pages branch run: | # Copy to versions/ subdirectory - mkdir -p ../gh-pages/versions/${{ env.version }} - cp -r _site/* ../gh-pages/versions/${{ env.version }} + mkdir -p gh-pages/versions/${{ env.version }} + cp -r _site/* gh-pages/versions/${{ env.version }} # Find the latest version of the docs and copy that to the root - cd ../gh-pages/versions + cd gh-pages/versions LATEST_DOCS=$(ls -d * | sort -V | tail -n 1) cp -r $LATEST_DOCS/* ../ From 06674069ce7ebabcdf82d837c15135cc268576af Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 19 Nov 2024 17:31:38 +0000 Subject: [PATCH 6/6] Commit everything, not just subdir --- .github/workflows/publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 862b8deba..ddda023ed 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -128,6 +128,6 @@ jobs: # Commit and push git config --global user.name "github-actions[bot]" git config --global user.email "github-actions[bot]@users.noreply.github.com" - git add . + git add -A git commit -m "Publish docs @ ${GITHUB_REPOSITORY}@${GITHUB_SHA}" git push
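One small note on the `LATEST_DOCS` step in the workflow above: `sort -V` orders version-like directory names numerically rather than lexically, which is what lets `tail -n 1` pick the genuinely newest docs build. A quick self-contained illustration with hypothetical folder names (not taken from the actual gh-pages branch):

```sh
# Hypothetical demo of the version-selection logic: lexical sort would rank
# v0.9.0 after v0.34.0, whereas `sort -V` orders the names numerically.
mkdir -p /tmp/versions-demo/v0.9.0 /tmp/versions-demo/v0.10.1 /tmp/versions-demo/v0.34.0
cd /tmp/versions-demo
ls -d * | sort -V | tail -n 1   # prints v0.34.0 (newest version)
ls -d * | sort | tail -n 1      # prints v0.9.0 (lexical order picks the wrong one)
```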