Fix book and add CRP figures

promised-ai · Apr 2, 2024 · 39a51e2 · 39a51e2
1 parent 9216856
commit 39a51e2
Show file tree

Hide file tree

Showing 7 changed files with 18 additions and 6 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
+- Added the ability to use a Pitman-Yor prior process
+- Users now specify the prior process in the codebook
+- `StateAlpha` and `ViewAlphas` transitions are now `StatePriorProcessParams` and `ViewPriorProcessParams`
 
 ## [python-0.7.1] - 2024-02-27
 

diff --git a/book/src/pcc/img/crp.png b/book/src/pcc/img/crp.png
diff --git a/book/src/pcc/img/pyp.png b/book/src/pcc/img/pyp.png
diff --git a/book/src/pcc/prior-processes.md b/book/src/pcc/prior-processes.md
@@ -7,6 +7,15 @@ In Lace (and in Bayesian nonparametrics) we put a prior on the number of paramet
 
 The Dirichlet process more heavily penalizes new categories with an exponential fall off while the Pitman-Yor process has a power law fall off in the number of categories. When d = 0, Pitman-Yor is equivalent to the Dirichlet process.
 
-While Pitman-Yor may fit the data better it will create more parameters, which will cause model training to take longer.
+![Dirichlet Process](img/crp.png)
+
+**Figure**: Category ID (y-axis) by instance number (x-axis) for Dirichlet process draws for various values of alpha.
+
+Pitman-Yor may fit the data better, but it does so by creating more parameters, which will cause model training to take longer.
+
+![Pitman-Yor Process](img/pyp.png)
+
+**Figure**: Category ID (y-axis) by instance number (x-axis) for Pitman-Yor process draws for various values of alpha and d.
+
 
 For those looking for a good introduction to prior processes, [this slide deck](https://www.gatsby.ucl.ac.uk/~ywteh/teaching/probmodels/lecture5bnp.pdf) from Yee Whye Teh is a good resource.
diff --git a/book/src/workflow/codebook.md b/book/src/workflow/codebook.md
@@ -49,7 +49,7 @@ let df = CsvReader::from_path(paths.data)
     .unwrap();
 
 // Create the default codebook
-let codebook = Codebook::from_df(&df, None, None, false).unwrap();
+let codebook = Codebook::from_df(&df, None, None, None, false).unwrap();
 ```
 
 </div>

diff --git a/book/src/workflow/model.md b/book/src/workflow/model.md
@@ -125,7 +125,7 @@ let df = CsvReader::from_path(paths.data)
     .unwrap();
 
 // Create the default codebook
-let codebook = Codebook::from_df(&df, None, None, false).unwrap();
+let codebook = Codebook::from_df(&df, None, None, None, false).unwrap();
 
 // Build an rng
 let rng = Xoshiro256Plus::from_entropy();
@@ -156,12 +156,12 @@ let run_config = EngineUpdateConfig::new()
     .n_iters(100)
     .transitions(vec![
         StateTransition::ColumnAssignment(ColAssignAlg::Gibbs),
-        StateTransition::StateAlpha,
+        StateTransition::StatePriorProcessParams,
         StateTransition::RowAssignment(RowAssignAlg::Sams),
         StateTransition::ComponentParams,
         StateTransition::RowAssignment(RowAssignAlg::Slice),
         StateTransition::ComponentParams,
-        StateTransition::ViewAlphas,
+        StateTransition::ViewPriorProcessParams,
         StateTransition::FeaturePriors,
     ]);
 

diff --git a/lace/Cargo.lock b/lace/Cargo.lock