MMO_tutorial.html

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>

<meta charset="utf-8">
<meta name="generator" content="quarto-1.3.433">

<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">

<meta name="author" content="Irene B. R. Smith">
<meta name="dcterms.date" content="2024-09-01">

<title>MMO Tutorial</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
  width: 0.8em;
  margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ 
  vertical-align: middle;
}
/* CSS for syntax highlighting */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
  { counter-reset: source-line 0; }
pre.numberSource code > span
  { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
  { content: counter(source-line);
    position: relative; left: -1em; text-align: right; vertical-align: baseline;
    border: none; display: inline-block;
    -webkit-touch-callout: none; -webkit-user-select: none;
    -khtml-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;
    padding: 0 4px; width: 4em;
  }
pre.numberSource { margin-left: 3em;  padding-left: 4px; }
div.sourceCode
  {   }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* CSS for citations */
div.csl-bib-body { }
div.csl-entry {
  clear: both;
}
.hanging-indent div.csl-entry {
  margin-left:2em;
  text-indent:-2em;
}
div.csl-left-margin {
  min-width:2em;
  float:left;
}
div.csl-right-inline {
  margin-left:2em;
  padding-left:1em;
}
div.csl-indent {
  margin-left: 2em;
}</style>


<script src="MMO_tutorial_files/libs/clipboard/clipboard.min.js"></script>
<script src="MMO_tutorial_files/libs/quarto-html/quarto.js"></script>
<script src="MMO_tutorial_files/libs/quarto-html/popper.min.js"></script>
<script src="MMO_tutorial_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="MMO_tutorial_files/libs/quarto-html/anchor.min.js"></script>
<link href="MMO_tutorial_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="MMO_tutorial_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="MMO_tutorial_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="MMO_tutorial_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="MMO_tutorial_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">


</head>

<body>

<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
<div class="quarto-alternate-formats"><h2>Other Formats</h2><ul><li><a href="MMO_tutorial.pdf"><i class="bi bi-file-pdf"></i>PDF</a></li></ul></div></div>
<main class="content" id="quarto-document-content">

<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">MMO Tutorial</h1>
</div>


<div class="quarto-title-meta-author">
  <div class="quarto-title-meta-heading">Author</div>
  <div class="quarto-title-meta-heading">Affiliation</div>
  
    <div class="quarto-title-meta-contents">
    <p class="author"><a href="http://www.irenebrsmith.com">Irene B. R. Smith</a> </p>
  </div>
    <div class="quarto-title-meta-contents">
        <p class="affiliation">
            McGill University
          </p>
      </div>
    </div>

<div class="quarto-title-meta">

      
    <div>
    <div class="quarto-title-meta-heading">Published</div>
    <div class="quarto-title-meta-contents">
      <p class="date">September 1, 2024</p>
    </div>
  </div>
  
    
  </div>
  

</header>

<section id="introduction" class="level2">
<h2 class="anchored" data-anchor-id="introduction">Introduction</h2>
<p>To skip ahead to running code, go to <a href="#Packages">the next section</a>.</p>
<p>This tutorial is meant to accompany <span class="citation" data-cites="smith_modelled_2024">Smith, Sonderegger, and Spade Consortium (<a href="#ref-smith_modelled_2024" role="doc-biblioref">2024</a>)</span> (<a href="https://arxiv.org/pdf/2406.16319">preprint</a>), which describes a new method for calculating vowel merger: Modelled Multivariate Overlap (MMO). The application discussed in the paper and shown in this tutorial is the PIN-PEN merger <span class="citation" data-cites="labov_atlas_2006">(<a href="#ref-labov_atlas_2006" role="doc-biblioref">Labov, Ash, and Boberg 2006</a>)</span>. We operationalize the degree of merger between two vowels as the amount of distributional overlap in F1 / F2 space between those two vowels. While the paper and the tutorial both focus on vowel merger, the method applies more generally to quantifying the overlap between distributions of any kind, including distributions in more than 2 dimensions<a href="#fn1" class="footnote-ref" id="fnref1" role="doc-noteref"><sup>1</sup></a>. Additionally, this method is flexible in that it can be applied for any measure of overlap that can be calculated from empirical distributions. Here, we show the method applied to calculate Bhattacharyya affinity and Euclidean distance, but it could also be used to calculate Pillai or scores any of the other methods described in <span class="citation" data-cites="nycz_best_2014">Nycz and Hall-Lew (<a href="#ref-nycz_best_2014" role="doc-biblioref">2014</a>)</span> or <span class="citation" data-cites="kelley_comparison_2020">Kelley and Tucker (<a href="#ref-kelley_comparison_2020" role="doc-biblioref">2020</a>)</span>.</p>
<p>The steps for calculating MMO are outlined below. For more details, see <span class="citation" data-cites="smith_modelled_2024">Smith, Sonderegger, and Spade Consortium (<a href="#ref-smith_modelled_2024" role="doc-biblioref">2024</a>)</span>. The steps are written for the PIN-PEN merger application that is carried throughout this tutorial, so “F1,” “F2,” “vowel,” and “context” might be different quantities in your specific application.</p>
<ol type="1">
<li><strong>Preprocessing</strong></li>
</ol>
<ul>
<li>Likely includes some form of normalization of F1 and F2 , so that overlap measures are comparable across speakers</li>
<li>After normalization, we discard all vowels except for the two that are interest here: /ɪ/ and /ɛ/</li>
<li>In addition to these two steps, you can do whatever data cleaning or transformations you like, such as centering and scaling predictors, or eliminating problem tokens</li>
</ul>
<ol start="2" type="1">
<li><strong>Modelling</strong>: Fit a multivariate model to jointly predict F1 and F2.</li>
</ol>
<ul>
<li>Bayesian multivariate modelling is an extremely convenient way to implement MMO, because it directly models distributions</li>
<li>If you have another way to generate a modelled distribution, go for it</li>
<li>If fitting Bayesian models is intimidating for you, the code for fitting a model in <code>brms</code> that is included here, along with basic knowledge of <code>lmer</code>-style model syntax, should be enough to get you fitting your own Bayesian models</li>
<li>The Interspeech2024 subfolder of this repository has examples of more complex <code>brms</code> models, specifically, with modelled variance and nested random effects structures</li>
</ul>
<ol start="3" type="1">
<li><strong>Simulation</strong>: Using the model that was fit in Step 2, simulate tokens of each vowel</li>
</ol>
<ul>
<li>Vary or hold constant other variables as appropriate
<ul>
<li>For example, for the PIN-PEN merger, <code>context</code> (prenasal or preoral) needs to be accounted for, so we simulate tokens for each <code>vowel</code>-by-<code>context</code> pair</li>
<li>If <code>duration</code> and <code>speaker gender</code> were included as model predictors, then it might make sense to hold <code>duration</code> constant (for example, at its average value across all tokens of all speakers) and to set <code>speaker gender</code> to the appropriate value for each speaker</li>
</ul></li>
<li>Simulations can be made either for each individual speaker, or for the “average” speaker</li>
<li>Below, we show how to make both of these kinds of simulations using the <code>tidybayes</code> package</li>
</ul>
<ol start="4" type="1">
<li><strong>Overlap calculation</strong>: Now that you have simulated an empirical distribution, you can calculate any measure of overlap you like.</li>
</ol>
<ul>
<li>This tutorial shows how to calculate Bhattacharyya affinity and Euclidean distance</li>
</ul>
<p>Finally, this tutorial is designed such that it can reasonably be run locally on a personal computer. The most resource-intensive step is fitting the Bayesian model: if this is too much for your computer, you can reduce the dataset even further than shown here. After fitting the model once, the <code>brm</code> line will read the file instead of fitting the model, and spare your computer the need to re-fit the model from scratch.</p>
<p>It is possible that, if you are fitting models on corpus data, you will need a more powerful computer than your personal computer. Making model predictions and calculating Bhattacharyya affinity can also be very resource-intensive, so I would also recommend calculating those externally and saving the output. The Interspeech2024 folder has examples of scripts for doing this.</p>
</section>
<section id="Packages" class="level2">
<h2 class="anchored" data-anchor-id="Packages">Packages</h2>
<p>The following packages were used to carry out the core parts of the analysis:</p>
<ul>
<li><p><code>brms</code> <span class="citation" data-cites="burkner_advanced_2018">(<a href="#ref-burkner_advanced_2018" role="doc-biblioref">Bürkner 2018</a>)</span> is used to fit the Bayesian model. It uses <code>RStan</code> <span class="citation" data-cites="stan_rstan">(<a href="#ref-stan_rstan" role="doc-biblioref">Stan Development Team, n.d.</a>)</span>, which needs to be manually installed. Instructions for installing <code>RStan</code> can be found <a href="https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started">here</a>. Once <code>RStan</code> is installed, <code>brms</code> can simply be installed using <code>install.packages("brms")</code> (more information <a href="https://paul-buerkner.github.io/brms/">here</a>).</p></li>
<li><p><code>tidybayes</code> <span class="citation" data-cites="kay_tidybayes_2020">(<a href="#ref-kay_tidybayes_2020" role="doc-biblioref">Kay 2020</a>)</span> is used to get model predictions and to summarize model predictions.</p></li>
<li><p><code>adehabitatHR</code><span class="citation" data-cites="calenge_the_2006">(<a href="#ref-calenge_the_2006" role="doc-biblioref">Calenge 2006</a>)</span> is the package used to compute Bhattacharyya affinity. The code below is adapted from <a href="https://joeystanley.com/blog/a-tutorial-in-calculating-vowel-overlap/">Joey Stanley’s tutorial</a>.</p></li>
<li><p><code>tidyverse</code> is used for <code>ggplot2</code> and <code>dplyr</code> functionality.</p></li>
</ul>
<div class="cell">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(brms)</span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tidybayes)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(adehabitatHR)</span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tidyverse)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>For visualizing results, we use the following libraries:</p>
<ul>
<li><code>patchwork</code> to help arrange <code>ggplot2</code> plots</li>
<li><code>khroma</code> for extra color palettes</li>
</ul>
<div class="cell">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(patchwork)</span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(khroma)</span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="co"># set global plot theme to theme_minimal</span></span>
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a><span class="fu">theme_set</span>(<span class="fu">theme_minimal</span>())</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="data" class="level2">
<h2 class="anchored" data-anchor-id="data">Data</h2>
<p>We’ll be using the Raleigh corpus <span class="citation" data-cites="dodsworth_language_2020">(<a href="#ref-dodsworth_language_2020" role="doc-biblioref">Dodsworth and Benton 2020</a>)</span> from the SPADE project <span class="citation" data-cites="sonderegger_managing_2022">(<a href="#ref-sonderegger_managing_2022" role="doc-biblioref">Sonderegger et al. 2022</a>)</span>, available on <a href="https://osf.io/79jgu">OSF</a>. The data has been Lobanov normalized <span class="citation" data-cites="lobanov_classification_1971">(<a href="#ref-lobanov_classification_1971" role="doc-biblioref">Lobanov 1971</a>)</span> and subsetted to include stressed tokens of /ɪ/ and /ɛ/ from 10 speakers. To see the preprocessing details, look at the script create_tutorial_dataset.R.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>raleigh_data <span class="ot">&lt;-</span> <span class="fu">readRDS</span>(<span class="st">"data/Raleigh_subset.rds"</span>)</span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(raleigh_data)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 6
# Groups:   speaker [1]
  speaker vowel context word      F1    F2
  &lt;chr&gt;   &lt;fct&gt; &lt;fct&gt;   &lt;chr&gt;  &lt;dbl&gt; &lt;dbl&gt;
1 ral135  e     oral    BET   -0.377 1.29 
2 ral135  i     oral    IT    -0.962 0.429
3 ral135  i     nasal   IN    -1.33  0.936
4 ral135  i     oral    IT    -1.19  0.910
5 ral135  i     nasal   IN    -0.761 1.96 
6 ral135  i     oral    IS    -1.21  0.368</code></pre>
</div>
</div>
<p>There is currently too much data to fit the model on my computer, so we randomly pick 50 tokens of each <code>vowel</code>-by-<code>context</code> pair for each speaker. If you’re still having trouble fitting a model, you could try sampling even fewer tokens.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># set seed so that code is replicable</span></span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">22</span>)</span>
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a>raleigh_data_50 <span class="ot">&lt;-</span> raleigh_data <span class="sc">%&gt;%</span></span>
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, context, vowel) <span class="sc">%&gt;%</span> </span>
<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">slice_sample</span>(<span class="at">n=</span><span class="dv">50</span>) <span class="sc">%&gt;%</span></span>
<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="exploratory-plots" class="level2">
<h2 class="anchored" data-anchor-id="exploratory-plots">Exploratory plots</h2>
<p>This section is not strictly necessary. To skip ahead, go to <a href="#Model_Fitting">Model Fitting</a>.</p>
<p>Let’s make some exploratory plots to set up our expectations.</p>
<p>First, let’s just start with looking at the <code>vowel</code>-by-<code>context</code> distributions in F1 / F2 space for each speaker.</p>
<p>If you want, you can change the vowel labels to their IPA symbols by mutating the <code>vowel</code> column before passing the data to <code>ggplot</code>. I’ll do this once to demonstrate, but in the remaining plots, ‘/ɪ/’ and ‘/ɛ/’ will show up as ‘i’ and ‘e’.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>raleigh_data <span class="sc">%&gt;%</span></span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">vowel=</span><span class="fu">factor</span>(vowel, <span class="at">levels=</span><span class="fu">c</span>(<span class="st">'i'</span>, <span class="st">'e'</span>), <span class="fu">c</span>(<span class="st">'/ɪ/'</span>, <span class="st">'/ɛ/'</span>))) <span class="sc">%&gt;%</span></span>
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>F2, <span class="at">y=</span>F1, <span class="at">color=</span>context)) <span class="sc">+</span></span>
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">stat_ellipse</span>(<span class="at">level=</span><span class="fl">0.66</span>, <span class="fu">aes</span>(<span class="at">lty=</span>vowel)) <span class="sc">+</span></span>
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_x_reverse</span>() <span class="sc">+</span> <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_cartesian</span>(<span class="at">xlim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">ylim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">clip=</span><span class="st">'off'</span>) <span class="sc">+</span></span>
<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">facet_wrap</span>(<span class="sc">~</span>speaker) <span class="sc">+</span></span>
<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(<span class="at">aspect.ratio=</span><span class="dv">1</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/empirical_distributions-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>Let’s look at each speaker’s category center on the same graph. The lines connecting the centers represent the Euclidean distance between vowel for each context (prenasal and preoral).</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>raleigh_data <span class="sc">%&gt;%</span></span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%&gt;%</span> </span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(<span class="fu">across</span>(<span class="fu">c</span>(F1, F2), mean)) <span class="sc">%&gt;%</span></span>
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>F2, <span class="at">y=</span>F1, <span class="at">color=</span>context, <span class="at">shape=</span>vowel)) <span class="sc">+</span></span>
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">stat_ellipse</span>(<span class="at">data=</span>raleigh_data, <span class="at">level=</span><span class="fl">0.66</span>, <span class="fu">aes</span>(<span class="at">lty=</span>vowel)) <span class="sc">+</span></span>
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker))) <span class="sc">+</span></span>
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_x_reverse</span>() <span class="sc">+</span> <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_cartesian</span>(<span class="at">xlim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">ylim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">clip=</span><span class="st">'off'</span>) <span class="sc">+</span></span>
<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">facet_wrap</span>(<span class="sc">~</span>speaker) <span class="sc">+</span></span>
<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(<span class="at">aspect.ratio=</span><span class="dv">1</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/distribution_centers-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>The average (across speakers) effect of context on vowel for both F1 and F2.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>p_av_F1 <span class="ot">&lt;-</span> raleigh_data <span class="sc">%&gt;%</span></span>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>  <span class="co"># give all speakers equal weight</span></span>
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%&gt;%</span> </span>
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1)) <span class="sc">%&gt;%</span></span>
<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a>  <span class="co"># now average all speakers together</span></span>
<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(vowel, context) <span class="sc">%&gt;%</span></span>
<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1)) <span class="sc">%&gt;%</span></span>
<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F1, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>(<span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb8-12"><a href="#cb8-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span>context)) <span class="sc">+</span></span>
<span id="cb8-13"><a href="#cb8-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_reverse</span>() </span>
<span id="cb8-14"><a href="#cb8-14" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-15"><a href="#cb8-15" aria-hidden="true" tabindex="-1"></a>p_av_F2 <span class="ot">&lt;-</span> raleigh_data <span class="sc">%&gt;%</span></span>
<span id="cb8-16"><a href="#cb8-16" aria-hidden="true" tabindex="-1"></a>  <span class="co"># give all speakers equal weight</span></span>
<span id="cb8-17"><a href="#cb8-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%&gt;%</span> </span>
<span id="cb8-18"><a href="#cb8-18" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(<span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%&gt;%</span></span>
<span id="cb8-19"><a href="#cb8-19" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb8-20"><a href="#cb8-20" aria-hidden="true" tabindex="-1"></a>  <span class="co"># now average all speakers together</span></span>
<span id="cb8-21"><a href="#cb8-21" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(vowel, context) <span class="sc">%&gt;%</span></span>
<span id="cb8-22"><a href="#cb8-22" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(<span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%&gt;%</span></span>
<span id="cb8-23"><a href="#cb8-23" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb8-24"><a href="#cb8-24" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F2, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb8-25"><a href="#cb8-25" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>(<span class="at">shape=</span><span class="dv">24</span>, <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb8-26"><a href="#cb8-26" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="at">lty=</span><span class="dv">2</span>, <span class="fu">aes</span>(<span class="at">group=</span>context), <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb8-27"><a href="#cb8-27" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_reverse</span>() </span>
<span id="cb8-28"><a href="#cb8-28" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-29"><a href="#cb8-29" aria-hidden="true" tabindex="-1"></a>p_av_F1 <span class="sc">+</span> p_av_F2 <span class="sc">+</span> <span class="fu">plot_layout</span>(<span class="at">guides=</span><span class="st">'collect'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/F1_vowel_by_context_emp_av-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>Now we can make the same plot but with each individual speaker’s interactions:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>p_F1_sp_emp <span class="ot">&lt;-</span> raleigh_data <span class="sc">%&gt;%</span></span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%&gt;%</span> </span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1)) <span class="sc">%&gt;%</span></span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F1, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>(<span class="at">shape=</span><span class="dv">21</span>, <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="at">lty=</span><span class="dv">1</span>, <span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker))) <span class="sc">+</span></span>
<span id="cb9-8"><a href="#cb9-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_reverse</span>()</span>
<span id="cb9-9"><a href="#cb9-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-10"><a href="#cb9-10" aria-hidden="true" tabindex="-1"></a>p_F2_sp_emp <span class="ot">&lt;-</span> raleigh_data <span class="sc">%&gt;%</span></span>
<span id="cb9-11"><a href="#cb9-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%&gt;%</span> </span>
<span id="cb9-12"><a href="#cb9-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(<span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%&gt;%</span></span>
<span id="cb9-13"><a href="#cb9-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb9-14"><a href="#cb9-14" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F2, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb9-15"><a href="#cb9-15" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>(<span class="at">shape=</span><span class="dv">24</span>, <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb9-16"><a href="#cb9-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="at">lty=</span><span class="dv">2</span>, <span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker)), <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb9-17"><a href="#cb9-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_reverse</span>()</span>
<span id="cb9-18"><a href="#cb9-18" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-19"><a href="#cb9-19" aria-hidden="true" tabindex="-1"></a>p_F1_sp_emp <span class="sc">+</span> p_F2_sp_emp <span class="sc">+</span> <span class="fu">plot_layout</span>(<span class="at">guides=</span><span class="st">'collect'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/F1_vowel_by_context_emp_sp-1.png" class="img-fluid" width="672"></p>
</div>
</div>
</section>
<section id="Model_Fitting" class="level2">
<h2 class="anchored" data-anchor-id="Model_Fitting">Model fitting</h2>
<p>We are going to fit a simple multivariate Bayesian model (similar to the “minimal” model in our Interspeech paper <span class="citation" data-cites="smith_modelled_2024">(<a href="#ref-smith_modelled_2024" role="doc-biblioref">Smith, Sonderegger, and Spade Consortium 2024</a>)</span>). For more model syntax examples (including more complex model structures, such as nested random effects, modelling variance, and predictors with nonlinear effects), see the model-fitting scripts in the Interspeech folder of the github repository.</p>
<p>The model we are fitting jointly models F1 and F2, which is a key feature of the method we’re proposing. The remainder of the model structure is quite simple. The fixed predictors are <code>context</code>, <code>vowel</code>, and their interaction. There are by-<code>word</code> and by-<code>speaker</code> random intercepts, and by-<code>speaker</code> random slopes of of <code>vowel</code>, <code>context</code>, and their interaction.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="co"># "p" and "q" are just dummy variables and can be anything</span></span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a>F1 <span class="ot">=</span> <span class="fu">bf</span>(F1 <span class="sc">~</span> </span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>          context<span class="sc">*</span>vowel <span class="sc">+</span></span>
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a>          (<span class="dv">1</span><span class="sc">|</span>p<span class="sc">|</span>word) <span class="sc">+</span> (<span class="dv">1</span><span class="sc">+</span>context<span class="sc">*</span>vowel<span class="sc">|</span>q<span class="sc">|</span>speaker))</span>
<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a>F2 <span class="ot">=</span> <span class="fu">bf</span>(F2 <span class="sc">~</span> </span>
<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a>          context<span class="sc">*</span>vowel <span class="sc">+</span></span>
<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a>          (<span class="dv">1</span><span class="sc">|</span>p<span class="sc">|</span>word) <span class="sc">+</span> (<span class="dv">1</span><span class="sc">+</span>context<span class="sc">*</span>vowel<span class="sc">|</span>q<span class="sc">|</span>speaker))</span>
<span id="cb10-9"><a href="#cb10-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb10-10"><a href="#cb10-10" aria-hidden="true" tabindex="-1"></a>model <span class="ot">&lt;-</span> <span class="fu">brm</span>(F1 <span class="sc">+</span> F2 <span class="sc">+</span> </span>
<span id="cb10-11"><a href="#cb10-11" aria-hidden="true" tabindex="-1"></a>               <span class="co"># set_rescor(TRUE) means that correlations between F1 &amp; F2 </span></span>
<span id="cb10-12"><a href="#cb10-12" aria-hidden="true" tabindex="-1"></a>               <span class="co"># will be modelled.</span></span>
<span id="cb10-13"><a href="#cb10-13" aria-hidden="true" tabindex="-1"></a>               <span class="fu">set_rescor</span>(<span class="cn">TRUE</span>),</span>
<span id="cb10-14"><a href="#cb10-14" aria-hidden="true" tabindex="-1"></a>             <span class="co"># we want to use the smaller dataset so it doesn't take </span></span>
<span id="cb10-15"><a href="#cb10-15" aria-hidden="true" tabindex="-1"></a>             <span class="co"># forever to fit</span></span>
<span id="cb10-16"><a href="#cb10-16" aria-hidden="true" tabindex="-1"></a>             <span class="at">data=</span>raleigh_data_50, </span>
<span id="cb10-17"><a href="#cb10-17" aria-hidden="true" tabindex="-1"></a>             <span class="co"># fitting a gaussian model (more details below): this is the </span></span>
<span id="cb10-18"><a href="#cb10-18" aria-hidden="true" tabindex="-1"></a>             <span class="co"># default, but I'm making it explicit here for pedagogical</span></span>
<span id="cb10-19"><a href="#cb10-19" aria-hidden="true" tabindex="-1"></a>             <span class="co"># purposes</span></span>
<span id="cb10-20"><a href="#cb10-20" aria-hidden="true" tabindex="-1"></a>             <span class="at">family=</span>gaussian,</span>
<span id="cb10-21"><a href="#cb10-21" aria-hidden="true" tabindex="-1"></a>             <span class="co"># if this file exists, this call will just read in the file. </span></span>
<span id="cb10-22"><a href="#cb10-22" aria-hidden="true" tabindex="-1"></a>             <span class="co"># Otherwise, the call will save the model here. </span></span>
<span id="cb10-23"><a href="#cb10-23" aria-hidden="true" tabindex="-1"></a>             <span class="at">file=</span><span class="st">"tutorial_model"</span>,</span>
<span id="cb10-24"><a href="#cb10-24" aria-hidden="true" tabindex="-1"></a>             <span class="co"># Set prior for correlations. All other parameters just use </span></span>
<span id="cb10-25"><a href="#cb10-25" aria-hidden="true" tabindex="-1"></a>             <span class="co"># default priors. </span></span>
<span id="cb10-26"><a href="#cb10-26" aria-hidden="true" tabindex="-1"></a>             <span class="at">prior =</span> <span class="fu">c</span>(<span class="fu">prior</span>(<span class="fu">lkj</span>(<span class="fl">1.5</span>), <span class="at">class =</span> cor)), </span>
<span id="cb10-27"><a href="#cb10-27" aria-hidden="true" tabindex="-1"></a>             <span class="co"># my computer has 8 cores, so I'm using 4 here, change </span></span>
<span id="cb10-28"><a href="#cb10-28" aria-hidden="true" tabindex="-1"></a>             <span class="co"># according to your own machine. Ideally, you want to use the </span></span>
<span id="cb10-29"><a href="#cb10-29" aria-hidden="true" tabindex="-1"></a>             <span class="co"># same number of cores as chains, so that all the chains can </span></span>
<span id="cb10-30"><a href="#cb10-30" aria-hidden="true" tabindex="-1"></a>             <span class="co"># run in parallel.</span></span>
<span id="cb10-31"><a href="#cb10-31" aria-hidden="true" tabindex="-1"></a>             <span class="at">chains=</span><span class="dv">4</span>, <span class="at">cores=</span><span class="dv">4</span>, </span>
<span id="cb10-32"><a href="#cb10-32" aria-hidden="true" tabindex="-1"></a>             <span class="co"># If there are warnings, consider increasing the number of</span></span>
<span id="cb10-33"><a href="#cb10-33" aria-hidden="true" tabindex="-1"></a>             <span class="co"># iterations.</span></span>
<span id="cb10-34"><a href="#cb10-34" aria-hidden="true" tabindex="-1"></a>             <span class="at">iter =</span> <span class="dv">4000</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>The key difference between a Bayesian model (like the one we just fit) and a frequentist model (which we could fit with very similar model syntax in <code>lmer</code>) is that the Bayesian model is modelling the <em>distributions</em> of the output variables, whereas frequentist models fit a point prediction. While I am not here to take an ideological stance, the fact that Bayesian models inherently predict distributions is extremely useful for the present application.</p>
</section>
<section id="Simulate" class="level2">
<h2 class="anchored" data-anchor-id="Simulate">Simulate distributions</h2>
<p>To understand more about what exactly “drawing from the model” is doing, check out the <a href="http://mjskay.github.io/tidybayes/articles/tidy-brms.html">tidybayes help page</a>.</p>
<p>Using <code>tidybayes</code>, we can simulate draws of the <em>parameters</em> (using <code>add_epred_draws</code>) or of the <em>data</em> (using <code>add_predicted_draws</code>). Which one you choose to do should be determined by the quantity you want to compute from the predictions. We’ll show both in this tutorial, because the two measures of overlap that we will be calculating (Bhattacharyya affinity and Euclidean distance) necessitate different kinds of predictions.</p>
<p>First, we need to know what parameter values to predict for. For our application, we at least want to get predicted values for each combination of <code>vowel</code> and <code>context</code>. Because we have by-<code>speaker</code> random intercepts with <code>vowel</code>-by-<code>context</code> random slopes, we can make predictions for each <code>speaker</code>-by-<code>vowel</code>-by-<code>context</code> pair. We can also make predictions for the “average” speaker by discarding the entire random effects structure. We will do both. (All of the analysis is done for the “average” word, i.e., the by-word random intercepts are always ignored. Looking at lexical effects would open a whole new set of possible research questions, so be my guest if this is of interest.)</p>
<p>Additionally, we also want to make “average” speaker predictions, so we’ll just ignore the random effects structure entirely, i.e., predict F1 and F2 as a function of just <code>vowel</code> and <code>context</code>.</p>
<p>Let’s make a dataframe with every combination of vowel and context: these are what we use to make the “average” speaker simulations. You unfortunately need to reset the factor levels, since the <code>levels()</code> function (not to be confused with the <code>levels</code> argument to the <code>factor()</code> function) makes a vector of character strings and not factors.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="co"># these are both vectors of characters, but their order is determined by the </span></span>
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="co"># order in the model</span></span>
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a>vowel_levels <span class="ot">&lt;-</span> <span class="fu">levels</span>(model<span class="sc">$</span>data<span class="sc">$</span>vowel) </span>
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a>context_levels <span class="ot">&lt;-</span> <span class="fu">levels</span>(model<span class="sc">$</span>data<span class="sc">$</span>context)</span>
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a>av_preds <span class="ot">&lt;-</span> <span class="fu">expand_grid</span>(<span class="at">context =</span> <span class="fu">factor</span>(context_levels,</span>
<span id="cb11-7"><a href="#cb11-7" aria-hidden="true" tabindex="-1"></a>                                    <span class="at">levels=</span>context_levels),</span>
<span id="cb11-8"><a href="#cb11-8" aria-hidden="true" tabindex="-1"></a>                   <span class="at">vowel =</span> <span class="fu">factor</span>(vowel_levels,</span>
<span id="cb11-9"><a href="#cb11-9" aria-hidden="true" tabindex="-1"></a>                                  <span class="at">levels=</span>vowel_levels))</span>
<span id="cb11-10"><a href="#cb11-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-11"><a href="#cb11-11" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(av_preds)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 4 × 2
  context vowel
  &lt;fct&gt;   &lt;fct&gt;
1 oral    i    
2 oral    e    
3 nasal   i    
4 nasal   e    </code></pre>
</div>
</div>
<p>Similarly, let’s make a dataframe with every combination of speaker, vowel, and context. We use this one to make all of our by-speaker simulations.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>sp_preds <span class="ot">&lt;-</span> <span class="fu">expand_grid</span>(<span class="at">context =</span> <span class="fu">factor</span>(context_levels,</span>
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a>                                    <span class="at">levels=</span>context_levels),</span>
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>                   <span class="at">vowel =</span> <span class="fu">factor</span>(vowel_levels,</span>
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a>                                  <span class="at">levels=</span>vowel_levels),</span>
<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a>                   <span class="at">speaker =</span> <span class="fu">unique</span>(model<span class="sc">$</span>data<span class="sc">$</span>speaker))</span>
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(sp_preds)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 3
  context vowel speaker
  &lt;fct&gt;   &lt;fct&gt; &lt;chr&gt;  
1 oral    i     ral130 
2 oral    i     ral135 
3 oral    i     ral139 
4 oral    i     ral216 
5 oral    i     ral303 
6 oral    i     ral342 </code></pre>
</div>
</div>
<p>We’ll also define two variables: <code>num_draws</code>, which is the number of draws to take, and <code>num_points</code>, which is the number of <em>points per draw</em> to take for calculating Bhattacharyya affinity. You can change these to smaller numbers if your computer is struggling, or to bigger numbers if you have the resources. I would in particular recommend changing <code>num_points</code> if you want better estimates of Bhattacharyya affinity.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>num_draws<span class="ot">=</span><span class="dv">100</span></span>
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a>num_points<span class="ot">=</span><span class="dv">25</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Now, let’s make our first simulations. Here, we are making by-speaker predictions for the F1 and F2 <em>means</em> as a function of context and vowel. The resulting distribution is the posterior for the <em>means</em> of the data, not the distributions for the data itself. This is what we want in order to compute Euclidean distance.</p>
<p>When computing Euclidean distance from empirical distributions, we get the distribution center by averaging across tokens, but that isn’t necessary if we are predicting from the model because we already have a distribution of the estimate for the mean from the model we fit! We still take multiple draws of the mean, because that allows us to calculate credible intervals later on.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>fitted_sp <span class="ot">&lt;-</span> sp_preds <span class="sc">%&gt;%</span> </span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">add_epred_draws</span>(model, </span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a>                  <span class="co"># include only the part of the random effects that we're </span></span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a>                  <span class="co"># interested in</span></span>
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a>                  <span class="at">re_formula =</span> <span class="sc">~</span>(<span class="dv">1</span><span class="sc">+</span>context<span class="sc">*</span>vowel<span class="sc">|</span> q<span class="sc">|</span>speaker),</span>
<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a>                  <span class="at">ndraws=</span>num_draws) <span class="sc">%&gt;%</span></span>
<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a>  <span class="co"># pivot so that F1 and F2 each get a column</span></span>
<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">".category"</span>, <span class="at">values_from=</span><span class="st">".epred"</span>) <span class="sc">%&gt;%</span></span>
<span id="cb16-10"><a href="#cb16-10" aria-hidden="true" tabindex="-1"></a>  <span class="co"># we never use the `.row`, `.chain`, or `.iteration` columns, so let's get rid </span></span>
<span id="cb16-11"><a href="#cb16-11" aria-hidden="true" tabindex="-1"></a>  <span class="co"># of them</span></span>
<span id="cb16-12"><a href="#cb16-12" aria-hidden="true" tabindex="-1"></a>  dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>.row, <span class="sc">-</span>.chain, <span class="sc">-</span>.iteration)</span>
<span id="cb16-13"><a href="#cb16-13" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-14"><a href="#cb16-14" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(fitted_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 6
  context vowel speaker .draw     F1    F2
  &lt;fct&gt;   &lt;fct&gt; &lt;chr&gt;   &lt;int&gt;  &lt;dbl&gt; &lt;dbl&gt;
1 oral    i     ral130      1 -1.00  1.03 
2 oral    i     ral130      2 -0.986 1.03 
3 oral    i     ral130      3 -1.00  1.00 
4 oral    i     ral130      4 -0.964 0.944
5 oral    i     ral130      5 -0.962 0.971
6 oral    i     ral130      6 -1.10  1.03 </code></pre>
</div>
</div>
<p>Now we do the exact same thing, but this time, we exclude the random effects in order to draw from the distribution of the means for the <em>average</em> speaker.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>fitted_av <span class="ot">&lt;-</span> av_preds <span class="sc">%&gt;%</span> </span>
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">add_epred_draws</span>(model, </span>
<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a>                  <span class="co"># re_formula, needs to be NA (which keeps it empty), rather </span></span>
<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a>                  <span class="co"># than NULL, which is the "default," i.e., complete random </span></span>
<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a>                  <span class="co"># effects structure (including by-word random effects)</span></span>
<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a>                  <span class="at">re_formula=</span><span class="cn">NA</span>, </span>
<span id="cb18-7"><a href="#cb18-7" aria-hidden="true" tabindex="-1"></a>                  <span class="at">ndraws=</span>num_draws) <span class="sc">%&gt;%</span></span>
<span id="cb18-8"><a href="#cb18-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb18-9"><a href="#cb18-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">".category"</span>, <span class="at">values_from=</span><span class="st">".epred"</span>) <span class="sc">%&gt;%</span></span>
<span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a>  dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>.row, <span class="sc">-</span>.chain, <span class="sc">-</span>.iteration)</span>
<span id="cb18-11"><a href="#cb18-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-12"><a href="#cb18-12" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(fitted_av)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 5
  context vowel .draw     F1    F2
  &lt;fct&gt;   &lt;fct&gt; &lt;int&gt;  &lt;dbl&gt; &lt;dbl&gt;
1 oral    i         1 -0.946 0.929
2 oral    i         2 -0.883 0.895
3 oral    i         3 -0.921 0.893
4 oral    i         4 -0.905 0.773
5 oral    i         5 -0.858 0.757
6 oral    i         6 -0.892 0.819</code></pre>
</div>
</div>
<p>Each row in <code>fitted_sp</code> and <code>fitted_av</code> should be interpreted as a possible value of the mean. Because we have 100 such values, we can use this variation to estimate our uncertainty in an estimate of the mean (or of any quantity calculated from these dataframes).</p>
<p>Now, instead of simulating the means, let’s simulate the data. For this, we use <code>add_predicted_draws</code>, but aside from that, the procedure is the same as above. Here, we’re not actually using these two dataframes to calculate a measure of overlap, but these are the dataframes that are directly comparable to the empirical distributions, so we do want to look at them.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb20"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a>predicted_sp <span class="ot">&lt;-</span> sp_preds <span class="sc">%&gt;%</span> </span>
<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">add_predicted_draws</span>(model, </span>
<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a>                      <span class="at">re_formula =</span> <span class="sc">~</span>(<span class="dv">1</span><span class="sc">+</span>context<span class="sc">*</span>vowel<span class="sc">|</span>q<span class="sc">|</span>speaker),</span>
<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a>                      <span class="at">ndraws=</span>num_draws) <span class="sc">%&gt;%</span></span>
<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">".category"</span>, <span class="at">values_from=</span><span class="st">".prediction"</span>) <span class="sc">%&gt;%</span></span>
<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a>  dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>.row, <span class="sc">-</span>.chain, <span class="sc">-</span>.iteration)</span>
<span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb20-9"><a href="#cb20-9" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(predicted_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 6
  context vowel speaker .draw     F1    F2
  &lt;fct&gt;   &lt;fct&gt; &lt;chr&gt;   &lt;int&gt;  &lt;dbl&gt; &lt;dbl&gt;
1 oral    i     ral130      1 -0.658 1.39 
2 oral    i     ral130      2 -0.751 0.975
3 oral    i     ral130      3 -0.514 0.264
4 oral    i     ral130      4 -1.21  0.894
5 oral    i     ral130      5 -0.695 0.713
6 oral    i     ral130      6 -1.33  1.09 </code></pre>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb22"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a>predicted_av <span class="ot">&lt;-</span> av_preds <span class="sc">%&gt;%</span> </span>
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">add_predicted_draws</span>(model, </span>
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a>                      <span class="at">re_formula=</span><span class="cn">NA</span>,</span>
<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a>                      <span class="at">ndraws=</span>num_draws) <span class="sc">%&gt;%</span></span>
<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb22-6"><a href="#cb22-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">".category"</span>, <span class="at">values_from=</span><span class="st">".prediction"</span>) <span class="sc">%&gt;%</span></span>
<span id="cb22-7"><a href="#cb22-7" aria-hidden="true" tabindex="-1"></a>  dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>.row, <span class="sc">-</span>.chain, <span class="sc">-</span>.iteration)</span>
<span id="cb22-8"><a href="#cb22-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-9"><a href="#cb22-9" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(predicted_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 6
  context vowel speaker .draw     F1    F2
  &lt;fct&gt;   &lt;fct&gt; &lt;chr&gt;   &lt;int&gt;  &lt;dbl&gt; &lt;dbl&gt;
1 oral    i     ral130      1 -0.658 1.39 
2 oral    i     ral130      2 -0.751 0.975
3 oral    i     ral130      3 -0.514 0.264
4 oral    i     ral130      4 -1.21  0.894
5 oral    i     ral130      5 -0.695 0.713
6 oral    i     ral130      6 -1.33  1.09 </code></pre>
</div>
</div>
<p>We need to make special dataframes for the Bhattacharyya affinity calculations because we want to be able to calculate uncertainty. Bhattacharyya affinity is calculated by first estimating the underlying distribution of the data and therefore takes multiple datapoints to estimate (the more, the better). If we used the <code>predicted_sp</code> and <code>predicted_av</code> dataframes to calculate BA, then we would only get one BA estimate per person, and would not be able to estimate uncertainty. Instead, the approach is to simulate multiple values from the posterior predictive distribution using a fixed value for each of the parameters, i.e., make multiple output values from a single draw. We still take multiple draws, and calculate Bhattacharyya affinity <em>for each draw</em>. To accomplish this, we pass in the same combination of predictor variables multiple times, since we get one of each output for each set of predictors that are passed in.</p>
<p>To start, let’s define a function for repeating a dataframe <code>df</code> <code>n_rep</code> times. We don’t need to do this, but it makes the code for getting the predicted dataframes cleaner.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb24"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a>rep_df <span class="ot">&lt;-</span> <span class="cf">function</span>(df, n_reps)</span>
<span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a>{</span>
<span id="cb24-3"><a href="#cb24-3" aria-hidden="true" tabindex="-1"></a>  df_repeated <span class="ot">&lt;-</span> <span class="fu">do.call</span>(<span class="st">"rbind"</span>, <span class="fu">replicate</span>(n_reps, df, <span class="at">simplify =</span> <span class="cn">FALSE</span>))</span>
<span id="cb24-4"><a href="#cb24-4" aria-hidden="true" tabindex="-1"></a>}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Because we only need these special repeated dataframes to calculate BA (which requires a distribution to compute), we just need to draw from posterior predictions, not from the fitted means. What we’re doing here is getting <code>num_points</code> (i.e., 25) predictions <em>for each draw</em>, i.e, we assume a fixed set of parameter values and then get 25 values from the posterior predicted distribution assuming those values. We do this <code>n_draws</code> times, which in the end will give us a <em>distribution</em> of possible BA values.</p>
<p>Now we can get the predicted draws just like we did before, with the additional step of repeating the input dataframe <code>num_points</code> times:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb25"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a>predicted_sp_BA <span class="ot">&lt;-</span> sp_preds <span class="sc">%&gt;%</span> </span>
<span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a>  <span class="co"># repeat the `sp_preds` dataframe `num_points` times </span></span>
<span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">rep_df</span>(num_points) <span class="sc">%&gt;%</span></span>
<span id="cb25-4"><a href="#cb25-4" aria-hidden="true" tabindex="-1"></a>  <span class="co"># now run `add_predicted_draws` on the repeated dataframe</span></span>
<span id="cb25-5"><a href="#cb25-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">add_predicted_draws</span>(model, </span>
<span id="cb25-6"><a href="#cb25-6" aria-hidden="true" tabindex="-1"></a>                      <span class="at">re_formula =</span> <span class="sc">~</span>(<span class="dv">1</span><span class="sc">+</span>context<span class="sc">*</span>vowel<span class="sc">|</span>q<span class="sc">|</span>speaker),</span>
<span id="cb25-7"><a href="#cb25-7" aria-hidden="true" tabindex="-1"></a>                      <span class="at">ndraws=</span>num_draws) <span class="sc">%&gt;%</span></span>
<span id="cb25-8"><a href="#cb25-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb25-9"><a href="#cb25-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">".category"</span>, <span class="at">values_from=</span><span class="st">".prediction"</span>) <span class="sc">%&gt;%</span></span>
<span id="cb25-10"><a href="#cb25-10" aria-hidden="true" tabindex="-1"></a>  <span class="co"># .row now contains meaningful information, but excluding it still doesn't </span></span>
<span id="cb25-11"><a href="#cb25-11" aria-hidden="true" tabindex="-1"></a>  <span class="co"># cause problems</span></span>
<span id="cb25-12"><a href="#cb25-12" aria-hidden="true" tabindex="-1"></a>  dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>.row, <span class="sc">-</span>.chain, <span class="sc">-</span>.iteration)</span>
<span id="cb25-13"><a href="#cb25-13" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb25-14"><a href="#cb25-14" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(predicted_sp_BA)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 6
  context vowel speaker .draw     F1    F2
  &lt;fct&gt;   &lt;fct&gt; &lt;chr&gt;   &lt;int&gt;  &lt;dbl&gt; &lt;dbl&gt;
1 oral    i     ral130      1 -0.744 0.396
2 oral    i     ral130      2 -0.980 1.47 
3 oral    i     ral130      3 -0.750 0.583
4 oral    i     ral130      4 -1.10  1.15 
5 oral    i     ral130      5 -1.43  0.180
6 oral    i     ral130      6 -1.14  1.22 </code></pre>
</div>
</div>
<p>Taking “BA draws” for the average speaker is likewise exactly analygous to what we have already done:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb27"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a>predicted_av_BA <span class="ot">&lt;-</span> av_preds <span class="sc">%&gt;%</span> </span>
<span id="cb27-2"><a href="#cb27-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">rep_df</span>(num_points) <span class="sc">%&gt;%</span></span>
<span id="cb27-3"><a href="#cb27-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">add_predicted_draws</span>(model, </span>
<span id="cb27-4"><a href="#cb27-4" aria-hidden="true" tabindex="-1"></a>                      <span class="at">re_formula=</span><span class="cn">NA</span>,</span>
<span id="cb27-5"><a href="#cb27-5" aria-hidden="true" tabindex="-1"></a>                      <span class="at">ndraws=</span>num_draws) <span class="sc">%&gt;%</span></span>
<span id="cb27-6"><a href="#cb27-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb27-7"><a href="#cb27-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">".category"</span>, <span class="at">values_from=</span><span class="st">".prediction"</span>) <span class="sc">%&gt;%</span></span>
<span id="cb27-8"><a href="#cb27-8" aria-hidden="true" tabindex="-1"></a>  dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>.row, <span class="sc">-</span>.chain, <span class="sc">-</span>.iteration)</span>
<span id="cb27-9"><a href="#cb27-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb27-10"><a href="#cb27-10" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(predicted_av_BA)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 5
  context vowel .draw     F1    F2
  &lt;fct&gt;   &lt;fct&gt; &lt;int&gt;  &lt;dbl&gt; &lt;dbl&gt;
1 oral    i         1 -0.107 0.323
2 oral    i         2 -1.01  0.934
3 oral    i         3 -1.01  0.883
4 oral    i         4 -0.795 0.897
5 oral    i         5 -0.358 1.59 
6 oral    i         6 -0.762 0.605</code></pre>
</div>
</div>
</section>
<section id="empirical-plots-revisited" class="level2">
<h2 class="anchored" data-anchor-id="empirical-plots-revisited">Empirical plots, revisited</h2>
<p>If you are not interested in the empirical plots, skip to the <a href="#uncertainty">discussion of uncertainty</a>.</p>
<p>Now that we have our simulated distributions, we can compare them to the empirical distributions. This is a good sanity check and may also reveal how the model “helps us.”</p>
<p>We’ll start by plotting the modelled distributions next to the empirical distributions (repeated from above).</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb29"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a>p_dist_modelled <span class="ot">&lt;-</span> fitted_sp <span class="sc">%&gt;%</span></span>
<span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%&gt;%</span> </span>
<span id="cb29-3"><a href="#cb29-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1), <span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%&gt;%</span></span>
<span id="cb29-4"><a href="#cb29-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb29-5"><a href="#cb29-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>F2, <span class="at">y=</span>F1, <span class="at">color=</span>context, <span class="at">shape=</span>vowel)) <span class="sc">+</span></span>
<span id="cb29-6"><a href="#cb29-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>(<span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb29-7"><a href="#cb29-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">stat_ellipse</span>(<span class="at">data=</span>predicted_sp, </span>
<span id="cb29-8"><a href="#cb29-8" aria-hidden="true" tabindex="-1"></a>               <span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(speaker, vowel, context), <span class="at">lty=</span>vowel),</span>
<span id="cb29-9"><a href="#cb29-9" aria-hidden="true" tabindex="-1"></a>               <span class="at">level=</span><span class="fl">0.66</span>, <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb29-10"><a href="#cb29-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker)), <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb29-11"><a href="#cb29-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_x_reverse</span>() <span class="sc">+</span> <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb29-12"><a href="#cb29-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_cartesian</span>(<span class="at">xlim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">ylim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">clip=</span><span class="st">'off'</span>) <span class="sc">+</span></span>
<span id="cb29-13"><a href="#cb29-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(<span class="at">title=</span><span class="st">'modelled'</span>) <span class="sc">+</span></span>
<span id="cb29-14"><a href="#cb29-14" aria-hidden="true" tabindex="-1"></a>  <span class="fu">facet_wrap</span>(<span class="sc">~</span>speaker, <span class="at">ncol=</span><span class="dv">2</span>) <span class="sc">+</span></span>
<span id="cb29-15"><a href="#cb29-15" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(<span class="at">aspect.ratio=</span><span class="dv">1</span>)</span>
<span id="cb29-16"><a href="#cb29-16" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb29-17"><a href="#cb29-17" aria-hidden="true" tabindex="-1"></a>p_dist_empirical <span class="ot">&lt;-</span> raleigh_data <span class="sc">%&gt;%</span></span>
<span id="cb29-18"><a href="#cb29-18" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%&gt;%</span> </span>
<span id="cb29-19"><a href="#cb29-19" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(<span class="fu">across</span>(<span class="fu">c</span>(F1, F2), mean)) <span class="sc">%&gt;%</span></span>
<span id="cb29-20"><a href="#cb29-20" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb29-21"><a href="#cb29-21" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>F2, <span class="at">y=</span>F1, <span class="at">color=</span>context, <span class="at">shape=</span>vowel)) <span class="sc">+</span></span>
<span id="cb29-22"><a href="#cb29-22" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb29-23"><a href="#cb29-23" aria-hidden="true" tabindex="-1"></a>  <span class="fu">stat_ellipse</span>(<span class="at">data=</span>raleigh_data, </span>
<span id="cb29-24"><a href="#cb29-24" aria-hidden="true" tabindex="-1"></a>               <span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(speaker, vowel, context), <span class="at">lty=</span>vowel),</span>
<span id="cb29-25"><a href="#cb29-25" aria-hidden="true" tabindex="-1"></a>               <span class="at">level=</span><span class="fl">0.66</span>) <span class="sc">+</span></span>
<span id="cb29-26"><a href="#cb29-26" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker))) <span class="sc">+</span></span>
<span id="cb29-27"><a href="#cb29-27" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_x_reverse</span>() <span class="sc">+</span> <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb29-28"><a href="#cb29-28" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_cartesian</span>(<span class="at">xlim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">ylim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">clip=</span><span class="st">'off'</span>) <span class="sc">+</span></span>
<span id="cb29-29"><a href="#cb29-29" aria-hidden="true" tabindex="-1"></a>  <span class="fu">facet_wrap</span>(<span class="sc">~</span>speaker, <span class="at">ncol=</span><span class="dv">2</span>) <span class="sc">+</span></span>
<span id="cb29-30"><a href="#cb29-30" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(<span class="at">title=</span><span class="st">'empirical'</span>) <span class="sc">+</span></span>
<span id="cb29-31"><a href="#cb29-31" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(<span class="at">axis.title.y=</span><span class="fu">element_blank</span>(),</span>
<span id="cb29-32"><a href="#cb29-32" aria-hidden="true" tabindex="-1"></a>        <span class="at">aspect.ratio=</span><span class="dv">1</span>)</span>
<span id="cb29-33"><a href="#cb29-33" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb29-34"><a href="#cb29-34" aria-hidden="true" tabindex="-1"></a>p_dist_modelled <span class="sc">+</span> p_dist_empirical</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/modelled_centered-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>Notice that all of the ellipses in the modelled distributions have roughly the same size and orientation: this is because, with the way the model that fit them is defined, we have fit a single variance-covariance matrix. If we want variability to be allowed to vary across speakers, contexts, and vowels (which we probably do), we have to model variance too. An example of how to do this is in the scripts ending in “_exp” in the Interspeech2024/model_fitting folder.</p>
<p>We can also plot the by-speaker interactions, like we did for the empirical data:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb30"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a>p_F1_interaction_modelled <span class="ot">&lt;-</span> fitted_sp <span class="sc">%&gt;%</span></span>
<span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%&gt;%</span> </span>
<span id="cb30-3"><a href="#cb30-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1)) <span class="sc">%&gt;%</span></span>
<span id="cb30-4"><a href="#cb30-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb30-5"><a href="#cb30-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F1, <span class="at">color=</span>context)) <span class="sc">+</span></span>
<span id="cb30-6"><a href="#cb30-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>(<span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb30-7"><a href="#cb30-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb30-8"><a href="#cb30-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(<span class="at">title=</span><span class="st">'modelled'</span>) <span class="sc">+</span></span>
<span id="cb30-9"><a href="#cb30-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker)), <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb30-10"><a href="#cb30-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ylim</span>(<span class="fl">0.2</span>, <span class="sc">-</span><span class="fl">1.2</span>)</span>
<span id="cb30-11"><a href="#cb30-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb30-12"><a href="#cb30-12" aria-hidden="true" tabindex="-1"></a>p_F1_interaction_empirical <span class="ot">&lt;-</span> raleigh_data <span class="sc">%&gt;%</span></span>
<span id="cb30-13"><a href="#cb30-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%&gt;%</span> </span>
<span id="cb30-14"><a href="#cb30-14" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1)) <span class="sc">%&gt;%</span></span>
<span id="cb30-15"><a href="#cb30-15" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb30-16"><a href="#cb30-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F1, <span class="at">color=</span>context)) <span class="sc">+</span></span>
<span id="cb30-17"><a href="#cb30-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb30-18"><a href="#cb30-18" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb30-19"><a href="#cb30-19" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(<span class="at">title=</span><span class="st">'empirical'</span>) <span class="sc">+</span></span>
<span id="cb30-20"><a href="#cb30-20" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker))) <span class="sc">+</span></span>
<span id="cb30-21"><a href="#cb30-21" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ylim</span>(<span class="fl">0.2</span>, <span class="sc">-</span><span class="fl">1.2</span>) <span class="sc">+</span></span>
<span id="cb30-22"><a href="#cb30-22" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(<span class="at">axis.title.y=</span><span class="fu">element_blank</span>())</span>
<span id="cb30-23"><a href="#cb30-23" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb30-24"><a href="#cb30-24" aria-hidden="true" tabindex="-1"></a>p_F2_interaction_modelled <span class="ot">&lt;-</span> fitted_sp <span class="sc">%&gt;%</span></span>
<span id="cb30-25"><a href="#cb30-25" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%&gt;%</span> </span>
<span id="cb30-26"><a href="#cb30-26" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(<span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%&gt;%</span></span>
<span id="cb30-27"><a href="#cb30-27" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb30-28"><a href="#cb30-28" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F2, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb30-29"><a href="#cb30-29" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>(<span class="at">shape=</span><span class="dv">24</span>, <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb30-30"><a href="#cb30-30" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb30-31"><a href="#cb30-31" aria-hidden="true" tabindex="-1"></a>  <span class="co"># labs(title='modelled') +</span></span>
<span id="cb30-32"><a href="#cb30-32" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="at">lty=</span><span class="dv">2</span>, <span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker)),</span>
<span id="cb30-33"><a href="#cb30-33" aria-hidden="true" tabindex="-1"></a>            <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb30-34"><a href="#cb30-34" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ylim</span>(<span class="fl">1.5</span>, <span class="fl">0.3</span>)</span>
<span id="cb30-35"><a href="#cb30-35" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb30-36"><a href="#cb30-36" aria-hidden="true" tabindex="-1"></a>p_F2_interaction_empirical <span class="ot">&lt;-</span> raleigh_data <span class="sc">%&gt;%</span></span>
<span id="cb30-37"><a href="#cb30-37" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%&gt;%</span> </span>
<span id="cb30-38"><a href="#cb30-38" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(<span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%&gt;%</span></span>
<span id="cb30-39"><a href="#cb30-39" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb30-40"><a href="#cb30-40" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F2, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb30-41"><a href="#cb30-41" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>(<span class="at">shape=</span><span class="dv">24</span>) <span class="sc">+</span></span>
<span id="cb30-42"><a href="#cb30-42" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb30-43"><a href="#cb30-43" aria-hidden="true" tabindex="-1"></a>  <span class="co"># labs(title='empirical') +</span></span>
<span id="cb30-44"><a href="#cb30-44" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="at">lty=</span><span class="dv">2</span>, <span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker))) <span class="sc">+</span></span>
<span id="cb30-45"><a href="#cb30-45" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ylim</span>(<span class="fl">1.5</span>, <span class="fl">0.3</span>) <span class="sc">+</span></span>
<span id="cb30-46"><a href="#cb30-46" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(<span class="at">axis.title.y=</span><span class="fu">element_blank</span>())</span>
<span id="cb30-47"><a href="#cb30-47" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb30-48"><a href="#cb30-48" aria-hidden="true" tabindex="-1"></a>(p_F1_interaction_modelled <span class="sc">+</span> p_F1_interaction_empirical) <span class="sc">/</span></span>
<span id="cb30-49"><a href="#cb30-49" aria-hidden="true" tabindex="-1"></a>  (p_F2_interaction_modelled <span class="sc">+</span> p_F2_interaction_empirical)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/F1_interaction_sp-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<section id="uncertainty" class="level3">
<h3 class="anchored" data-anchor-id="uncertainty">Uncertainty</h3>
<p>Now, we’ll make some plots that we couldn’t make from the empirical data: plots showing uncertainty!</p>
<p>Let’s make a dataframe with an estimate of the mean F1 and F2, along with 95% credible intervals. <code>tidybayes</code> has a useful set of functions for doing this: we’ll use <code>mean_qi()</code>, which gives you the mean and 95% quantile intervals (unless otherwise specified), but there are other options (any combination of mean, median, or mode, plus qi (quantile interval), hdi (highest-density interval), or hdci (highest-density continuous interval), e.g., <code>mode_hdci()</code>). For estimating uncertainty (in the form of 95% credible intervals), we will default to using <code>mean_qi()</code> in this tutorial.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb31"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a>sp_estimates <span class="ot">&lt;-</span> fitted_sp <span class="sc">%&gt;%</span></span>
<span id="cb31-2"><a href="#cb31-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%&gt;%</span></span>
<span id="cb31-3"><a href="#cb31-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mean_qi</span>() <span class="sc">%&gt;%</span> </span>
<span id="cb31-4"><a href="#cb31-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>()</span>
<span id="cb31-5"><a href="#cb31-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb31-6"><a href="#cb31-6" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(sp_estimates)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 12
  speaker vowel context      F1 F1.lower F1.upper    F2 F2.lower F2.upper .width
  &lt;chr&gt;   &lt;fct&gt; &lt;fct&gt;     &lt;dbl&gt;    &lt;dbl&gt;    &lt;dbl&gt; &lt;dbl&gt;    &lt;dbl&gt;    &lt;dbl&gt;  &lt;dbl&gt;
1 ral130  i     oral    -1.02     -1.14   -0.895  0.985    0.874    1.12    0.95
2 ral130  i     nasal   -0.534    -0.674  -0.398  1.14     0.986    1.30    0.95
3 ral130  e     oral    -0.0294   -0.125   0.102  0.672    0.535    0.816   0.95
4 ral130  e     nasal   -0.0957   -0.195   0.0147 0.748    0.626    0.891   0.95
5 ral135  i     oral    -0.961    -1.07   -0.858  0.886    0.741    0.991   0.95
6 ral135  i     nasal   -0.798    -0.928  -0.696  0.964    0.801    1.12    0.95
# ℹ 2 more variables: .point &lt;chr&gt;, .interval &lt;chr&gt;</code></pre>
</div>
</div>
<p>Now we can use this dataframe to show uncertainty. For example, we can plot errorbars on the by-speaker vowel-by-context estimates</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb33"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a>sp_estimates <span class="sc">%&gt;%</span></span>
<span id="cb33-2"><a href="#cb33-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb33-3"><a href="#cb33-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>(<span class="at">shape=</span><span class="dv">21</span>, <span class="fu">aes</span>(<span class="at">y=</span>F1)) <span class="sc">+</span></span>
<span id="cb33-4"><a href="#cb33-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_errorbar</span>(<span class="fu">aes</span>(<span class="at">ymin=</span>F1.lower, <span class="at">ymax=</span>F1.upper)) <span class="sc">+</span></span>
<span id="cb33-5"><a href="#cb33-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="at">lty=</span><span class="dv">1</span>, <span class="fu">aes</span>(<span class="at">y=</span>F1, <span class="at">group=</span>context)) <span class="sc">+</span></span>
<span id="cb33-6"><a href="#cb33-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">facet_wrap</span>(<span class="sc">~</span>speaker)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/unnamed-chunk-3-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>Another way to visualize the uncertainty is to plot all of the mean draws together, like so:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb34"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a>fitted_sp <span class="sc">%&gt;%</span></span>
<span id="cb34-2"><a href="#cb34-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F1, <span class="at">color=</span>context)) <span class="sc">+</span></span>
<span id="cb34-3"><a href="#cb34-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, .draw)), <span class="at">alpha=</span><span class="fl">0.25</span>) <span class="sc">+</span> </span>
<span id="cb34-4"><a href="#cb34-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">facet_wrap</span>(<span class="sc">~</span>speaker)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/unnamed-chunk-4-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>The good thing about this way of viewing uncertainty is that it demonstrates that the variability is actually <em>less</em> than what just the errorbars show, i.e., the draws as plotted here vary mostly in their intercepts, not their slopes.</p>
</section>
</section>
<section id="Calculate_Overlap" class="level2">
<h2 class="anchored" data-anchor-id="Calculate_Overlap">Overlap calculations</h2>
<p>Now that we have predicted and fitted dataframes that “look like” the raw data, we can calculate any measure of overlap we like.</p>
<p>Let’s define a function to calculate Bhattacharyya affinity. For further explanation, see <a href="https://joeystanley.com/blog/a-tutorial-in-calculating-vowel-overlap/">Joey Stanley’s thorough tutorial</a>.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb35"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a>bhatt <span class="ot">&lt;-</span> <span class="cf">function</span> (F1, F2, vowel) </span>
<span id="cb35-2"><a href="#cb35-2" aria-hidden="true" tabindex="-1"></a>{</span>
<span id="cb35-3"><a href="#cb35-3" aria-hidden="true" tabindex="-1"></a>  vowel_data <span class="ot">&lt;-</span> <span class="fu">droplevels</span>(<span class="fu">data.frame</span>(vowel))</span>
<span id="cb35-4"><a href="#cb35-4" aria-hidden="true" tabindex="-1"></a>  </span>
<span id="cb35-5"><a href="#cb35-5" aria-hidden="true" tabindex="-1"></a>  sp_df <span class="ot">&lt;-</span> <span class="fu">tryCatch</span>(</span>
<span id="cb35-6"><a href="#cb35-6" aria-hidden="true" tabindex="-1"></a>    <span class="at">expr=</span><span class="fu">SpatialPointsDataFrame</span>(<span class="fu">na.omit</span>(<span class="fu">cbind</span>(F1, F2)), vowel_data), </span>
<span id="cb35-7"><a href="#cb35-7" aria-hidden="true" tabindex="-1"></a>    <span class="at">error=</span><span class="cf">function</span>(e){<span class="cn">NA</span>})</span>
<span id="cb35-8"><a href="#cb35-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">tryCatch</span>(</span>
<span id="cb35-9"><a href="#cb35-9" aria-hidden="true" tabindex="-1"></a>    <span class="at">expr =</span> {<span class="fu">kerneloverlap</span>(sp_df, <span class="at">method=</span><span class="st">'BA'</span>)[<span class="dv">1</span>,<span class="dv">2</span>]}, </span>
<span id="cb35-10"><a href="#cb35-10" aria-hidden="true" tabindex="-1"></a>    <span class="at">error =</span> <span class="cf">function</span>(e){<span class="cn">NA</span>})</span>
<span id="cb35-11"><a href="#cb35-11" aria-hidden="true" tabindex="-1"></a>}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>First, let’s calculate BA from the empirical distributions:</p>
<div class="cell" data-hash="MMO_tutorial_cache/html/bhatt_calc_emp_e1d2188da6b2328867549b1e3b59a835">
<div class="sourceCode cell-code" id="cb36"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a>BA_emp_sp <span class="ot">&lt;-</span> raleigh_data <span class="sc">%&gt;%</span></span>
<span id="cb36-2"><a href="#cb36-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, context) <span class="sc">%&gt;%</span> <span class="co"># calcualte for each speaker and context</span></span>
<span id="cb36-3"><a href="#cb36-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(<span class="at">bhatt_aff =</span> <span class="fu">bhatt</span>(F1, F2, vowel)) <span class="sc">%&gt;%</span></span>
<span id="cb36-4"><a href="#cb36-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>()</span>
<span id="cb36-5"><a href="#cb36-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb36-6"><a href="#cb36-6" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(BA_emp_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 3
  speaker context bhatt_aff
  &lt;chr&gt;   &lt;fct&gt;       &lt;dbl&gt;
1 ral130  oral        0.519
2 ral130  nasal       0.812
3 ral135  oral        0.608
4 ral135  nasal       0.861
5 ral139  oral        0.697
6 ral139  nasal       0.904</code></pre>
</div>
</div>
<p>Notice that we have gotten rid of the <code>vowel</code> column: because we are calculating the overlap <em>between vowels</em>, we will always lose the vowel column when we calculate overlap.</p>
<p>We calculate the by-speaker modelled BA in much the same way:</p>
<div class="cell" data-hash="MMO_tutorial_cache/html/bhatt_calc_sp_eeea4d20c2b977442c6911d026fa3e22">
<div class="sourceCode cell-code" id="cb38"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a>BA_mod_sp <span class="ot">&lt;-</span> predicted_sp_BA <span class="sc">%&gt;%</span></span>
<span id="cb38-2"><a href="#cb38-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, context, .draw) <span class="sc">%&gt;%</span> <span class="co"># calculate for each speaker, context, AND draw</span></span>
<span id="cb38-3"><a href="#cb38-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(<span class="at">bhatt_aff =</span> <span class="fu">bhatt</span>(F1, F2, vowel)) <span class="sc">%&gt;%</span></span>
<span id="cb38-4"><a href="#cb38-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>()</span>
<span id="cb38-5"><a href="#cb38-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb38-6"><a href="#cb38-6" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(BA_mod_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 4
  speaker context .draw bhatt_aff
  &lt;chr&gt;   &lt;fct&gt;   &lt;int&gt;     &lt;dbl&gt;
1 ral130  oral        1     0.361
2 ral130  oral        2     0.558
3 ral130  oral        3     0.524
4 ral130  oral        4     0.603
5 ral130  oral        5     0.432
6 ral130  oral        6     0.495</code></pre>
</div>
</div>
<p>And finally, the average-speaker modelled BA:</p>
<div class="cell" data-hash="MMO_tutorial_cache/html/bhatt_calc_av_0e577543a046a15151ac690568aea1df">
<div class="sourceCode cell-code" id="cb40"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb40-1"><a href="#cb40-1" aria-hidden="true" tabindex="-1"></a>BA_mod_av <span class="ot">&lt;-</span> predicted_av_BA <span class="sc">%&gt;%</span></span>
<span id="cb40-2"><a href="#cb40-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(context, .draw) <span class="sc">%&gt;%</span> <span class="co"># calculate for each context draw</span></span>
<span id="cb40-3"><a href="#cb40-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(<span class="at">bhatt_aff =</span> <span class="fu">bhatt</span>(F1, F2, vowel)) <span class="sc">%&gt;%</span></span>
<span id="cb40-4"><a href="#cb40-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>()</span>
<span id="cb40-5"><a href="#cb40-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb40-6"><a href="#cb40-6" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(BA_mod_av)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 3
  context .draw bhatt_aff
  &lt;fct&gt;   &lt;int&gt;     &lt;dbl&gt;
1 oral        1     0.685
2 oral        2     0.587
3 oral        3     0.562
4 oral        4     0.648
5 oral        5     0.482
6 oral        6     0.690</code></pre>
</div>
</div>
<p>For calculating Euclidean distance, we need to take a slightly different approach for the empirical distributions than for the modelled distributions. Let’s do the empirical distributions first:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb42"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb42-1"><a href="#cb42-1" aria-hidden="true" tabindex="-1"></a>ED_emp_sp <span class="ot">&lt;-</span> raleigh_data <span class="sc">%&gt;%</span></span>
<span id="cb42-2"><a href="#cb42-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker, context, vowel) <span class="sc">%&gt;%</span></span>
<span id="cb42-3"><a href="#cb42-3" aria-hidden="true" tabindex="-1"></a>  <span class="co"># first, find the average for each speaker, context, vowel pair</span></span>
<span id="cb42-4"><a href="#cb42-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1),</span>
<span id="cb42-5"><a href="#cb42-5" aria-hidden="true" tabindex="-1"></a>            <span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%&gt;%</span></span>
<span id="cb42-6"><a href="#cb42-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb42-7"><a href="#cb42-7" aria-hidden="true" tabindex="-1"></a>  <span class="co"># reshape the dataframe so that "i" and "e" are in separate columns</span></span>
<span id="cb42-8"><a href="#cb42-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">"vowel"</span>, <span class="at">values_from=</span><span class="fu">c</span>(<span class="st">"F1"</span>, <span class="st">"F2"</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb42-9"><a href="#cb42-9" aria-hidden="true" tabindex="-1"></a>  <span class="co"># now apply the pythagorean theorem, which defines Euclidean distance</span></span>
<span id="cb42-10"><a href="#cb42-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">eucl_dist =</span> <span class="fu">sqrt</span>((F1_i <span class="sc">-</span> F1_e)<span class="sc">^</span><span class="dv">2</span> <span class="sc">+</span> (F2_i <span class="sc">-</span> F2_e)<span class="sc">^</span><span class="dv">2</span>))</span>
<span id="cb42-11"><a href="#cb42-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb42-12"><a href="#cb42-12" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(ED_emp_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 7
  speaker context   F1_i   F1_e  F2_i  F2_e eucl_dist
  &lt;chr&gt;   &lt;fct&gt;    &lt;dbl&gt;  &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;     &lt;dbl&gt;
1 ral130  oral    -1.04  -0.145 1.07  0.710     0.967
2 ral130  nasal   -0.667 -0.135 1.34  0.779     0.772
3 ral135  oral    -1.04  -0.154 0.902 0.667     0.919
4 ral135  nasal   -0.949 -0.513 1.15  0.989     0.464
5 ral139  oral    -0.819 -0.144 0.821 0.467     0.762
6 ral139  nasal   -0.528 -0.183 1.01  0.703     0.459</code></pre>
</div>
</div>
<p>This leaves us with “vestigial” formant-by-vowel columns: they’re not hurting anyone, and they could be of interest, so I’ll leave them in.</p>
<p>We calculate modelled ED from the “fitted” modelled predictions: we calculate one value from each draw. We don’t do any averaging, because each draw is from the distribution of the mean itself.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb44"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb44-1"><a href="#cb44-1" aria-hidden="true" tabindex="-1"></a>ED_mod_sp <span class="ot">&lt;-</span> fitted_sp <span class="sc">%&gt;%</span></span>
<span id="cb44-2"><a href="#cb44-2" aria-hidden="true" tabindex="-1"></a>  <span class="co"># reshape the dataframe so that "i" and "e" are in separate columns</span></span>
<span id="cb44-3"><a href="#cb44-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">"vowel"</span>, <span class="at">values_from=</span><span class="fu">c</span>(<span class="st">"F1"</span>, <span class="st">"F2"</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb44-4"><a href="#cb44-4" aria-hidden="true" tabindex="-1"></a>  <span class="co"># now apply the pythagorean theorem, which defines Euclidean distance</span></span>
<span id="cb44-5"><a href="#cb44-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">eucl_dist =</span> <span class="fu">sqrt</span>((F1_i <span class="sc">-</span> F1_e)<span class="sc">^</span><span class="dv">2</span> <span class="sc">+</span> (F2_i <span class="sc">-</span> F2_e)<span class="sc">^</span><span class="dv">2</span>))</span>
<span id="cb44-6"><a href="#cb44-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb44-7"><a href="#cb44-7" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(ED_mod_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 8
  context speaker .draw   F1_i    F1_e  F2_i  F2_e eucl_dist
  &lt;fct&gt;   &lt;chr&gt;   &lt;int&gt;  &lt;dbl&gt;   &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;     &lt;dbl&gt;
1 oral    ral130      1 -1.00  -0.159  1.03  0.724     0.898
2 oral    ral130      2 -0.986 -0.0643 1.03  0.610     1.01 
3 oral    ral130      3 -1.00   0.0147 1.00  0.678     1.07 
4 oral    ral130      4 -0.964 -0.0113 0.944 0.723     0.978
5 oral    ral130      5 -0.962  0.0446 0.971 0.682     1.05 
6 oral    ral130      6 -1.10  -0.0247 1.03  0.678     1.14 </code></pre>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb46"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb46-1"><a href="#cb46-1" aria-hidden="true" tabindex="-1"></a>ED_mod_av <span class="ot">&lt;-</span> fitted_av <span class="sc">%&gt;%</span></span>
<span id="cb46-2"><a href="#cb46-2" aria-hidden="true" tabindex="-1"></a>  <span class="co"># reshape the dataframe so that "i" and "e" are in separate columns</span></span>
<span id="cb46-3"><a href="#cb46-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">"vowel"</span>, <span class="at">values_from=</span><span class="fu">c</span>(<span class="st">"F1"</span>, <span class="st">"F2"</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb46-4"><a href="#cb46-4" aria-hidden="true" tabindex="-1"></a>  <span class="co"># now apply the pythagorean theorem, which defines Euclidean distance</span></span>
<span id="cb46-5"><a href="#cb46-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">eucl_dist =</span> <span class="fu">sqrt</span>((F1_i <span class="sc">-</span> F1_e)<span class="sc">^</span><span class="dv">2</span> <span class="sc">+</span> (F2_i <span class="sc">-</span> F2_e)<span class="sc">^</span><span class="dv">2</span>))</span>
<span id="cb46-6"><a href="#cb46-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb46-7"><a href="#cb46-7" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(ED_mod_av)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 7
  context .draw   F1_i    F1_e  F2_i  F2_e eucl_dist
  &lt;fct&gt;   &lt;int&gt;  &lt;dbl&gt;   &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;     &lt;dbl&gt;
1 oral        1 -0.946 -0.0650 0.929 0.705     0.909
2 oral        2 -0.883 -0.0781 0.895 0.668     0.836
3 oral        3 -0.921 -0.0361 0.893 0.542     0.952
4 oral        4 -0.905 -0.0409 0.773 0.509     0.904
5 oral        5 -0.858  0.0152 0.757 0.573     0.892
6 oral        6 -0.892 -0.0157 0.819 0.521     0.926</code></pre>
</div>
</div>
<p>We have now calculated the overlap (or distance) in the form of Bhattacharyya affinity and Euclidean distance, from both the empirical and modelled distributions, for each individual speaker and for the “average speaker” (modelled only). From the modelled distributions, we have calculated 100 different values, each from a different draw from the posterior, for each speaker and context. This gives us <em>distributions</em> for BA and ED.</p>
<p>Now that we have by-speaker distributions of BA, let’s calculate the by-speaker confidence intervals (CIs) for prenasal and preoral BA. We can also calculate the difference between prenasal and preoral BA, and the CI for the difference. To do this, we first take the difference by draw, then calculate the CI based on the distribution of the difference, just like for the BA values.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb48"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb48-1"><a href="#cb48-1" aria-hidden="true" tabindex="-1"></a>BA_mod_sp_wide <span class="ot">&lt;-</span> BA_mod_sp <span class="sc">%&gt;%</span></span>
<span id="cb48-2"><a href="#cb48-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from=</span>context, <span class="at">values_from=</span>bhatt_aff) <span class="sc">%&gt;%</span></span>
<span id="cb48-3"><a href="#cb48-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">diff=</span>nasal<span class="sc">-</span>oral) <span class="sc">%&gt;%</span></span>
<span id="cb48-4"><a href="#cb48-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker) <span class="sc">%&gt;%</span></span>
<span id="cb48-5"><a href="#cb48-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mean_qi</span>()</span>
<span id="cb48-6"><a href="#cb48-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb48-7"><a href="#cb48-7" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(BA_mod_sp_wide)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 13
  speaker  oral oral.lower oral.upper nasal nasal.lower nasal.upper  diff
  &lt;chr&gt;   &lt;dbl&gt;      &lt;dbl&gt;      &lt;dbl&gt; &lt;dbl&gt;       &lt;dbl&gt;       &lt;dbl&gt; &lt;dbl&gt;
1 ral130  0.523      0.293      0.689 0.768       0.572       0.910 0.246
2 ral135  0.573      0.414      0.727 0.877       0.751       0.948 0.304
3 ral139  0.596      0.387      0.770 0.873       0.769       0.950 0.278
4 ral216  0.618      0.384      0.816 0.904       0.796       0.956 0.286
5 ral303  0.616      0.400      0.804 0.812       0.631       0.935 0.196
6 ral342  0.585      0.348      0.792 0.877       0.736       0.955 0.292
# ℹ 5 more variables: diff.lower &lt;dbl&gt;, diff.upper &lt;dbl&gt;, .width &lt;dbl&gt;,
#   .point &lt;chr&gt;, .interval &lt;chr&gt;</code></pre>
</div>
</div>
<p>We can also pivot the empirical dataframe for easier plotting (below) and calculate the difference, but there is not a clear way to calculate confidence intervals.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb50"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb50-1"><a href="#cb50-1" aria-hidden="true" tabindex="-1"></a>BA_emp_sp_wide <span class="ot">&lt;-</span> BA_emp_sp <span class="sc">%&gt;%</span></span>
<span id="cb50-2"><a href="#cb50-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from=</span>context, <span class="at">values_from=</span>bhatt_aff) <span class="sc">%&gt;%</span></span>
<span id="cb50-3"><a href="#cb50-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">diff=</span>nasal<span class="sc">-</span>oral)</span>
<span id="cb50-4"><a href="#cb50-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb50-5"><a href="#cb50-5" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(BA_emp_sp_wide)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 4
  speaker  oral nasal  diff
  &lt;chr&gt;   &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;
1 ral130  0.519 0.812 0.293
2 ral135  0.608 0.861 0.253
3 ral139  0.697 0.904 0.206
4 ral216  0.689 0.887 0.198
5 ral303  0.765 0.890 0.125
6 ral342  0.663 0.875 0.212</code></pre>
</div>
</div>
<p>Now let’s do the same thing for modelled and empirical Euclidean distance:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb52"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb52-1"><a href="#cb52-1" aria-hidden="true" tabindex="-1"></a>ED_mod_sp_wide <span class="ot">&lt;-</span> ED_mod_sp <span class="sc">%&gt;%</span></span>
<span id="cb52-2"><a href="#cb52-2" aria-hidden="true" tabindex="-1"></a>  dplyr<span class="sc">::</span><span class="fu">select</span>(speaker, context, eucl_dist, .draw) <span class="sc">%&gt;%</span></span>
<span id="cb52-3"><a href="#cb52-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from=</span>context, <span class="at">values_from=</span>eucl_dist) <span class="sc">%&gt;%</span></span>
<span id="cb52-4"><a href="#cb52-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">diff=</span>nasal<span class="sc">-</span>oral) <span class="sc">%&gt;%</span></span>
<span id="cb52-5"><a href="#cb52-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(speaker) <span class="sc">%&gt;%</span></span>
<span id="cb52-6"><a href="#cb52-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mean_qi</span>()</span>
<span id="cb52-7"><a href="#cb52-7" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb52-8"><a href="#cb52-8" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(ED_mod_sp_wide)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 13
  speaker  oral oral.lower oral.upper nasal nasal.lower nasal.upper   diff
  &lt;chr&gt;   &lt;dbl&gt;      &lt;dbl&gt;      &lt;dbl&gt; &lt;dbl&gt;       &lt;dbl&gt;       &lt;dbl&gt;  &lt;dbl&gt;
1 ral130  1.04       0.852       1.22 0.597      0.435        0.842 -0.441
2 ral135  0.974      0.832       1.11 0.304      0.135        0.492 -0.670
3 ral139  0.924      0.729       1.06 0.359      0.141        0.564 -0.565
4 ral216  0.866      0.666       1.01 0.228      0.0582       0.416 -0.638
5 ral303  0.885      0.676       1.08 0.502      0.266        0.711 -0.383
6 ral342  0.923      0.795       1.05 0.331      0.102        0.551 -0.592
# ℹ 5 more variables: diff.lower &lt;dbl&gt;, diff.upper &lt;dbl&gt;, .width &lt;dbl&gt;,
#   .point &lt;chr&gt;, .interval &lt;chr&gt;</code></pre>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb54"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb54-1"><a href="#cb54-1" aria-hidden="true" tabindex="-1"></a>ED_emp_sp_wide <span class="ot">&lt;-</span> ED_emp_sp <span class="sc">%&gt;%</span></span>
<span id="cb54-2"><a href="#cb54-2" aria-hidden="true" tabindex="-1"></a>  dplyr<span class="sc">::</span><span class="fu">select</span>(speaker, context, eucl_dist) <span class="sc">%&gt;%</span></span>
<span id="cb54-3"><a href="#cb54-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from=</span>context, <span class="at">values_from=</span>eucl_dist) <span class="sc">%&gt;%</span></span>
<span id="cb54-4"><a href="#cb54-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">diff=</span>nasal<span class="sc">-</span>oral)</span>
<span id="cb54-5"><a href="#cb54-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb54-6"><a href="#cb54-6" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(ED_emp_sp_wide)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 4
  speaker  oral nasal   diff
  &lt;chr&gt;   &lt;dbl&gt; &lt;dbl&gt;  &lt;dbl&gt;
1 ral130  0.967 0.772 -0.195
2 ral135  0.919 0.464 -0.455
3 ral139  0.762 0.459 -0.303
4 ral216  0.729 0.494 -0.236
5 ral303  0.894 0.546 -0.349
6 ral342  0.890 0.482 -0.408</code></pre>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb56"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb56-1"><a href="#cb56-1" aria-hidden="true" tabindex="-1"></a>p_BA_mod <span class="ot">&lt;-</span> BA_mod_sp_wide <span class="sc">%&gt;%</span></span>
<span id="cb56-2"><a href="#cb56-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>oral, <span class="at">y=</span>nasal, <span class="at">color=</span>speaker)) <span class="sc">+</span></span>
<span id="cb56-3"><a href="#cb56-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb56-4"><a href="#cb56-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">xlim</span>(<span class="dv">0</span>, <span class="dv">1</span>) <span class="sc">+</span> <span class="fu">ylim</span>(<span class="dv">0</span>, <span class="dv">1</span>) <span class="sc">+</span></span>
<span id="cb56-5"><a href="#cb56-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_abline</span>(<span class="at">lty=</span><span class="dv">3</span>) <span class="sc">+</span></span>
<span id="cb56-6"><a href="#cb56-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_fixed</span>() <span class="sc">+</span></span>
<span id="cb56-7"><a href="#cb56-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">xlab</span>(<span class="st">'preoral BA'</span>) <span class="sc">+</span> <span class="fu">ylab</span>(<span class="st">'prenasal BA'</span>)</span>
<span id="cb56-8"><a href="#cb56-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb56-9"><a href="#cb56-9" aria-hidden="true" tabindex="-1"></a>p_BA_emp <span class="ot">&lt;-</span> BA_emp_sp_wide <span class="sc">%&gt;%</span></span>
<span id="cb56-10"><a href="#cb56-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>oral, <span class="at">y=</span>nasal, <span class="at">color=</span>speaker)) <span class="sc">+</span></span>
<span id="cb56-11"><a href="#cb56-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb56-12"><a href="#cb56-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">xlim</span>(<span class="dv">0</span>, <span class="dv">1</span>) <span class="sc">+</span> <span class="fu">ylim</span>(<span class="dv">0</span>, <span class="dv">1</span>) <span class="sc">+</span></span>
<span id="cb56-13"><a href="#cb56-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_abline</span>(<span class="at">lty=</span><span class="dv">3</span>) <span class="sc">+</span></span>
<span id="cb56-14"><a href="#cb56-14" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_fixed</span>() <span class="sc">+</span></span>
<span id="cb56-15"><a href="#cb56-15" aria-hidden="true" tabindex="-1"></a>  <span class="fu">xlab</span>(<span class="st">'preoral BA'</span>) <span class="sc">+</span> <span class="fu">ylab</span>(<span class="st">'prenasal BA'</span>)</span>
<span id="cb56-16"><a href="#cb56-16" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb56-17"><a href="#cb56-17" aria-hidden="true" tabindex="-1"></a>p_ED_mod <span class="ot">&lt;-</span> ED_mod_sp_wide <span class="sc">%&gt;%</span></span>
<span id="cb56-18"><a href="#cb56-18" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>oral, <span class="at">y=</span>nasal, <span class="at">color=</span>speaker)) <span class="sc">+</span></span>
<span id="cb56-19"><a href="#cb56-19" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb56-20"><a href="#cb56-20" aria-hidden="true" tabindex="-1"></a>  <span class="fu">xlim</span>(<span class="dv">0</span>, <span class="fl">1.5</span>) <span class="sc">+</span> <span class="fu">ylim</span>(<span class="dv">0</span>, <span class="fl">1.5</span>) <span class="sc">+</span></span>
<span id="cb56-21"><a href="#cb56-21" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_abline</span>(<span class="at">lty=</span><span class="dv">3</span>) <span class="sc">+</span></span>
<span id="cb56-22"><a href="#cb56-22" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_fixed</span>() <span class="sc">+</span></span>
<span id="cb56-23"><a href="#cb56-23" aria-hidden="true" tabindex="-1"></a>  <span class="fu">xlab</span>(<span class="st">'preoral ED'</span>) <span class="sc">+</span> <span class="fu">ylab</span>(<span class="st">'prenasal ED'</span>)</span>
<span id="cb56-24"><a href="#cb56-24" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb56-25"><a href="#cb56-25" aria-hidden="true" tabindex="-1"></a>p_ED_emp <span class="ot">&lt;-</span> ED_emp_sp_wide <span class="sc">%&gt;%</span></span>
<span id="cb56-26"><a href="#cb56-26" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>oral, <span class="at">y=</span>nasal, <span class="at">color=</span>speaker)) <span class="sc">+</span></span>
<span id="cb56-27"><a href="#cb56-27" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb56-28"><a href="#cb56-28" aria-hidden="true" tabindex="-1"></a>  <span class="fu">xlim</span>(<span class="dv">0</span>, <span class="fl">1.5</span>) <span class="sc">+</span> <span class="fu">ylim</span>(<span class="dv">0</span>, <span class="fl">1.5</span>) <span class="sc">+</span></span>
<span id="cb56-29"><a href="#cb56-29" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_abline</span>(<span class="at">lty=</span><span class="dv">3</span>) <span class="sc">+</span></span>
<span id="cb56-30"><a href="#cb56-30" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_fixed</span>() <span class="sc">+</span></span>
<span id="cb56-31"><a href="#cb56-31" aria-hidden="true" tabindex="-1"></a>  <span class="fu">xlab</span>(<span class="st">'preoral ED'</span>) <span class="sc">+</span> <span class="fu">ylab</span>(<span class="st">'prenasal ED'</span>)</span>
<span id="cb56-32"><a href="#cb56-32" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb56-33"><a href="#cb56-33" aria-hidden="true" tabindex="-1"></a>(p_BA_mod <span class="sc">+</span> p_BA_emp) <span class="sc">/</span> <span class="co"># + plot_layout(axes='collect')</span></span>
<span id="cb56-34"><a href="#cb56-34" aria-hidden="true" tabindex="-1"></a>  (p_ED_mod <span class="sc">+</span> p_ED_emp) <span class="sc">+</span> <span class="co"># I want to collect the axes, but the command isn't working; see this patchwork issue: https://github.com/thomasp85/patchwork/issues/366</span></span>
<span id="cb56-35"><a href="#cb56-35" aria-hidden="true" tabindex="-1"></a>  <span class="fu">plot_layout</span>(<span class="at">guides=</span><span class="st">'collect'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/unnamed-chunk-9-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>We can also make plots showing confidence intervals for each speaker.</p>
<p>Bhattacharyya affinity:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb57"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb57-1"><a href="#cb57-1" aria-hidden="true" tabindex="-1"></a>p_BA_oral <span class="ot">&lt;-</span> BA_mod_sp_wide <span class="sc">%&gt;%</span></span>
<span id="cb57-2"><a href="#cb57-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span><span class="fu">reorder</span>(speaker, oral), <span class="at">y=</span>oral, <span class="at">color=</span>speaker)) <span class="sc">+</span></span>
<span id="cb57-3"><a href="#cb57-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb57-4"><a href="#cb57-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_linerange</span>(<span class="fu">aes</span>(<span class="at">ymin=</span>oral.lower, <span class="at">ymax=</span>oral.upper)) <span class="sc">+</span></span>
<span id="cb57-5"><a href="#cb57-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(<span class="at">axis.title.x=</span><span class="fu">element_blank</span>())</span>
<span id="cb57-6"><a href="#cb57-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb57-7"><a href="#cb57-7" aria-hidden="true" tabindex="-1"></a>p_BA_nasal <span class="ot">&lt;-</span> BA_mod_sp_wide <span class="sc">%&gt;%</span></span>
<span id="cb57-8"><a href="#cb57-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span><span class="fu">reorder</span>(speaker, nasal), <span class="at">y=</span>nasal, <span class="at">color=</span>speaker)) <span class="sc">+</span></span>
<span id="cb57-9"><a href="#cb57-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb57-10"><a href="#cb57-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_linerange</span>(<span class="fu">aes</span>(<span class="at">ymin=</span>nasal.lower, <span class="at">ymax=</span>nasal.upper)) <span class="sc">+</span></span>
<span id="cb57-11"><a href="#cb57-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(<span class="at">axis.title.x=</span><span class="fu">element_blank</span>())</span>
<span id="cb57-12"><a href="#cb57-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb57-13"><a href="#cb57-13" aria-hidden="true" tabindex="-1"></a>p_BA_diff <span class="ot">&lt;-</span> BA_mod_sp_wide <span class="sc">%&gt;%</span></span>
<span id="cb57-14"><a href="#cb57-14" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span><span class="fu">reorder</span>(speaker, diff), <span class="at">y=</span>diff, <span class="at">color=</span>speaker)) <span class="sc">+</span></span>
<span id="cb57-15"><a href="#cb57-15" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb57-16"><a href="#cb57-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_linerange</span>(<span class="fu">aes</span>(<span class="at">ymin=</span>diff.lower, <span class="at">ymax=</span>diff.upper)) <span class="sc">+</span></span>
<span id="cb57-17"><a href="#cb57-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">xlab</span>(<span class="st">'speaker'</span>)</span>
<span id="cb57-18"><a href="#cb57-18" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb57-19"><a href="#cb57-19" aria-hidden="true" tabindex="-1"></a>p_BA_oral <span class="sc">/</span></span>
<span id="cb57-20"><a href="#cb57-20" aria-hidden="true" tabindex="-1"></a>  p_BA_nasal <span class="sc">/</span></span>
<span id="cb57-21"><a href="#cb57-21" aria-hidden="true" tabindex="-1"></a>  p_BA_diff <span class="sc">+</span></span>
<span id="cb57-22"><a href="#cb57-22" aria-hidden="true" tabindex="-1"></a>  <span class="fu">plot_layout</span>(<span class="at">guides=</span><span class="st">'collect'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/unnamed-chunk-10-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>And Euclidean distance:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb58"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb58-1"><a href="#cb58-1" aria-hidden="true" tabindex="-1"></a>p_ED_oral <span class="ot">&lt;-</span> ED_mod_sp_wide <span class="sc">%&gt;%</span></span>
<span id="cb58-2"><a href="#cb58-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span><span class="fu">reorder</span>(speaker, <span class="sc">-</span>oral), <span class="at">y=</span>oral, <span class="at">color=</span>speaker)) <span class="sc">+</span></span>
<span id="cb58-3"><a href="#cb58-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb58-4"><a href="#cb58-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_linerange</span>(<span class="fu">aes</span>(<span class="at">ymin=</span>oral.lower, <span class="at">ymax=</span>oral.upper)) <span class="sc">+</span></span>
<span id="cb58-5"><a href="#cb58-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(<span class="at">axis.title.x=</span><span class="fu">element_blank</span>())</span>
<span id="cb58-6"><a href="#cb58-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb58-7"><a href="#cb58-7" aria-hidden="true" tabindex="-1"></a>p_ED_nasal <span class="ot">&lt;-</span> ED_mod_sp_wide <span class="sc">%&gt;%</span></span>
<span id="cb58-8"><a href="#cb58-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span><span class="fu">reorder</span>(speaker, <span class="sc">-</span>nasal), <span class="at">y=</span>nasal, <span class="at">color=</span>speaker)) <span class="sc">+</span></span>
<span id="cb58-9"><a href="#cb58-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb58-10"><a href="#cb58-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_linerange</span>(<span class="fu">aes</span>(<span class="at">ymin=</span>nasal.lower, <span class="at">ymax=</span>nasal.upper)) <span class="sc">+</span></span>
<span id="cb58-11"><a href="#cb58-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(<span class="at">axis.title.x=</span><span class="fu">element_blank</span>())</span>
<span id="cb58-12"><a href="#cb58-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb58-13"><a href="#cb58-13" aria-hidden="true" tabindex="-1"></a>p_ED_diff <span class="ot">&lt;-</span> ED_mod_sp_wide <span class="sc">%&gt;%</span></span>
<span id="cb58-14"><a href="#cb58-14" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span><span class="fu">reorder</span>(speaker, <span class="sc">-</span>diff), <span class="at">y=</span>diff, <span class="at">color=</span>speaker)) <span class="sc">+</span></span>
<span id="cb58-15"><a href="#cb58-15" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb58-16"><a href="#cb58-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_linerange</span>(<span class="fu">aes</span>(<span class="at">ymin=</span>diff.lower, <span class="at">ymax=</span>diff.upper)) <span class="sc">+</span></span>
<span id="cb58-17"><a href="#cb58-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">xlab</span>(<span class="st">'speaker'</span>)</span>
<span id="cb58-18"><a href="#cb58-18" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb58-19"><a href="#cb58-19" aria-hidden="true" tabindex="-1"></a>p_ED_oral <span class="sc">/</span></span>
<span id="cb58-20"><a href="#cb58-20" aria-hidden="true" tabindex="-1"></a>  p_ED_nasal <span class="sc">/</span></span>
<span id="cb58-21"><a href="#cb58-21" aria-hidden="true" tabindex="-1"></a>  p_ED_diff <span class="sc">+</span></span>
<span id="cb58-22"><a href="#cb58-22" aria-hidden="true" tabindex="-1"></a>  <span class="fu">plot_layout</span>(<span class="at">guides=</span><span class="st">'collect'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/unnamed-chunk-11-1.png" class="img-fluid" width="672"></p>
</div>
</div>
</section>
<section id="summary" class="level2">
<h2 class="anchored" data-anchor-id="summary">Summary</h2>
<p>In this tutorial, you have learned the tools to calculate vowel overlap from modelled distributions. Specifically, you have learned:</p>
<ul>
<li>How to <a href="#Model_Fitting">fit a multivariate Bayesian model</a></li>
<li>How to <a href="#Simulate">simulate a distribution</a> from the fitted model
<ul>
<li>You should also have the tools to decide what kinds of simulations your analysis calls for</li>
</ul></li>
<li>How to <a href="#uncertainty">calculate uncertainty</a> in the posteriors and in quantities computed from posteriors</li>
<li>How to <a href="#Calculate_Overlap">calculate measures of overlap</a> from the simulated distributions
<ul>
<li>We showed the process for two example measures: Euclidean distance and Bhattacharyya affinity</li>
</ul></li>
</ul>
<p>Finally, while the example application here is vowel merger, we invite you to follow this method on any type of analysis where you find it useful.</p>

</section>


<div id="quarto-appendix" class="default"><section class="quarto-appendix-contents" role="doc-bibliography"><h2 class="anchored quarto-appendix-heading">References</h2><div id="refs" class="references csl-bib-body hanging-indent" role="list">
<div id="ref-burkner_advanced_2018" class="csl-entry" role="listitem">
Bürkner, Paul-Christian. 2018. <span>“Advanced <span>Bayesian</span> Multilevel Modeling with the <span>R</span> Package Brms.”</span> <em>The R Journal</em> 10 (1): 395–411. <a href="https://doi.org/10.32614/RJ-2018-017">https://doi.org/10.32614/RJ-2018-017</a>.
</div>
<div id="ref-calenge_the_2006" class="csl-entry" role="listitem">
Calenge, Clement. 2006. <span>“The Package Adehabitat for the <span>R</span> Software: <span>A</span> Tool for the Analysis of Space and Habitat Use by Animals.”</span> <em>Ecological Modelling</em> 197: 1035.
</div>
<div id="ref-dodsworth_language_2020" class="csl-entry" role="listitem">
Dodsworth, Robin, and Richard A. Benton. 2020. <em>Language Variation and Change in Social Networks: <span>A</span> Bipartite Approach</em>. Routledge.
</div>
<div id="ref-kay_tidybayes_2020" class="csl-entry" role="listitem">
Kay, Matthew. 2020. <span>“Tidybayes: <span>Tidy</span> Data and Geoms for <span>Bayesian</span> Models.”</span> <em>R Package Version</em> 2 (1): 1.
</div>
<div id="ref-kelley_comparison_2020" class="csl-entry" role="listitem">
Kelley, Matthew C., and Benjamin V. Tucker. 2020. <span>“A Comparison of Four Vowel Overlap Measures.”</span> <em>The Journal of the Acoustical Society of America</em> 147 (1): 137–45. <a href="https://doi.org/10.1121/10.0000494">https://doi.org/10.1121/10.0000494</a>.
</div>
<div id="ref-labov_atlas_2006" class="csl-entry" role="listitem">
Labov, William, Sharon Ash, and Charles Boberg. 2006. <em>The <span>Atlas</span> of <span>North</span> <span>American</span> <span>English</span>: <span>Phonetics</span>, <span>Phonology</span> and <span>Sound</span> <span>Change</span></em>. De Gruyter Mouton. <a href="https://doi.org/10.1515/9783110167467">https://doi.org/10.1515/9783110167467</a>.
</div>
<div id="ref-lobanov_classification_1971" class="csl-entry" role="listitem">
Lobanov, Boris. 1971. <span>“Classification of <span>Russian</span> Vowels Spoken by Different Speakers.”</span> <em>Journal of the Acoustical Society of America</em> 49 (2B): 606–8. <a href="https://pubs.aip.org/asa/jasa/article-abstract/49/2B/606/747097/Classification-of-Russian-Vowels-Spoken-by">https://pubs.aip.org/asa/jasa/article-abstract/49/2B/606/747097/Classification-of-Russian-Vowels-Spoken-by</a>.
</div>
<div id="ref-nycz_best_2014" class="csl-entry" role="listitem">
Nycz, Jennifer, and Lauren Hall-Lew. 2014. <span>“Best Practices in Measuring Vowel Merger.”</span> <em>Proceedings of Meetings on Acoustics</em> 20 (1): 060008. <a href="https://doi.org/10.1121/1.4894063">https://doi.org/10.1121/1.4894063</a>.
</div>
<div id="ref-smith_modelled_2024" class="csl-entry" role="listitem">
Smith, Irene, Morgan Sonderegger, and The Spade Consortium. 2024. <span>“<span>Modelled</span> <span>Multivariate</span> <span>Overlap</span>: <span>A</span> Method for Measuring Vowel Merger.”</span> In <em>Interspeech 2024</em>, 1–4. ISCA.
</div>
<div id="ref-sonderegger_managing_2022" class="csl-entry" role="listitem">
Sonderegger, Morgan, Jane Stuart-Smith, Michael McAuliffe, Rachel Macdonald, and Tyler Kendall. 2022. <span>“Managing Data for Integrated Speech Corpus Analysis in <em><span>SPeech</span> <span>Across</span> <span>Dialects</span> of <span>English</span></em> (<span>SPADE</span>).”</span> In <em>The <span>Open</span> <span>Handbook</span> of <span>Linguistic</span> <span>Data</span> <span>Management</span></em>, edited by Andrea L Berez-Kroeker, Bradley McDonnell, Eve Koller, and Lauren B Collister, 195–207.
</div>
<div id="ref-stan_rstan" class="csl-entry" role="listitem">
Stan Development Team. n.d. <span>“<span>RStan</span>: The <span>R</span> Interface to <span>Stan</span>.”</span> <a href="https://mc-stan.org/">https://mc-stan.org/</a>.
</div>
</div></section><section id="footnotes" class="footnotes footnotes-end-of-document" role="doc-endnotes"><h2 class="anchored quarto-appendix-heading">Footnotes</h2>

<ol>
<li id="fn1"><p>The ability to implement an overlap measure in more than 2 dimensions may vary from one measure to the next. For example, the R implementation of Bhattacharyya affinity in <code>adehabitatHR</code> only allows for 2-dimensional distributions, but Bhattacharyya affinity is theoretically defined for an arbitrary number of dimensions.<a href="#fnref1" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
</ol>
</section></div></main>
<!-- /main column -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
  const toggleBodyColorMode = (bsSheetEl) => {
    const mode = bsSheetEl.getAttribute("data-mode");
    const bodyEl = window.document.querySelector("body");
    if (mode === "dark") {
      bodyEl.classList.add("quarto-dark");
      bodyEl.classList.remove("quarto-light");
    } else {
      bodyEl.classList.add("quarto-light");
      bodyEl.classList.remove("quarto-dark");
    }
  }
  const toggleBodyColorPrimary = () => {
    const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
    if (bsSheetEl) {
      toggleBodyColorMode(bsSheetEl);
    }
  }
  toggleBodyColorPrimary();  
  const icon = "";
  const anchorJS = new window.AnchorJS();
  anchorJS.options = {
    placement: 'right',
    icon: icon
  };
  anchorJS.add('.anchored');
  const isCodeAnnotation = (el) => {
    for (const clz of el.classList) {
      if (clz.startsWith('code-annotation-')) {                     
        return true;
      }
    }
    return false;
  }
  const clipboard = new window.ClipboardJS('.code-copy-button', {
    text: function(trigger) {
      const codeEl = trigger.previousElementSibling.cloneNode(true);
      for (const childEl of codeEl.children) {
        if (isCodeAnnotation(childEl)) {
          childEl.remove();
        }
      }
      return codeEl.innerText;
    }
  });
  clipboard.on('success', function(e) {
    // button target
    const button = e.trigger;
    // don't keep focus
    button.blur();
    // flash "checked"
    button.classList.add('code-copy-button-checked');
    var currentTitle = button.getAttribute("title");
    button.setAttribute("title", "Copied!");
    let tooltip;
    if (window.bootstrap) {
      button.setAttribute("data-bs-toggle", "tooltip");
      button.setAttribute("data-bs-placement", "left");
      button.setAttribute("data-bs-title", "Copied!");
      tooltip = new bootstrap.Tooltip(button, 
        { trigger: "manual", 
          customClass: "code-copy-button-tooltip",
          offset: [0, -8]});
      tooltip.show();    
    }
    setTimeout(function() {
      if (tooltip) {
        tooltip.hide();
        button.removeAttribute("data-bs-title");
        button.removeAttribute("data-bs-toggle");
        button.removeAttribute("data-bs-placement");
      }
      button.setAttribute("title", currentTitle);
      button.classList.remove('code-copy-button-checked');
    }, 1000);
    // clear code selection
    e.clearSelection();
  });
  function tippyHover(el, contentFn) {
    const config = {
      allowHTML: true,
      content: contentFn,
      maxWidth: 500,
      delay: 100,
      arrow: false,
      appendTo: function(el) {
          return el.parentElement;
      },
      interactive: true,
      interactiveBorder: 10,
      theme: 'quarto',
      placement: 'bottom-start'
    };
    window.tippy(el, config); 
  }
  const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
  for (var i=0; i<noterefs.length; i++) {
    const ref = noterefs[i];
    tippyHover(ref, function() {
      // use id or data attribute instead here
      let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
      try { href = new URL(href).hash; } catch {}
      const id = href.replace(/^#\/?/, "");
      const note = window.document.getElementById(id);
      return note.innerHTML;
    });
  }
      let selectedAnnoteEl;
      const selectorForAnnotation = ( cell, annotation) => {
        let cellAttr = 'data-code-cell="' + cell + '"';
        let lineAttr = 'data-code-annotation="' +  annotation + '"';
        const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
        return selector;
      }
      const selectCodeLines = (annoteEl) => {
        const doc = window.document;
        const targetCell = annoteEl.getAttribute("data-target-cell");
        const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
        const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
        const lines = annoteSpan.getAttribute("data-code-lines").split(",");
        const lineIds = lines.map((line) => {
          return targetCell + "-" + line;
        })
        let top = null;
        let height = null;
        let parent = null;
        if (lineIds.length > 0) {
            //compute the position of the single el (top and bottom and make a div)
            const el = window.document.getElementById(lineIds[0]);
            top = el.offsetTop;
            height = el.offsetHeight;
            parent = el.parentElement.parentElement;
          if (lineIds.length > 1) {
            const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
            const bottom = lastEl.offsetTop + lastEl.offsetHeight;
            height = bottom - top;
          }
          if (top !== null && height !== null && parent !== null) {
            // cook up a div (if necessary) and position it 
            let div = window.document.getElementById("code-annotation-line-highlight");
            if (div === null) {
              div = window.document.createElement("div");
              div.setAttribute("id", "code-annotation-line-highlight");
              div.style.position = 'absolute';
              parent.appendChild(div);
            }
            div.style.top = top - 2 + "px";
            div.style.height = height + 4 + "px";
            let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
            if (gutterDiv === null) {
              gutterDiv = window.document.createElement("div");
              gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
              gutterDiv.style.position = 'absolute';
              const codeCell = window.document.getElementById(targetCell);
              const gutter = codeCell.querySelector('.code-annotation-gutter');
              gutter.appendChild(gutterDiv);
            }
            gutterDiv.style.top = top - 2 + "px";
            gutterDiv.style.height = height + 4 + "px";
          }
          selectedAnnoteEl = annoteEl;
        }
      };
      const unselectCodeLines = () => {
        const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
        elementsIds.forEach((elId) => {
          const div = window.document.getElementById(elId);
          if (div) {
            div.remove();
          }
        });
        selectedAnnoteEl = undefined;
      };
      // Attach click handler to the DT
      const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
      for (const annoteDlNode of annoteDls) {
        annoteDlNode.addEventListener('click', (event) => {
          const clickedEl = event.target;
          if (clickedEl !== selectedAnnoteEl) {
            unselectCodeLines();
            const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
            if (activeEl) {
              activeEl.classList.remove('code-annotation-active');
            }
            selectCodeLines(clickedEl);
            clickedEl.classList.add('code-annotation-active');
          } else {
            // Unselect the line
            unselectCodeLines();
            clickedEl.classList.remove('code-annotation-active');
          }
        });
      }
  const findCites = (el) => {
    const parentEl = el.parentElement;
    if (parentEl) {
      const cites = parentEl.dataset.cites;
      if (cites) {
        return {
          el,
          cites: cites.split(' ')
        };
      } else {
        return findCites(el.parentElement)
      }
    } else {
      return undefined;
    }
  };
  var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
  for (var i=0; i<bibliorefs.length; i++) {
    const ref = bibliorefs[i];
    const citeInfo = findCites(ref);
    if (citeInfo) {
      tippyHover(citeInfo.el, function() {
        var popup = window.document.createElement('div');
        citeInfo.cites.forEach(function(cite) {
          var citeDiv = window.document.createElement('div');
          citeDiv.classList.add('hanging-indent');
          citeDiv.classList.add('csl-entry');
          var biblioDiv = window.document.getElementById('ref-' + cite);
          if (biblioDiv) {
            citeDiv.innerHTML = biblioDiv.innerHTML;
          }
          popup.appendChild(citeDiv);
        });
        return popup.innerHTML;
      });
    }
  }
});
</script>
</div> <!-- /content -->


</body></html>