-
Notifications
You must be signed in to change notification settings - Fork 0
/
MMO_tutorial.html
1375 lines (1346 loc) · 146 KB
/
MMO_tutorial.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.3.433">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="Irene B. R. Smith">
<meta name="dcterms.date" content="2024-09-01">
<title>MMO Tutorial</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
/* CSS for syntax highlighting */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
}
pre.numberSource { margin-left: 3em; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* CSS for citations */
div.csl-bib-body { }
div.csl-entry {
clear: both;
}
.hanging-indent div.csl-entry {
margin-left:2em;
text-indent:-2em;
}
div.csl-left-margin {
min-width:2em;
float:left;
}
div.csl-right-inline {
margin-left:2em;
padding-left:1em;
}
div.csl-indent {
margin-left: 2em;
}</style>
<script src="MMO_tutorial_files/libs/clipboard/clipboard.min.js"></script>
<script src="MMO_tutorial_files/libs/quarto-html/quarto.js"></script>
<script src="MMO_tutorial_files/libs/quarto-html/popper.min.js"></script>
<script src="MMO_tutorial_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="MMO_tutorial_files/libs/quarto-html/anchor.min.js"></script>
<link href="MMO_tutorial_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="MMO_tutorial_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="MMO_tutorial_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="MMO_tutorial_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="MMO_tutorial_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
</head>
<body>
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
<div class="quarto-alternate-formats"><h2>Other Formats</h2><ul><li><a href="MMO_tutorial.pdf"><i class="bi bi-file-pdf"></i>PDF</a></li></ul></div></div>
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">MMO Tutorial</h1>
</div>
<div class="quarto-title-meta-author">
<div class="quarto-title-meta-heading">Author</div>
<div class="quarto-title-meta-heading">Affiliation</div>
<div class="quarto-title-meta-contents">
<p class="author"><a href="http://www.irenebrsmith.com">Irene B. R. Smith</a> </p>
</div>
<div class="quarto-title-meta-contents">
<p class="affiliation">
McGill University
</p>
</div>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Published</div>
<div class="quarto-title-meta-contents">
<p class="date">September 1, 2024</p>
</div>
</div>
</div>
</header>
<section id="introduction" class="level2">
<h2 class="anchored" data-anchor-id="introduction">Introduction</h2>
<p>To skip ahead to running code, go to <a href="#Packages">the next section</a>.</p>
<p>This tutorial is meant to accompany <span class="citation" data-cites="smith_modelled_2024">Smith, Sonderegger, and Spade Consortium (<a href="#ref-smith_modelled_2024" role="doc-biblioref">2024</a>)</span> (<a href="https://arxiv.org/pdf/2406.16319">preprint</a>), which describes a new method for calculating vowel merger: Modelled Multivariate Overlap (MMO). The application discussed in the paper and shown in this tutorial is the PIN-PEN merger <span class="citation" data-cites="labov_atlas_2006">(<a href="#ref-labov_atlas_2006" role="doc-biblioref">Labov, Ash, and Boberg 2006</a>)</span>. We operationalize the degree of merger between two vowels as the amount of distributional overlap in F1 / F2 space between those two vowels. While the paper and the tutorial both focus on vowel merger, the method applies more generally to quantifying the overlap between distributions of any kind, including distributions in more than 2 dimensions<a href="#fn1" class="footnote-ref" id="fnref1" role="doc-noteref"><sup>1</sup></a>. Additionally, this method is flexible in that it can be applied for any measure of overlap that can be calculated from empirical distributions. Here, we show the method applied to calculate Bhattacharyya affinity and Euclidean distance, but it could also be used to calculate Pillai or scores any of the other methods described in <span class="citation" data-cites="nycz_best_2014">Nycz and Hall-Lew (<a href="#ref-nycz_best_2014" role="doc-biblioref">2014</a>)</span> or <span class="citation" data-cites="kelley_comparison_2020">Kelley and Tucker (<a href="#ref-kelley_comparison_2020" role="doc-biblioref">2020</a>)</span>.</p>
<p>The steps for calculating MMO are outlined below. For more details, see <span class="citation" data-cites="smith_modelled_2024">Smith, Sonderegger, and Spade Consortium (<a href="#ref-smith_modelled_2024" role="doc-biblioref">2024</a>)</span>. The steps are written for the PIN-PEN merger application that is carried throughout this tutorial, so “F1,” “F2,” “vowel,” and “context” might be different quantities in your specific application.</p>
<ol type="1">
<li><strong>Preprocessing</strong></li>
</ol>
<ul>
<li>Likely includes some form of normalization of F1 and F2 , so that overlap measures are comparable across speakers</li>
<li>After normalization, we discard all vowels except for the two that are interest here: /ɪ/ and /ɛ/</li>
<li>In addition to these two steps, you can do whatever data cleaning or transformations you like, such as centering and scaling predictors, or eliminating problem tokens</li>
</ul>
<ol start="2" type="1">
<li><strong>Modelling</strong>: Fit a multivariate model to jointly predict F1 and F2.</li>
</ol>
<ul>
<li>Bayesian multivariate modelling is an extremely convenient way to implement MMO, because it directly models distributions</li>
<li>If you have another way to generate a modelled distribution, go for it</li>
<li>If fitting Bayesian models is intimidating for you, the code for fitting a model in <code>brms</code> that is included here, along with basic knowledge of <code>lmer</code>-style model syntax, should be enough to get you fitting your own Bayesian models</li>
<li>The Interspeech2024 subfolder of this repository has examples of more complex <code>brms</code> models, specifically, with modelled variance and nested random effects structures</li>
</ul>
<ol start="3" type="1">
<li><strong>Simulation</strong>: Using the model that was fit in Step 2, simulate tokens of each vowel</li>
</ol>
<ul>
<li>Vary or hold constant other variables as appropriate
<ul>
<li>For example, for the PIN-PEN merger, <code>context</code> (prenasal or preoral) needs to be accounted for, so we simulate tokens for each <code>vowel</code>-by-<code>context</code> pair</li>
<li>If <code>duration</code> and <code>speaker gender</code> were included as model predictors, then it might make sense to hold <code>duration</code> constant (for example, at its average value across all tokens of all speakers) and to set <code>speaker gender</code> to the appropriate value for each speaker</li>
</ul></li>
<li>Simulations can be made either for each individual speaker, or for the “average” speaker</li>
<li>Below, we show how to make both of these kinds of simulations using the <code>tidybayes</code> package</li>
</ul>
<ol start="4" type="1">
<li><strong>Overlap calculation</strong>: Now that you have simulated an empirical distribution, you can calculate any measure of overlap you like.</li>
</ol>
<ul>
<li>This tutorial shows how to calculate Bhattacharyya affinity and Euclidean distance</li>
</ul>
<p>Finally, this tutorial is designed such that it can reasonably be run locally on a personal computer. The most resource-intensive step is fitting the Bayesian model: if this is too much for your computer, you can reduce the dataset even further than shown here. After fitting the model once, the <code>brm</code> line will read the file instead of fitting the model, and spare your computer the need to re-fit the model from scratch.</p>
<p>It is possible that, if you are fitting models on corpus data, you will need a more powerful computer than your personal computer. Making model predictions and calculating Bhattacharyya affinity can also be very resource-intensive, so I would also recommend calculating those externally and saving the output. The Interspeech2024 folder has examples of scripts for doing this.</p>
</section>
<section id="Packages" class="level2">
<h2 class="anchored" data-anchor-id="Packages">Packages</h2>
<p>The following packages were used to carry out the core parts of the analysis:</p>
<ul>
<li><p><code>brms</code> <span class="citation" data-cites="burkner_advanced_2018">(<a href="#ref-burkner_advanced_2018" role="doc-biblioref">Bürkner 2018</a>)</span> is used to fit the Bayesian model. It uses <code>RStan</code> <span class="citation" data-cites="stan_rstan">(<a href="#ref-stan_rstan" role="doc-biblioref">Stan Development Team, n.d.</a>)</span>, which needs to be manually installed. Instructions for installing <code>RStan</code> can be found <a href="https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started">here</a>. Once <code>RStan</code> is installed, <code>brms</code> can simply be installed using <code>install.packages("brms")</code> (more information <a href="https://paul-buerkner.github.io/brms/">here</a>).</p></li>
<li><p><code>tidybayes</code> <span class="citation" data-cites="kay_tidybayes_2020">(<a href="#ref-kay_tidybayes_2020" role="doc-biblioref">Kay 2020</a>)</span> is used to get model predictions and to summarize model predictions.</p></li>
<li><p><code>adehabitatHR</code><span class="citation" data-cites="calenge_the_2006">(<a href="#ref-calenge_the_2006" role="doc-biblioref">Calenge 2006</a>)</span> is the package used to compute Bhattacharyya affinity. The code below is adapted from <a href="https://joeystanley.com/blog/a-tutorial-in-calculating-vowel-overlap/">Joey Stanley’s tutorial</a>.</p></li>
<li><p><code>tidyverse</code> is used for <code>ggplot2</code> and <code>dplyr</code> functionality.</p></li>
</ul>
<div class="cell">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(brms)</span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tidybayes)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(adehabitatHR)</span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tidyverse)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>For visualizing results, we use the following libraries:</p>
<ul>
<li><code>patchwork</code> to help arrange <code>ggplot2</code> plots</li>
<li><code>khroma</code> for extra color palettes</li>
</ul>
<div class="cell">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(patchwork)</span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(khroma)</span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="co"># set global plot theme to theme_minimal</span></span>
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a><span class="fu">theme_set</span>(<span class="fu">theme_minimal</span>())</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="data" class="level2">
<h2 class="anchored" data-anchor-id="data">Data</h2>
<p>We’ll be using the Raleigh corpus <span class="citation" data-cites="dodsworth_language_2020">(<a href="#ref-dodsworth_language_2020" role="doc-biblioref">Dodsworth and Benton 2020</a>)</span> from the SPADE project <span class="citation" data-cites="sonderegger_managing_2022">(<a href="#ref-sonderegger_managing_2022" role="doc-biblioref">Sonderegger et al. 2022</a>)</span>, available on <a href="https://osf.io/79jgu">OSF</a>. The data has been Lobanov normalized <span class="citation" data-cites="lobanov_classification_1971">(<a href="#ref-lobanov_classification_1971" role="doc-biblioref">Lobanov 1971</a>)</span> and subsetted to include stressed tokens of /ɪ/ and /ɛ/ from 10 speakers. To see the preprocessing details, look at the script create_tutorial_dataset.R.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>raleigh_data <span class="ot"><-</span> <span class="fu">readRDS</span>(<span class="st">"data/Raleigh_subset.rds"</span>)</span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(raleigh_data)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 6
# Groups: speaker [1]
speaker vowel context word F1 F2
<chr> <fct> <fct> <chr> <dbl> <dbl>
1 ral135 e oral BET -0.377 1.29
2 ral135 i oral IT -0.962 0.429
3 ral135 i nasal IN -1.33 0.936
4 ral135 i oral IT -1.19 0.910
5 ral135 i nasal IN -0.761 1.96
6 ral135 i oral IS -1.21 0.368</code></pre>
</div>
</div>
<p>There is currently too much data to fit the model on my computer, so we randomly pick 50 tokens of each <code>vowel</code>-by-<code>context</code> pair for each speaker. If you’re still having trouble fitting a model, you could try sampling even fewer tokens.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># set seed so that code is replicable</span></span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">22</span>)</span>
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a>raleigh_data_50 <span class="ot"><-</span> raleigh_data <span class="sc">%>%</span></span>
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, context, vowel) <span class="sc">%>%</span> </span>
<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">slice_sample</span>(<span class="at">n=</span><span class="dv">50</span>) <span class="sc">%>%</span></span>
<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="exploratory-plots" class="level2">
<h2 class="anchored" data-anchor-id="exploratory-plots">Exploratory plots</h2>
<p>This section is not strictly necessary. To skip ahead, go to <a href="#Model_Fitting">Model Fitting</a>.</p>
<p>Let’s make some exploratory plots to set up our expectations.</p>
<p>First, let’s just start with looking at the <code>vowel</code>-by-<code>context</code> distributions in F1 / F2 space for each speaker.</p>
<p>If you want, you can change the vowel labels to their IPA symbols by mutating the <code>vowel</code> column before passing the data to <code>ggplot</code>. I’ll do this once to demonstrate, but in the remaining plots, ‘/ɪ/’ and ‘/ɛ/’ will show up as ‘i’ and ‘e’.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>raleigh_data <span class="sc">%>%</span></span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">vowel=</span><span class="fu">factor</span>(vowel, <span class="at">levels=</span><span class="fu">c</span>(<span class="st">'i'</span>, <span class="st">'e'</span>), <span class="fu">c</span>(<span class="st">'/ɪ/'</span>, <span class="st">'/ɛ/'</span>))) <span class="sc">%>%</span></span>
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>F2, <span class="at">y=</span>F1, <span class="at">color=</span>context)) <span class="sc">+</span></span>
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">stat_ellipse</span>(<span class="at">level=</span><span class="fl">0.66</span>, <span class="fu">aes</span>(<span class="at">lty=</span>vowel)) <span class="sc">+</span></span>
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_x_reverse</span>() <span class="sc">+</span> <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">coord_cartesian</span>(<span class="at">xlim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">ylim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">clip=</span><span class="st">'off'</span>) <span class="sc">+</span></span>
<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">facet_wrap</span>(<span class="sc">~</span>speaker) <span class="sc">+</span></span>
<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme</span>(<span class="at">aspect.ratio=</span><span class="dv">1</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/empirical_distributions-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>Let’s look at each speaker’s category center on the same graph. The lines connecting the centers represent the Euclidean distance between vowel for each context (prenasal and preoral).</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>raleigh_data <span class="sc">%>%</span></span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%>%</span> </span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="fu">across</span>(<span class="fu">c</span>(F1, F2), mean)) <span class="sc">%>%</span></span>
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>F2, <span class="at">y=</span>F1, <span class="at">color=</span>context, <span class="at">shape=</span>vowel)) <span class="sc">+</span></span>
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">stat_ellipse</span>(<span class="at">data=</span>raleigh_data, <span class="at">level=</span><span class="fl">0.66</span>, <span class="fu">aes</span>(<span class="at">lty=</span>vowel)) <span class="sc">+</span></span>
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker))) <span class="sc">+</span></span>
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_x_reverse</span>() <span class="sc">+</span> <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">coord_cartesian</span>(<span class="at">xlim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">ylim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">clip=</span><span class="st">'off'</span>) <span class="sc">+</span></span>
<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">facet_wrap</span>(<span class="sc">~</span>speaker) <span class="sc">+</span></span>
<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme</span>(<span class="at">aspect.ratio=</span><span class="dv">1</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/distribution_centers-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>The average (across speakers) effect of context on vowel for both F1 and F2.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>p_av_F1 <span class="ot"><-</span> raleigh_data <span class="sc">%>%</span></span>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a> <span class="co"># give all speakers equal weight</span></span>
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%>%</span> </span>
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1)) <span class="sc">%>%</span></span>
<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a> <span class="co"># now average all speakers together</span></span>
<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(vowel, context) <span class="sc">%>%</span></span>
<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1)) <span class="sc">%>%</span></span>
<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F1, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb8-12"><a href="#cb8-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span>context)) <span class="sc">+</span></span>
<span id="cb8-13"><a href="#cb8-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_y_reverse</span>() </span>
<span id="cb8-14"><a href="#cb8-14" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-15"><a href="#cb8-15" aria-hidden="true" tabindex="-1"></a>p_av_F2 <span class="ot"><-</span> raleigh_data <span class="sc">%>%</span></span>
<span id="cb8-16"><a href="#cb8-16" aria-hidden="true" tabindex="-1"></a> <span class="co"># give all speakers equal weight</span></span>
<span id="cb8-17"><a href="#cb8-17" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%>%</span> </span>
<span id="cb8-18"><a href="#cb8-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%>%</span></span>
<span id="cb8-19"><a href="#cb8-19" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb8-20"><a href="#cb8-20" aria-hidden="true" tabindex="-1"></a> <span class="co"># now average all speakers together</span></span>
<span id="cb8-21"><a href="#cb8-21" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(vowel, context) <span class="sc">%>%</span></span>
<span id="cb8-22"><a href="#cb8-22" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%>%</span></span>
<span id="cb8-23"><a href="#cb8-23" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb8-24"><a href="#cb8-24" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F2, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb8-25"><a href="#cb8-25" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="at">shape=</span><span class="dv">24</span>, <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb8-26"><a href="#cb8-26" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="at">lty=</span><span class="dv">2</span>, <span class="fu">aes</span>(<span class="at">group=</span>context), <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb8-27"><a href="#cb8-27" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_y_reverse</span>() </span>
<span id="cb8-28"><a href="#cb8-28" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-29"><a href="#cb8-29" aria-hidden="true" tabindex="-1"></a>p_av_F1 <span class="sc">+</span> p_av_F2 <span class="sc">+</span> <span class="fu">plot_layout</span>(<span class="at">guides=</span><span class="st">'collect'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/F1_vowel_by_context_emp_av-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>Now we can make the same plot but with each individual speaker’s interactions:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>p_F1_sp_emp <span class="ot"><-</span> raleigh_data <span class="sc">%>%</span></span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%>%</span> </span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1)) <span class="sc">%>%</span></span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F1, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="at">shape=</span><span class="dv">21</span>, <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="at">lty=</span><span class="dv">1</span>, <span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker))) <span class="sc">+</span></span>
<span id="cb9-8"><a href="#cb9-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_y_reverse</span>()</span>
<span id="cb9-9"><a href="#cb9-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-10"><a href="#cb9-10" aria-hidden="true" tabindex="-1"></a>p_F2_sp_emp <span class="ot"><-</span> raleigh_data <span class="sc">%>%</span></span>
<span id="cb9-11"><a href="#cb9-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%>%</span> </span>
<span id="cb9-12"><a href="#cb9-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%>%</span></span>
<span id="cb9-13"><a href="#cb9-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb9-14"><a href="#cb9-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F2, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb9-15"><a href="#cb9-15" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="at">shape=</span><span class="dv">24</span>, <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb9-16"><a href="#cb9-16" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="at">lty=</span><span class="dv">2</span>, <span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker)), <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb9-17"><a href="#cb9-17" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_y_reverse</span>()</span>
<span id="cb9-18"><a href="#cb9-18" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-19"><a href="#cb9-19" aria-hidden="true" tabindex="-1"></a>p_F1_sp_emp <span class="sc">+</span> p_F2_sp_emp <span class="sc">+</span> <span class="fu">plot_layout</span>(<span class="at">guides=</span><span class="st">'collect'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/F1_vowel_by_context_emp_sp-1.png" class="img-fluid" width="672"></p>
</div>
</div>
</section>
<section id="Model_Fitting" class="level2">
<h2 class="anchored" data-anchor-id="Model_Fitting">Model fitting</h2>
<p>We are going to fit a simple multivariate Bayesian model (similar to the “minimal” model in our Interspeech paper <span class="citation" data-cites="smith_modelled_2024">(<a href="#ref-smith_modelled_2024" role="doc-biblioref">Smith, Sonderegger, and Spade Consortium 2024</a>)</span>). For more model syntax examples (including more complex model structures, such as nested random effects, modelling variance, and predictors with nonlinear effects), see the model-fitting scripts in the Interspeech folder of the github repository.</p>
<p>The model we are fitting jointly models F1 and F2, which is a key feature of the method we’re proposing. The remainder of the model structure is quite simple. The fixed predictors are <code>context</code>, <code>vowel</code>, and their interaction. There are by-<code>word</code> and by-<code>speaker</code> random intercepts, and by-<code>speaker</code> random slopes of of <code>vowel</code>, <code>context</code>, and their interaction.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="co"># "p" and "q" are just dummy variables and can be anything</span></span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a>F1 <span class="ot">=</span> <span class="fu">bf</span>(F1 <span class="sc">~</span> </span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a> context<span class="sc">*</span>vowel <span class="sc">+</span></span>
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a> (<span class="dv">1</span><span class="sc">|</span>p<span class="sc">|</span>word) <span class="sc">+</span> (<span class="dv">1</span><span class="sc">+</span>context<span class="sc">*</span>vowel<span class="sc">|</span>q<span class="sc">|</span>speaker))</span>
<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a>F2 <span class="ot">=</span> <span class="fu">bf</span>(F2 <span class="sc">~</span> </span>
<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a> context<span class="sc">*</span>vowel <span class="sc">+</span></span>
<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a> (<span class="dv">1</span><span class="sc">|</span>p<span class="sc">|</span>word) <span class="sc">+</span> (<span class="dv">1</span><span class="sc">+</span>context<span class="sc">*</span>vowel<span class="sc">|</span>q<span class="sc">|</span>speaker))</span>
<span id="cb10-9"><a href="#cb10-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb10-10"><a href="#cb10-10" aria-hidden="true" tabindex="-1"></a>model <span class="ot"><-</span> <span class="fu">brm</span>(F1 <span class="sc">+</span> F2 <span class="sc">+</span> </span>
<span id="cb10-11"><a href="#cb10-11" aria-hidden="true" tabindex="-1"></a> <span class="co"># set_rescor(TRUE) means that correlations between F1 & F2 </span></span>
<span id="cb10-12"><a href="#cb10-12" aria-hidden="true" tabindex="-1"></a> <span class="co"># will be modelled.</span></span>
<span id="cb10-13"><a href="#cb10-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">set_rescor</span>(<span class="cn">TRUE</span>),</span>
<span id="cb10-14"><a href="#cb10-14" aria-hidden="true" tabindex="-1"></a> <span class="co"># we want to use the smaller dataset so it doesn't take </span></span>
<span id="cb10-15"><a href="#cb10-15" aria-hidden="true" tabindex="-1"></a> <span class="co"># forever to fit</span></span>
<span id="cb10-16"><a href="#cb10-16" aria-hidden="true" tabindex="-1"></a> <span class="at">data=</span>raleigh_data_50, </span>
<span id="cb10-17"><a href="#cb10-17" aria-hidden="true" tabindex="-1"></a> <span class="co"># fitting a gaussian model (more details below): this is the </span></span>
<span id="cb10-18"><a href="#cb10-18" aria-hidden="true" tabindex="-1"></a> <span class="co"># default, but I'm making it explicit here for pedagogical</span></span>
<span id="cb10-19"><a href="#cb10-19" aria-hidden="true" tabindex="-1"></a> <span class="co"># purposes</span></span>
<span id="cb10-20"><a href="#cb10-20" aria-hidden="true" tabindex="-1"></a> <span class="at">family=</span>gaussian,</span>
<span id="cb10-21"><a href="#cb10-21" aria-hidden="true" tabindex="-1"></a> <span class="co"># if this file exists, this call will just read in the file. </span></span>
<span id="cb10-22"><a href="#cb10-22" aria-hidden="true" tabindex="-1"></a> <span class="co"># Otherwise, the call will save the model here. </span></span>
<span id="cb10-23"><a href="#cb10-23" aria-hidden="true" tabindex="-1"></a> <span class="at">file=</span><span class="st">"tutorial_model"</span>,</span>
<span id="cb10-24"><a href="#cb10-24" aria-hidden="true" tabindex="-1"></a> <span class="co"># Set prior for correlations. All other parameters just use </span></span>
<span id="cb10-25"><a href="#cb10-25" aria-hidden="true" tabindex="-1"></a> <span class="co"># default priors. </span></span>
<span id="cb10-26"><a href="#cb10-26" aria-hidden="true" tabindex="-1"></a> <span class="at">prior =</span> <span class="fu">c</span>(<span class="fu">prior</span>(<span class="fu">lkj</span>(<span class="fl">1.5</span>), <span class="at">class =</span> cor)), </span>
<span id="cb10-27"><a href="#cb10-27" aria-hidden="true" tabindex="-1"></a> <span class="co"># my computer has 8 cores, so I'm using 4 here, change </span></span>
<span id="cb10-28"><a href="#cb10-28" aria-hidden="true" tabindex="-1"></a> <span class="co"># according to your own machine. Ideally, you want to use the </span></span>
<span id="cb10-29"><a href="#cb10-29" aria-hidden="true" tabindex="-1"></a> <span class="co"># same number of cores as chains, so that all the chains can </span></span>
<span id="cb10-30"><a href="#cb10-30" aria-hidden="true" tabindex="-1"></a> <span class="co"># run in parallel.</span></span>
<span id="cb10-31"><a href="#cb10-31" aria-hidden="true" tabindex="-1"></a> <span class="at">chains=</span><span class="dv">4</span>, <span class="at">cores=</span><span class="dv">4</span>, </span>
<span id="cb10-32"><a href="#cb10-32" aria-hidden="true" tabindex="-1"></a> <span class="co"># If there are warnings, consider increasing the number of</span></span>
<span id="cb10-33"><a href="#cb10-33" aria-hidden="true" tabindex="-1"></a> <span class="co"># iterations.</span></span>
<span id="cb10-34"><a href="#cb10-34" aria-hidden="true" tabindex="-1"></a> <span class="at">iter =</span> <span class="dv">4000</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>The key difference between a Bayesian model (like the one we just fit) and a frequentist model (which we could fit with very similar model syntax in <code>lmer</code>) is that the Bayesian model is modelling the <em>distributions</em> of the output variables, whereas frequentist models fit a point prediction. While I am not here to take an ideological stance, the fact that Bayesian models inherently predict distributions is extremely useful for the present application.</p>
</section>
<section id="Simulate" class="level2">
<h2 class="anchored" data-anchor-id="Simulate">Simulate distributions</h2>
<p>To understand more about what exactly “drawing from the model” is doing, check out the <a href="http://mjskay.github.io/tidybayes/articles/tidy-brms.html">tidybayes help page</a>.</p>
<p>Using <code>tidybayes</code>, we can simulate draws of the <em>parameters</em> (using <code>add_epred_draws</code>) or of the <em>data</em> (using <code>add_predicted_draws</code>). Which one you choose to do should be determined by the quantity you want to compute from the predictions. We’ll show both in this tutorial, because the two measures of overlap that we will be calculating (Bhattacharyya affinity and Euclidean distance) necessitate different kinds of predictions.</p>
<p>First, we need to know what parameter values to predict for. For our application, we at least want to get predicted values for each combination of <code>vowel</code> and <code>context</code>. Because we have by-<code>speaker</code> random intercepts with <code>vowel</code>-by-<code>context</code> random slopes, we can make predictions for each <code>speaker</code>-by-<code>vowel</code>-by-<code>context</code> pair. We can also make predictions for the “average” speaker by discarding the entire random effects structure. We will do both. (All of the analysis is done for the “average” word, i.e., the by-word random intercepts are always ignored. Looking at lexical effects would open a whole new set of possible research questions, so be my guest if this is of interest.)</p>
<p>Additionally, we also want to make “average” speaker predictions, so we’ll just ignore the random effects structure entirely, i.e., predict F1 and F2 as a function of just <code>vowel</code> and <code>context</code>.</p>
<p>Let’s make a dataframe with every combination of vowel and context: these are what we use to make the “average” speaker simulations. You unfortunately need to reset the factor levels, since the <code>levels()</code> function (not to be confused with the <code>levels</code> argument to the <code>factor()</code> function) makes a vector of character strings and not factors.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="co"># these are both vectors of characters, but their order is determined by the </span></span>
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="co"># order in the model</span></span>
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a>vowel_levels <span class="ot"><-</span> <span class="fu">levels</span>(model<span class="sc">$</span>data<span class="sc">$</span>vowel) </span>
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a>context_levels <span class="ot"><-</span> <span class="fu">levels</span>(model<span class="sc">$</span>data<span class="sc">$</span>context)</span>
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a>av_preds <span class="ot"><-</span> <span class="fu">expand_grid</span>(<span class="at">context =</span> <span class="fu">factor</span>(context_levels,</span>
<span id="cb11-7"><a href="#cb11-7" aria-hidden="true" tabindex="-1"></a> <span class="at">levels=</span>context_levels),</span>
<span id="cb11-8"><a href="#cb11-8" aria-hidden="true" tabindex="-1"></a> <span class="at">vowel =</span> <span class="fu">factor</span>(vowel_levels,</span>
<span id="cb11-9"><a href="#cb11-9" aria-hidden="true" tabindex="-1"></a> <span class="at">levels=</span>vowel_levels))</span>
<span id="cb11-10"><a href="#cb11-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-11"><a href="#cb11-11" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(av_preds)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 4 × 2
context vowel
<fct> <fct>
1 oral i
2 oral e
3 nasal i
4 nasal e </code></pre>
</div>
</div>
<p>Similarly, let’s make a dataframe with every combination of speaker, vowel, and context. We use this one to make all of our by-speaker simulations.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>sp_preds <span class="ot"><-</span> <span class="fu">expand_grid</span>(<span class="at">context =</span> <span class="fu">factor</span>(context_levels,</span>
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a> <span class="at">levels=</span>context_levels),</span>
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a> <span class="at">vowel =</span> <span class="fu">factor</span>(vowel_levels,</span>
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a> <span class="at">levels=</span>vowel_levels),</span>
<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a> <span class="at">speaker =</span> <span class="fu">unique</span>(model<span class="sc">$</span>data<span class="sc">$</span>speaker))</span>
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(sp_preds)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 3
context vowel speaker
<fct> <fct> <chr>
1 oral i ral130
2 oral i ral135
3 oral i ral139
4 oral i ral216
5 oral i ral303
6 oral i ral342 </code></pre>
</div>
</div>
<p>We’ll also define two variables: <code>num_draws</code>, which is the number of draws to take, and <code>num_points</code>, which is the number of <em>points per draw</em> to take for calculating Bhattacharyya affinity. You can change these to smaller numbers if your computer is struggling, or to bigger numbers if you have the resources. I would in particular recommend changing <code>num_points</code> if you want better estimates of Bhattacharyya affinity.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>num_draws<span class="ot">=</span><span class="dv">100</span></span>
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a>num_points<span class="ot">=</span><span class="dv">25</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Now, let’s make our first simulations. Here, we are making by-speaker predictions for the F1 and F2 <em>means</em> as a function of context and vowel. The resulting distribution is the posterior for the <em>means</em> of the data, not the distributions for the data itself. This is what we want in order to compute Euclidean distance.</p>
<p>When computing Euclidean distance from empirical distributions, we get the distribution center by averaging across tokens, but that isn’t necessary if we are predicting from the model because we already have a distribution of the estimate for the mean from the model we fit! We still take multiple draws of the mean, because that allows us to calculate credible intervals later on.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>fitted_sp <span class="ot"><-</span> sp_preds <span class="sc">%>%</span> </span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">add_epred_draws</span>(model, </span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a> <span class="co"># include only the part of the random effects that we're </span></span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a> <span class="co"># interested in</span></span>
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a> <span class="at">re_formula =</span> <span class="sc">~</span>(<span class="dv">1</span><span class="sc">+</span>context<span class="sc">*</span>vowel<span class="sc">|</span> q<span class="sc">|</span>speaker),</span>
<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a> <span class="at">ndraws=</span>num_draws) <span class="sc">%>%</span></span>
<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a> <span class="co"># pivot so that F1 and F2 each get a column</span></span>
<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">".category"</span>, <span class="at">values_from=</span><span class="st">".epred"</span>) <span class="sc">%>%</span></span>
<span id="cb16-10"><a href="#cb16-10" aria-hidden="true" tabindex="-1"></a> <span class="co"># we never use the `.row`, `.chain`, or `.iteration` columns, so let's get rid </span></span>
<span id="cb16-11"><a href="#cb16-11" aria-hidden="true" tabindex="-1"></a> <span class="co"># of them</span></span>
<span id="cb16-12"><a href="#cb16-12" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>.row, <span class="sc">-</span>.chain, <span class="sc">-</span>.iteration)</span>
<span id="cb16-13"><a href="#cb16-13" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-14"><a href="#cb16-14" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(fitted_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 6
context vowel speaker .draw F1 F2
<fct> <fct> <chr> <int> <dbl> <dbl>
1 oral i ral130 1 -1.00 1.03
2 oral i ral130 2 -0.986 1.03
3 oral i ral130 3 -1.00 1.00
4 oral i ral130 4 -0.964 0.944
5 oral i ral130 5 -0.962 0.971
6 oral i ral130 6 -1.10 1.03 </code></pre>
</div>
</div>
<p>Now we do the exact same thing, but this time, we exclude the random effects in order to draw from the distribution of the means for the <em>average</em> speaker.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>fitted_av <span class="ot"><-</span> av_preds <span class="sc">%>%</span> </span>
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">add_epred_draws</span>(model, </span>
<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a> <span class="co"># re_formula, needs to be NA (which keeps it empty), rather </span></span>
<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a> <span class="co"># than NULL, which is the "default," i.e., complete random </span></span>
<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a> <span class="co"># effects structure (including by-word random effects)</span></span>
<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a> <span class="at">re_formula=</span><span class="cn">NA</span>, </span>
<span id="cb18-7"><a href="#cb18-7" aria-hidden="true" tabindex="-1"></a> <span class="at">ndraws=</span>num_draws) <span class="sc">%>%</span></span>
<span id="cb18-8"><a href="#cb18-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb18-9"><a href="#cb18-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">".category"</span>, <span class="at">values_from=</span><span class="st">".epred"</span>) <span class="sc">%>%</span></span>
<span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>.row, <span class="sc">-</span>.chain, <span class="sc">-</span>.iteration)</span>
<span id="cb18-11"><a href="#cb18-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-12"><a href="#cb18-12" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(fitted_av)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 5
context vowel .draw F1 F2
<fct> <fct> <int> <dbl> <dbl>
1 oral i 1 -0.946 0.929
2 oral i 2 -0.883 0.895
3 oral i 3 -0.921 0.893
4 oral i 4 -0.905 0.773
5 oral i 5 -0.858 0.757
6 oral i 6 -0.892 0.819</code></pre>
</div>
</div>
<p>Each row in <code>fitted_sp</code> and <code>fitted_av</code> should be interpreted as a possible value of the mean. Because we have 100 such values, we can use this variation to estimate our uncertainty in an estimate of the mean (or of any quantity calculated from these dataframes).</p>
<p>Now, instead of simulating the means, let’s simulate the data. For this, we use <code>add_predicted_draws</code>, but aside from that, the procedure is the same as above. Here, we’re not actually using these two dataframes to calculate a measure of overlap, but these are the dataframes that are directly comparable to the empirical distributions, so we do want to look at them.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb20"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a>predicted_sp <span class="ot"><-</span> sp_preds <span class="sc">%>%</span> </span>
<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">add_predicted_draws</span>(model, </span>
<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a> <span class="at">re_formula =</span> <span class="sc">~</span>(<span class="dv">1</span><span class="sc">+</span>context<span class="sc">*</span>vowel<span class="sc">|</span>q<span class="sc">|</span>speaker),</span>
<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a> <span class="at">ndraws=</span>num_draws) <span class="sc">%>%</span></span>
<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">".category"</span>, <span class="at">values_from=</span><span class="st">".prediction"</span>) <span class="sc">%>%</span></span>
<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>.row, <span class="sc">-</span>.chain, <span class="sc">-</span>.iteration)</span>
<span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb20-9"><a href="#cb20-9" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(predicted_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 6
context vowel speaker .draw F1 F2
<fct> <fct> <chr> <int> <dbl> <dbl>
1 oral i ral130 1 -0.658 1.39
2 oral i ral130 2 -0.751 0.975
3 oral i ral130 3 -0.514 0.264
4 oral i ral130 4 -1.21 0.894
5 oral i ral130 5 -0.695 0.713
6 oral i ral130 6 -1.33 1.09 </code></pre>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb22"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a>predicted_av <span class="ot"><-</span> av_preds <span class="sc">%>%</span> </span>
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">add_predicted_draws</span>(model, </span>
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a> <span class="at">re_formula=</span><span class="cn">NA</span>,</span>
<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a> <span class="at">ndraws=</span>num_draws) <span class="sc">%>%</span></span>
<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb22-6"><a href="#cb22-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">".category"</span>, <span class="at">values_from=</span><span class="st">".prediction"</span>) <span class="sc">%>%</span></span>
<span id="cb22-7"><a href="#cb22-7" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>.row, <span class="sc">-</span>.chain, <span class="sc">-</span>.iteration)</span>
<span id="cb22-8"><a href="#cb22-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-9"><a href="#cb22-9" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(predicted_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 6
context vowel speaker .draw F1 F2
<fct> <fct> <chr> <int> <dbl> <dbl>
1 oral i ral130 1 -0.658 1.39
2 oral i ral130 2 -0.751 0.975
3 oral i ral130 3 -0.514 0.264
4 oral i ral130 4 -1.21 0.894
5 oral i ral130 5 -0.695 0.713
6 oral i ral130 6 -1.33 1.09 </code></pre>
</div>
</div>
<p>We need to make special dataframes for the Bhattacharyya affinity calculations because we want to be able to calculate uncertainty. Bhattacharyya affinity is calculated by first estimating the underlying distribution of the data and therefore takes multiple datapoints to estimate (the more, the better). If we used the <code>predicted_sp</code> and <code>predicted_av</code> dataframes to calculate BA, then we would only get one BA estimate per person, and would not be able to estimate uncertainty. Instead, the approach is to simulate multiple values from the posterior predictive distribution using a fixed value for each of the parameters, i.e., make multiple output values from a single draw. We still take multiple draws, and calculate Bhattacharyya affinity <em>for each draw</em>. To accomplish this, we pass in the same combination of predictor variables multiple times, since we get one of each output for each set of predictors that are passed in.</p>
<p>To start, let’s define a function for repeating a dataframe <code>df</code> <code>n_rep</code> times. We don’t need to do this, but it makes the code for getting the predicted dataframes cleaner.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb24"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a>rep_df <span class="ot"><-</span> <span class="cf">function</span>(df, n_reps)</span>
<span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a>{</span>
<span id="cb24-3"><a href="#cb24-3" aria-hidden="true" tabindex="-1"></a> df_repeated <span class="ot"><-</span> <span class="fu">do.call</span>(<span class="st">"rbind"</span>, <span class="fu">replicate</span>(n_reps, df, <span class="at">simplify =</span> <span class="cn">FALSE</span>))</span>
<span id="cb24-4"><a href="#cb24-4" aria-hidden="true" tabindex="-1"></a>}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Because we only need these special repeated dataframes to calculate BA (which requires a distribution to compute), we just need to draw from posterior predictions, not from the fitted means. What we’re doing here is getting <code>num_points</code> (i.e., 25) predictions <em>for each draw</em>, i.e, we assume a fixed set of parameter values and then get 25 values from the posterior predicted distribution assuming those values. We do this <code>n_draws</code> times, which in the end will give us a <em>distribution</em> of possible BA values.</p>
<p>Now we can get the predicted draws just like we did before, with the additional step of repeating the input dataframe <code>num_points</code> times:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb25"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a>predicted_sp_BA <span class="ot"><-</span> sp_preds <span class="sc">%>%</span> </span>
<span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a> <span class="co"># repeat the `sp_preds` dataframe `num_points` times </span></span>
<span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">rep_df</span>(num_points) <span class="sc">%>%</span></span>
<span id="cb25-4"><a href="#cb25-4" aria-hidden="true" tabindex="-1"></a> <span class="co"># now run `add_predicted_draws` on the repeated dataframe</span></span>
<span id="cb25-5"><a href="#cb25-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">add_predicted_draws</span>(model, </span>
<span id="cb25-6"><a href="#cb25-6" aria-hidden="true" tabindex="-1"></a> <span class="at">re_formula =</span> <span class="sc">~</span>(<span class="dv">1</span><span class="sc">+</span>context<span class="sc">*</span>vowel<span class="sc">|</span>q<span class="sc">|</span>speaker),</span>
<span id="cb25-7"><a href="#cb25-7" aria-hidden="true" tabindex="-1"></a> <span class="at">ndraws=</span>num_draws) <span class="sc">%>%</span></span>
<span id="cb25-8"><a href="#cb25-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb25-9"><a href="#cb25-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">".category"</span>, <span class="at">values_from=</span><span class="st">".prediction"</span>) <span class="sc">%>%</span></span>
<span id="cb25-10"><a href="#cb25-10" aria-hidden="true" tabindex="-1"></a> <span class="co"># .row now contains meaningful information, but excluding it still doesn't </span></span>
<span id="cb25-11"><a href="#cb25-11" aria-hidden="true" tabindex="-1"></a> <span class="co"># cause problems</span></span>
<span id="cb25-12"><a href="#cb25-12" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>.row, <span class="sc">-</span>.chain, <span class="sc">-</span>.iteration)</span>
<span id="cb25-13"><a href="#cb25-13" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb25-14"><a href="#cb25-14" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(predicted_sp_BA)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 6
context vowel speaker .draw F1 F2
<fct> <fct> <chr> <int> <dbl> <dbl>
1 oral i ral130 1 -0.744 0.396
2 oral i ral130 2 -0.980 1.47
3 oral i ral130 3 -0.750 0.583
4 oral i ral130 4 -1.10 1.15
5 oral i ral130 5 -1.43 0.180
6 oral i ral130 6 -1.14 1.22 </code></pre>
</div>
</div>
<p>Taking “BA draws” for the average speaker is likewise exactly analygous to what we have already done:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb27"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a>predicted_av_BA <span class="ot"><-</span> av_preds <span class="sc">%>%</span> </span>
<span id="cb27-2"><a href="#cb27-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">rep_df</span>(num_points) <span class="sc">%>%</span></span>
<span id="cb27-3"><a href="#cb27-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">add_predicted_draws</span>(model, </span>
<span id="cb27-4"><a href="#cb27-4" aria-hidden="true" tabindex="-1"></a> <span class="at">re_formula=</span><span class="cn">NA</span>,</span>
<span id="cb27-5"><a href="#cb27-5" aria-hidden="true" tabindex="-1"></a> <span class="at">ndraws=</span>num_draws) <span class="sc">%>%</span></span>
<span id="cb27-6"><a href="#cb27-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb27-7"><a href="#cb27-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">".category"</span>, <span class="at">values_from=</span><span class="st">".prediction"</span>) <span class="sc">%>%</span></span>
<span id="cb27-8"><a href="#cb27-8" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>.row, <span class="sc">-</span>.chain, <span class="sc">-</span>.iteration)</span>
<span id="cb27-9"><a href="#cb27-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb27-10"><a href="#cb27-10" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(predicted_av_BA)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 5
context vowel .draw F1 F2
<fct> <fct> <int> <dbl> <dbl>
1 oral i 1 -0.107 0.323
2 oral i 2 -1.01 0.934
3 oral i 3 -1.01 0.883
4 oral i 4 -0.795 0.897
5 oral i 5 -0.358 1.59
6 oral i 6 -0.762 0.605</code></pre>
</div>
</div>
</section>
<section id="empirical-plots-revisited" class="level2">
<h2 class="anchored" data-anchor-id="empirical-plots-revisited">Empirical plots, revisited</h2>
<p>If you are not interested in the empirical plots, skip to the <a href="#uncertainty">discussion of uncertainty</a>.</p>
<p>Now that we have our simulated distributions, we can compare them to the empirical distributions. This is a good sanity check and may also reveal how the model “helps us.”</p>
<p>We’ll start by plotting the modelled distributions next to the empirical distributions (repeated from above).</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb29"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a>p_dist_modelled <span class="ot"><-</span> fitted_sp <span class="sc">%>%</span></span>
<span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%>%</span> </span>
<span id="cb29-3"><a href="#cb29-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1), <span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%>%</span></span>
<span id="cb29-4"><a href="#cb29-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb29-5"><a href="#cb29-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>F2, <span class="at">y=</span>F1, <span class="at">color=</span>context, <span class="at">shape=</span>vowel)) <span class="sc">+</span></span>
<span id="cb29-6"><a href="#cb29-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb29-7"><a href="#cb29-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">stat_ellipse</span>(<span class="at">data=</span>predicted_sp, </span>
<span id="cb29-8"><a href="#cb29-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(speaker, vowel, context), <span class="at">lty=</span>vowel),</span>
<span id="cb29-9"><a href="#cb29-9" aria-hidden="true" tabindex="-1"></a> <span class="at">level=</span><span class="fl">0.66</span>, <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb29-10"><a href="#cb29-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker)), <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb29-11"><a href="#cb29-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_x_reverse</span>() <span class="sc">+</span> <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb29-12"><a href="#cb29-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">coord_cartesian</span>(<span class="at">xlim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">ylim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">clip=</span><span class="st">'off'</span>) <span class="sc">+</span></span>
<span id="cb29-13"><a href="#cb29-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(<span class="at">title=</span><span class="st">'modelled'</span>) <span class="sc">+</span></span>
<span id="cb29-14"><a href="#cb29-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">facet_wrap</span>(<span class="sc">~</span>speaker, <span class="at">ncol=</span><span class="dv">2</span>) <span class="sc">+</span></span>
<span id="cb29-15"><a href="#cb29-15" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme</span>(<span class="at">aspect.ratio=</span><span class="dv">1</span>)</span>
<span id="cb29-16"><a href="#cb29-16" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb29-17"><a href="#cb29-17" aria-hidden="true" tabindex="-1"></a>p_dist_empirical <span class="ot"><-</span> raleigh_data <span class="sc">%>%</span></span>
<span id="cb29-18"><a href="#cb29-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%>%</span> </span>
<span id="cb29-19"><a href="#cb29-19" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="fu">across</span>(<span class="fu">c</span>(F1, F2), mean)) <span class="sc">%>%</span></span>
<span id="cb29-20"><a href="#cb29-20" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb29-21"><a href="#cb29-21" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>F2, <span class="at">y=</span>F1, <span class="at">color=</span>context, <span class="at">shape=</span>vowel)) <span class="sc">+</span></span>
<span id="cb29-22"><a href="#cb29-22" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb29-23"><a href="#cb29-23" aria-hidden="true" tabindex="-1"></a> <span class="fu">stat_ellipse</span>(<span class="at">data=</span>raleigh_data, </span>
<span id="cb29-24"><a href="#cb29-24" aria-hidden="true" tabindex="-1"></a> <span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(speaker, vowel, context), <span class="at">lty=</span>vowel),</span>
<span id="cb29-25"><a href="#cb29-25" aria-hidden="true" tabindex="-1"></a> <span class="at">level=</span><span class="fl">0.66</span>) <span class="sc">+</span></span>
<span id="cb29-26"><a href="#cb29-26" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker))) <span class="sc">+</span></span>
<span id="cb29-27"><a href="#cb29-27" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_x_reverse</span>() <span class="sc">+</span> <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb29-28"><a href="#cb29-28" aria-hidden="true" tabindex="-1"></a> <span class="fu">coord_cartesian</span>(<span class="at">xlim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">ylim=</span><span class="fu">c</span>(<span class="dv">2</span>, <span class="sc">-</span><span class="dv">2</span>), <span class="at">clip=</span><span class="st">'off'</span>) <span class="sc">+</span></span>
<span id="cb29-29"><a href="#cb29-29" aria-hidden="true" tabindex="-1"></a> <span class="fu">facet_wrap</span>(<span class="sc">~</span>speaker, <span class="at">ncol=</span><span class="dv">2</span>) <span class="sc">+</span></span>
<span id="cb29-30"><a href="#cb29-30" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(<span class="at">title=</span><span class="st">'empirical'</span>) <span class="sc">+</span></span>
<span id="cb29-31"><a href="#cb29-31" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme</span>(<span class="at">axis.title.y=</span><span class="fu">element_blank</span>(),</span>
<span id="cb29-32"><a href="#cb29-32" aria-hidden="true" tabindex="-1"></a> <span class="at">aspect.ratio=</span><span class="dv">1</span>)</span>
<span id="cb29-33"><a href="#cb29-33" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb29-34"><a href="#cb29-34" aria-hidden="true" tabindex="-1"></a>p_dist_modelled <span class="sc">+</span> p_dist_empirical</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/modelled_centered-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>Notice that all of the ellipses in the modelled distributions have roughly the same size and orientation: this is because, with the way the model that fit them is defined, we have fit a single variance-covariance matrix. If we want variability to be allowed to vary across speakers, contexts, and vowels (which we probably do), we have to model variance too. An example of how to do this is in the scripts ending in “_exp” in the Interspeech2024/model_fitting folder.</p>
<p>We can also plot the by-speaker interactions, like we did for the empirical data:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb30"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a>p_F1_interaction_modelled <span class="ot"><-</span> fitted_sp <span class="sc">%>%</span></span>
<span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%>%</span> </span>
<span id="cb30-3"><a href="#cb30-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1)) <span class="sc">%>%</span></span>
<span id="cb30-4"><a href="#cb30-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb30-5"><a href="#cb30-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F1, <span class="at">color=</span>context)) <span class="sc">+</span></span>
<span id="cb30-6"><a href="#cb30-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb30-7"><a href="#cb30-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb30-8"><a href="#cb30-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(<span class="at">title=</span><span class="st">'modelled'</span>) <span class="sc">+</span></span>
<span id="cb30-9"><a href="#cb30-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker)), <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb30-10"><a href="#cb30-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">ylim</span>(<span class="fl">0.2</span>, <span class="sc">-</span><span class="fl">1.2</span>)</span>
<span id="cb30-11"><a href="#cb30-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb30-12"><a href="#cb30-12" aria-hidden="true" tabindex="-1"></a>p_F1_interaction_empirical <span class="ot"><-</span> raleigh_data <span class="sc">%>%</span></span>
<span id="cb30-13"><a href="#cb30-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%>%</span> </span>
<span id="cb30-14"><a href="#cb30-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1)) <span class="sc">%>%</span></span>
<span id="cb30-15"><a href="#cb30-15" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb30-16"><a href="#cb30-16" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F1, <span class="at">color=</span>context)) <span class="sc">+</span></span>
<span id="cb30-17"><a href="#cb30-17" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb30-18"><a href="#cb30-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb30-19"><a href="#cb30-19" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(<span class="at">title=</span><span class="st">'empirical'</span>) <span class="sc">+</span></span>
<span id="cb30-20"><a href="#cb30-20" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker))) <span class="sc">+</span></span>
<span id="cb30-21"><a href="#cb30-21" aria-hidden="true" tabindex="-1"></a> <span class="fu">ylim</span>(<span class="fl">0.2</span>, <span class="sc">-</span><span class="fl">1.2</span>) <span class="sc">+</span></span>
<span id="cb30-22"><a href="#cb30-22" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme</span>(<span class="at">axis.title.y=</span><span class="fu">element_blank</span>())</span>
<span id="cb30-23"><a href="#cb30-23" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb30-24"><a href="#cb30-24" aria-hidden="true" tabindex="-1"></a>p_F2_interaction_modelled <span class="ot"><-</span> fitted_sp <span class="sc">%>%</span></span>
<span id="cb30-25"><a href="#cb30-25" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%>%</span> </span>
<span id="cb30-26"><a href="#cb30-26" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%>%</span></span>
<span id="cb30-27"><a href="#cb30-27" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb30-28"><a href="#cb30-28" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F2, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb30-29"><a href="#cb30-29" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="at">shape=</span><span class="dv">24</span>, <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb30-30"><a href="#cb30-30" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb30-31"><a href="#cb30-31" aria-hidden="true" tabindex="-1"></a> <span class="co"># labs(title='modelled') +</span></span>
<span id="cb30-32"><a href="#cb30-32" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="at">lty=</span><span class="dv">2</span>, <span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker)),</span>
<span id="cb30-33"><a href="#cb30-33" aria-hidden="true" tabindex="-1"></a> <span class="at">show.legend=</span><span class="cn">FALSE</span>) <span class="sc">+</span></span>
<span id="cb30-34"><a href="#cb30-34" aria-hidden="true" tabindex="-1"></a> <span class="fu">ylim</span>(<span class="fl">1.5</span>, <span class="fl">0.3</span>)</span>
<span id="cb30-35"><a href="#cb30-35" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb30-36"><a href="#cb30-36" aria-hidden="true" tabindex="-1"></a>p_F2_interaction_empirical <span class="ot"><-</span> raleigh_data <span class="sc">%>%</span></span>
<span id="cb30-37"><a href="#cb30-37" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%>%</span> </span>
<span id="cb30-38"><a href="#cb30-38" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%>%</span></span>
<span id="cb30-39"><a href="#cb30-39" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb30-40"><a href="#cb30-40" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F2, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb30-41"><a href="#cb30-41" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="at">shape=</span><span class="dv">24</span>) <span class="sc">+</span></span>
<span id="cb30-42"><a href="#cb30-42" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_y_reverse</span>() <span class="sc">+</span></span>
<span id="cb30-43"><a href="#cb30-43" aria-hidden="true" tabindex="-1"></a> <span class="co"># labs(title='empirical') +</span></span>
<span id="cb30-44"><a href="#cb30-44" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="at">lty=</span><span class="dv">2</span>, <span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, speaker))) <span class="sc">+</span></span>
<span id="cb30-45"><a href="#cb30-45" aria-hidden="true" tabindex="-1"></a> <span class="fu">ylim</span>(<span class="fl">1.5</span>, <span class="fl">0.3</span>) <span class="sc">+</span></span>
<span id="cb30-46"><a href="#cb30-46" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme</span>(<span class="at">axis.title.y=</span><span class="fu">element_blank</span>())</span>
<span id="cb30-47"><a href="#cb30-47" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb30-48"><a href="#cb30-48" aria-hidden="true" tabindex="-1"></a>(p_F1_interaction_modelled <span class="sc">+</span> p_F1_interaction_empirical) <span class="sc">/</span></span>
<span id="cb30-49"><a href="#cb30-49" aria-hidden="true" tabindex="-1"></a> (p_F2_interaction_modelled <span class="sc">+</span> p_F2_interaction_empirical)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/F1_interaction_sp-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<section id="uncertainty" class="level3">
<h3 class="anchored" data-anchor-id="uncertainty">Uncertainty</h3>
<p>Now, we’ll make some plots that we couldn’t make from the empirical data: plots showing uncertainty!</p>
<p>Let’s make a dataframe with an estimate of the mean F1 and F2, along with 95% credible intervals. <code>tidybayes</code> has a useful set of functions for doing this: we’ll use <code>mean_qi()</code>, which gives you the mean and 95% quantile intervals (unless otherwise specified), but there are other options (any combination of mean, median, or mode, plus qi (quantile interval), hdi (highest-density interval), or hdci (highest-density continuous interval), e.g., <code>mode_hdci()</code>). For estimating uncertainty (in the form of 95% credible intervals), we will default to using <code>mean_qi()</code> in this tutorial.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb31"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a>sp_estimates <span class="ot"><-</span> fitted_sp <span class="sc">%>%</span></span>
<span id="cb31-2"><a href="#cb31-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, vowel, context) <span class="sc">%>%</span></span>
<span id="cb31-3"><a href="#cb31-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">mean_qi</span>() <span class="sc">%>%</span> </span>
<span id="cb31-4"><a href="#cb31-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>()</span>
<span id="cb31-5"><a href="#cb31-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb31-6"><a href="#cb31-6" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(sp_estimates)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 12
speaker vowel context F1 F1.lower F1.upper F2 F2.lower F2.upper .width
<chr> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ral130 i oral -1.02 -1.14 -0.895 0.985 0.874 1.12 0.95
2 ral130 i nasal -0.534 -0.674 -0.398 1.14 0.986 1.30 0.95
3 ral130 e oral -0.0294 -0.125 0.102 0.672 0.535 0.816 0.95
4 ral130 e nasal -0.0957 -0.195 0.0147 0.748 0.626 0.891 0.95
5 ral135 i oral -0.961 -1.07 -0.858 0.886 0.741 0.991 0.95
6 ral135 i nasal -0.798 -0.928 -0.696 0.964 0.801 1.12 0.95
# ℹ 2 more variables: .point <chr>, .interval <chr></code></pre>
</div>
</div>
<p>Now we can use this dataframe to show uncertainty. For example, we can plot errorbars on the by-speaker vowel-by-context estimates</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb33"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a>sp_estimates <span class="sc">%>%</span></span>
<span id="cb33-2"><a href="#cb33-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">color=</span>context, <span class="at">fill=</span>context)) <span class="sc">+</span></span>
<span id="cb33-3"><a href="#cb33-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="at">shape=</span><span class="dv">21</span>, <span class="fu">aes</span>(<span class="at">y=</span>F1)) <span class="sc">+</span></span>
<span id="cb33-4"><a href="#cb33-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_errorbar</span>(<span class="fu">aes</span>(<span class="at">ymin=</span>F1.lower, <span class="at">ymax=</span>F1.upper)) <span class="sc">+</span></span>
<span id="cb33-5"><a href="#cb33-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="at">lty=</span><span class="dv">1</span>, <span class="fu">aes</span>(<span class="at">y=</span>F1, <span class="at">group=</span>context)) <span class="sc">+</span></span>
<span id="cb33-6"><a href="#cb33-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">facet_wrap</span>(<span class="sc">~</span>speaker)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/unnamed-chunk-3-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>Another way to visualize the uncertainty is to plot all of the mean draws together, like so:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb34"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a>fitted_sp <span class="sc">%>%</span></span>
<span id="cb34-2"><a href="#cb34-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>vowel, <span class="at">y=</span>F1, <span class="at">color=</span>context)) <span class="sc">+</span></span>
<span id="cb34-3"><a href="#cb34-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group=</span><span class="fu">interaction</span>(context, .draw)), <span class="at">alpha=</span><span class="fl">0.25</span>) <span class="sc">+</span> </span>
<span id="cb34-4"><a href="#cb34-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">facet_wrap</span>(<span class="sc">~</span>speaker)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="MMO_tutorial_files/figure-html/unnamed-chunk-4-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>The good thing about this way of viewing uncertainty is that it demonstrates that the variability is actually <em>less</em> than what just the errorbars show, i.e., the draws as plotted here vary mostly in their intercepts, not their slopes.</p>
</section>
</section>
<section id="Calculate_Overlap" class="level2">
<h2 class="anchored" data-anchor-id="Calculate_Overlap">Overlap calculations</h2>
<p>Now that we have predicted and fitted dataframes that “look like” the raw data, we can calculate any measure of overlap we like.</p>
<p>Let’s define a function to calculate Bhattacharyya affinity. For further explanation, see <a href="https://joeystanley.com/blog/a-tutorial-in-calculating-vowel-overlap/">Joey Stanley’s thorough tutorial</a>.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb35"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a>bhatt <span class="ot"><-</span> <span class="cf">function</span> (F1, F2, vowel) </span>
<span id="cb35-2"><a href="#cb35-2" aria-hidden="true" tabindex="-1"></a>{</span>
<span id="cb35-3"><a href="#cb35-3" aria-hidden="true" tabindex="-1"></a> vowel_data <span class="ot"><-</span> <span class="fu">droplevels</span>(<span class="fu">data.frame</span>(vowel))</span>
<span id="cb35-4"><a href="#cb35-4" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb35-5"><a href="#cb35-5" aria-hidden="true" tabindex="-1"></a> sp_df <span class="ot"><-</span> <span class="fu">tryCatch</span>(</span>
<span id="cb35-6"><a href="#cb35-6" aria-hidden="true" tabindex="-1"></a> <span class="at">expr=</span><span class="fu">SpatialPointsDataFrame</span>(<span class="fu">na.omit</span>(<span class="fu">cbind</span>(F1, F2)), vowel_data), </span>
<span id="cb35-7"><a href="#cb35-7" aria-hidden="true" tabindex="-1"></a> <span class="at">error=</span><span class="cf">function</span>(e){<span class="cn">NA</span>})</span>
<span id="cb35-8"><a href="#cb35-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">tryCatch</span>(</span>
<span id="cb35-9"><a href="#cb35-9" aria-hidden="true" tabindex="-1"></a> <span class="at">expr =</span> {<span class="fu">kerneloverlap</span>(sp_df, <span class="at">method=</span><span class="st">'BA'</span>)[<span class="dv">1</span>,<span class="dv">2</span>]}, </span>
<span id="cb35-10"><a href="#cb35-10" aria-hidden="true" tabindex="-1"></a> <span class="at">error =</span> <span class="cf">function</span>(e){<span class="cn">NA</span>})</span>
<span id="cb35-11"><a href="#cb35-11" aria-hidden="true" tabindex="-1"></a>}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>First, let’s calculate BA from the empirical distributions:</p>
<div class="cell" data-hash="MMO_tutorial_cache/html/bhatt_calc_emp_e1d2188da6b2328867549b1e3b59a835">
<div class="sourceCode cell-code" id="cb36"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a>BA_emp_sp <span class="ot"><-</span> raleigh_data <span class="sc">%>%</span></span>
<span id="cb36-2"><a href="#cb36-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, context) <span class="sc">%>%</span> <span class="co"># calcualte for each speaker and context</span></span>
<span id="cb36-3"><a href="#cb36-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(<span class="at">bhatt_aff =</span> <span class="fu">bhatt</span>(F1, F2, vowel)) <span class="sc">%>%</span></span>
<span id="cb36-4"><a href="#cb36-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>()</span>
<span id="cb36-5"><a href="#cb36-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb36-6"><a href="#cb36-6" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(BA_emp_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 3
speaker context bhatt_aff
<chr> <fct> <dbl>
1 ral130 oral 0.519
2 ral130 nasal 0.812
3 ral135 oral 0.608
4 ral135 nasal 0.861
5 ral139 oral 0.697
6 ral139 nasal 0.904</code></pre>
</div>
</div>
<p>Notice that we have gotten rid of the <code>vowel</code> column: because we are calculating the overlap <em>between vowels</em>, we will always lose the vowel column when we calculate overlap.</p>
<p>We calculate the by-speaker modelled BA in much the same way:</p>
<div class="cell" data-hash="MMO_tutorial_cache/html/bhatt_calc_sp_eeea4d20c2b977442c6911d026fa3e22">
<div class="sourceCode cell-code" id="cb38"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a>BA_mod_sp <span class="ot"><-</span> predicted_sp_BA <span class="sc">%>%</span></span>
<span id="cb38-2"><a href="#cb38-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, context, .draw) <span class="sc">%>%</span> <span class="co"># calculate for each speaker, context, AND draw</span></span>
<span id="cb38-3"><a href="#cb38-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(<span class="at">bhatt_aff =</span> <span class="fu">bhatt</span>(F1, F2, vowel)) <span class="sc">%>%</span></span>
<span id="cb38-4"><a href="#cb38-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>()</span>
<span id="cb38-5"><a href="#cb38-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb38-6"><a href="#cb38-6" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(BA_mod_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 4
speaker context .draw bhatt_aff
<chr> <fct> <int> <dbl>
1 ral130 oral 1 0.361
2 ral130 oral 2 0.558
3 ral130 oral 3 0.524
4 ral130 oral 4 0.603
5 ral130 oral 5 0.432
6 ral130 oral 6 0.495</code></pre>
</div>
</div>
<p>And finally, the average-speaker modelled BA:</p>
<div class="cell" data-hash="MMO_tutorial_cache/html/bhatt_calc_av_0e577543a046a15151ac690568aea1df">
<div class="sourceCode cell-code" id="cb40"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb40-1"><a href="#cb40-1" aria-hidden="true" tabindex="-1"></a>BA_mod_av <span class="ot"><-</span> predicted_av_BA <span class="sc">%>%</span></span>
<span id="cb40-2"><a href="#cb40-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(context, .draw) <span class="sc">%>%</span> <span class="co"># calculate for each context draw</span></span>
<span id="cb40-3"><a href="#cb40-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(<span class="at">bhatt_aff =</span> <span class="fu">bhatt</span>(F1, F2, vowel)) <span class="sc">%>%</span></span>
<span id="cb40-4"><a href="#cb40-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>()</span>
<span id="cb40-5"><a href="#cb40-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb40-6"><a href="#cb40-6" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(BA_mod_av)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 3
context .draw bhatt_aff
<fct> <int> <dbl>
1 oral 1 0.685
2 oral 2 0.587
3 oral 3 0.562
4 oral 4 0.648
5 oral 5 0.482
6 oral 6 0.690</code></pre>
</div>
</div>
<p>For calculating Euclidean distance, we need to take a slightly different approach for the empirical distributions than for the modelled distributions. Let’s do the empirical distributions first:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb42"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb42-1"><a href="#cb42-1" aria-hidden="true" tabindex="-1"></a>ED_emp_sp <span class="ot"><-</span> raleigh_data <span class="sc">%>%</span></span>
<span id="cb42-2"><a href="#cb42-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker, context, vowel) <span class="sc">%>%</span></span>
<span id="cb42-3"><a href="#cb42-3" aria-hidden="true" tabindex="-1"></a> <span class="co"># first, find the average for each speaker, context, vowel pair</span></span>
<span id="cb42-4"><a href="#cb42-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(<span class="at">F1=</span><span class="fu">mean</span>(F1),</span>
<span id="cb42-5"><a href="#cb42-5" aria-hidden="true" tabindex="-1"></a> <span class="at">F2=</span><span class="fu">mean</span>(F2)) <span class="sc">%>%</span></span>
<span id="cb42-6"><a href="#cb42-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb42-7"><a href="#cb42-7" aria-hidden="true" tabindex="-1"></a> <span class="co"># reshape the dataframe so that "i" and "e" are in separate columns</span></span>
<span id="cb42-8"><a href="#cb42-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">"vowel"</span>, <span class="at">values_from=</span><span class="fu">c</span>(<span class="st">"F1"</span>, <span class="st">"F2"</span>)) <span class="sc">%>%</span></span>
<span id="cb42-9"><a href="#cb42-9" aria-hidden="true" tabindex="-1"></a> <span class="co"># now apply the pythagorean theorem, which defines Euclidean distance</span></span>
<span id="cb42-10"><a href="#cb42-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">eucl_dist =</span> <span class="fu">sqrt</span>((F1_i <span class="sc">-</span> F1_e)<span class="sc">^</span><span class="dv">2</span> <span class="sc">+</span> (F2_i <span class="sc">-</span> F2_e)<span class="sc">^</span><span class="dv">2</span>))</span>
<span id="cb42-11"><a href="#cb42-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb42-12"><a href="#cb42-12" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(ED_emp_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 7
speaker context F1_i F1_e F2_i F2_e eucl_dist
<chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ral130 oral -1.04 -0.145 1.07 0.710 0.967
2 ral130 nasal -0.667 -0.135 1.34 0.779 0.772
3 ral135 oral -1.04 -0.154 0.902 0.667 0.919
4 ral135 nasal -0.949 -0.513 1.15 0.989 0.464
5 ral139 oral -0.819 -0.144 0.821 0.467 0.762
6 ral139 nasal -0.528 -0.183 1.01 0.703 0.459</code></pre>
</div>
</div>
<p>This leaves us with “vestigial” formant-by-vowel columns: they’re not hurting anyone, and they could be of interest, so I’ll leave them in.</p>
<p>We calculate modelled ED from the “fitted” modelled predictions: we calculate one value from each draw. We don’t do any averaging, because each draw is from the distribution of the mean itself.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb44"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb44-1"><a href="#cb44-1" aria-hidden="true" tabindex="-1"></a>ED_mod_sp <span class="ot"><-</span> fitted_sp <span class="sc">%>%</span></span>
<span id="cb44-2"><a href="#cb44-2" aria-hidden="true" tabindex="-1"></a> <span class="co"># reshape the dataframe so that "i" and "e" are in separate columns</span></span>
<span id="cb44-3"><a href="#cb44-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">"vowel"</span>, <span class="at">values_from=</span><span class="fu">c</span>(<span class="st">"F1"</span>, <span class="st">"F2"</span>)) <span class="sc">%>%</span></span>
<span id="cb44-4"><a href="#cb44-4" aria-hidden="true" tabindex="-1"></a> <span class="co"># now apply the pythagorean theorem, which defines Euclidean distance</span></span>
<span id="cb44-5"><a href="#cb44-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">eucl_dist =</span> <span class="fu">sqrt</span>((F1_i <span class="sc">-</span> F1_e)<span class="sc">^</span><span class="dv">2</span> <span class="sc">+</span> (F2_i <span class="sc">-</span> F2_e)<span class="sc">^</span><span class="dv">2</span>))</span>
<span id="cb44-6"><a href="#cb44-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb44-7"><a href="#cb44-7" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(ED_mod_sp)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 8
context speaker .draw F1_i F1_e F2_i F2_e eucl_dist
<fct> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 oral ral130 1 -1.00 -0.159 1.03 0.724 0.898
2 oral ral130 2 -0.986 -0.0643 1.03 0.610 1.01
3 oral ral130 3 -1.00 0.0147 1.00 0.678 1.07
4 oral ral130 4 -0.964 -0.0113 0.944 0.723 0.978
5 oral ral130 5 -0.962 0.0446 0.971 0.682 1.05
6 oral ral130 6 -1.10 -0.0247 1.03 0.678 1.14 </code></pre>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb46"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb46-1"><a href="#cb46-1" aria-hidden="true" tabindex="-1"></a>ED_mod_av <span class="ot"><-</span> fitted_av <span class="sc">%>%</span></span>
<span id="cb46-2"><a href="#cb46-2" aria-hidden="true" tabindex="-1"></a> <span class="co"># reshape the dataframe so that "i" and "e" are in separate columns</span></span>
<span id="cb46-3"><a href="#cb46-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from=</span><span class="st">"vowel"</span>, <span class="at">values_from=</span><span class="fu">c</span>(<span class="st">"F1"</span>, <span class="st">"F2"</span>)) <span class="sc">%>%</span></span>
<span id="cb46-4"><a href="#cb46-4" aria-hidden="true" tabindex="-1"></a> <span class="co"># now apply the pythagorean theorem, which defines Euclidean distance</span></span>
<span id="cb46-5"><a href="#cb46-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">eucl_dist =</span> <span class="fu">sqrt</span>((F1_i <span class="sc">-</span> F1_e)<span class="sc">^</span><span class="dv">2</span> <span class="sc">+</span> (F2_i <span class="sc">-</span> F2_e)<span class="sc">^</span><span class="dv">2</span>))</span>
<span id="cb46-6"><a href="#cb46-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb46-7"><a href="#cb46-7" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(ED_mod_av)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 7
context .draw F1_i F1_e F2_i F2_e eucl_dist
<fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 oral 1 -0.946 -0.0650 0.929 0.705 0.909
2 oral 2 -0.883 -0.0781 0.895 0.668 0.836
3 oral 3 -0.921 -0.0361 0.893 0.542 0.952
4 oral 4 -0.905 -0.0409 0.773 0.509 0.904
5 oral 5 -0.858 0.0152 0.757 0.573 0.892
6 oral 6 -0.892 -0.0157 0.819 0.521 0.926</code></pre>
</div>
</div>
<p>We have now calculated the overlap (or distance) in the form of Bhattacharyya affinity and Euclidean distance, from both the empirical and modelled distributions, for each individual speaker and for the “average speaker” (modelled only). From the modelled distributions, we have calculated 100 different values, each from a different draw from the posterior, for each speaker and context. This gives us <em>distributions</em> for BA and ED.</p>
<p>Now that we have by-speaker distributions of BA, let’s calculate the by-speaker confidence intervals (CIs) for prenasal and preoral BA. We can also calculate the difference between prenasal and preoral BA, and the CI for the difference. To do this, we first take the difference by draw, then calculate the CI based on the distribution of the difference, just like for the BA values.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb48"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb48-1"><a href="#cb48-1" aria-hidden="true" tabindex="-1"></a>BA_mod_sp_wide <span class="ot"><-</span> BA_mod_sp <span class="sc">%>%</span></span>
<span id="cb48-2"><a href="#cb48-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from=</span>context, <span class="at">values_from=</span>bhatt_aff) <span class="sc">%>%</span></span>
<span id="cb48-3"><a href="#cb48-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">diff=</span>nasal<span class="sc">-</span>oral) <span class="sc">%>%</span></span>
<span id="cb48-4"><a href="#cb48-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker) <span class="sc">%>%</span></span>
<span id="cb48-5"><a href="#cb48-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">mean_qi</span>()</span>
<span id="cb48-6"><a href="#cb48-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb48-7"><a href="#cb48-7" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(BA_mod_sp_wide)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 13
speaker oral oral.lower oral.upper nasal nasal.lower nasal.upper diff
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ral130 0.523 0.293 0.689 0.768 0.572 0.910 0.246
2 ral135 0.573 0.414 0.727 0.877 0.751 0.948 0.304
3 ral139 0.596 0.387 0.770 0.873 0.769 0.950 0.278
4 ral216 0.618 0.384 0.816 0.904 0.796 0.956 0.286
5 ral303 0.616 0.400 0.804 0.812 0.631 0.935 0.196
6 ral342 0.585 0.348 0.792 0.877 0.736 0.955 0.292
# ℹ 5 more variables: diff.lower <dbl>, diff.upper <dbl>, .width <dbl>,
# .point <chr>, .interval <chr></code></pre>
</div>
</div>
<p>We can also pivot the empirical dataframe for easier plotting (below) and calculate the difference, but there is not a clear way to calculate confidence intervals.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb50"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb50-1"><a href="#cb50-1" aria-hidden="true" tabindex="-1"></a>BA_emp_sp_wide <span class="ot"><-</span> BA_emp_sp <span class="sc">%>%</span></span>
<span id="cb50-2"><a href="#cb50-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from=</span>context, <span class="at">values_from=</span>bhatt_aff) <span class="sc">%>%</span></span>
<span id="cb50-3"><a href="#cb50-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">diff=</span>nasal<span class="sc">-</span>oral)</span>
<span id="cb50-4"><a href="#cb50-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb50-5"><a href="#cb50-5" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(BA_emp_sp_wide)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 4
speaker oral nasal diff
<chr> <dbl> <dbl> <dbl>
1 ral130 0.519 0.812 0.293
2 ral135 0.608 0.861 0.253
3 ral139 0.697 0.904 0.206
4 ral216 0.689 0.887 0.198
5 ral303 0.765 0.890 0.125
6 ral342 0.663 0.875 0.212</code></pre>
</div>
</div>
<p>Now let’s do the same thing for modelled and empirical Euclidean distance:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb52"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb52-1"><a href="#cb52-1" aria-hidden="true" tabindex="-1"></a>ED_mod_sp_wide <span class="ot"><-</span> ED_mod_sp <span class="sc">%>%</span></span>
<span id="cb52-2"><a href="#cb52-2" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(speaker, context, eucl_dist, .draw) <span class="sc">%>%</span></span>
<span id="cb52-3"><a href="#cb52-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from=</span>context, <span class="at">values_from=</span>eucl_dist) <span class="sc">%>%</span></span>
<span id="cb52-4"><a href="#cb52-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">diff=</span>nasal<span class="sc">-</span>oral) <span class="sc">%>%</span></span>
<span id="cb52-5"><a href="#cb52-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(speaker) <span class="sc">%>%</span></span>
<span id="cb52-6"><a href="#cb52-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">mean_qi</span>()</span>
<span id="cb52-7"><a href="#cb52-7" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb52-8"><a href="#cb52-8" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(ED_mod_sp_wide)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 13
speaker oral oral.lower oral.upper nasal nasal.lower nasal.upper diff
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ral130 1.04 0.852 1.22 0.597 0.435 0.842 -0.441
2 ral135 0.974 0.832 1.11 0.304 0.135 0.492 -0.670
3 ral139 0.924 0.729 1.06 0.359 0.141 0.564 -0.565
4 ral216 0.866 0.666 1.01 0.228 0.0582 0.416 -0.638
5 ral303 0.885 0.676 1.08 0.502 0.266 0.711 -0.383
6 ral342 0.923 0.795 1.05 0.331 0.102 0.551 -0.592
# ℹ 5 more variables: diff.lower <dbl>, diff.upper <dbl>, .width <dbl>,
# .point <chr>, .interval <chr></code></pre>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb54"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb54-1"><a href="#cb54-1" aria-hidden="true" tabindex="-1"></a>ED_emp_sp_wide <span class="ot"><-</span> ED_emp_sp <span class="sc">%>%</span></span>
<span id="cb54-2"><a href="#cb54-2" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(speaker, context, eucl_dist) <span class="sc">%>%</span></span>
<span id="cb54-3"><a href="#cb54-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from=</span>context, <span class="at">values_from=</span>eucl_dist) <span class="sc">%>%</span></span>
<span id="cb54-4"><a href="#cb54-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">diff=</span>nasal<span class="sc">-</span>oral)</span>
<span id="cb54-5"><a href="#cb54-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb54-6"><a href="#cb54-6" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(ED_emp_sp_wide)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 4
speaker oral nasal diff
<chr> <dbl> <dbl> <dbl>
1 ral130 0.967 0.772 -0.195
2 ral135 0.919 0.464 -0.455
3 ral139 0.762 0.459 -0.303
4 ral216 0.729 0.494 -0.236
5 ral303 0.894 0.546 -0.349
6 ral342 0.890 0.482 -0.408</code></pre>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb56"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb56-1"><a href="#cb56-1" aria-hidden="true" tabindex="-1"></a>p_BA_mod <span class="ot"><-</span> BA_mod_sp_wide <span class="sc">%>%</span></span>
<span id="cb56-2"><a href="#cb56-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>oral, <span class="at">y=</span>nasal, <span class="at">color=</span>speaker)) <span class="sc">+</span></span>
<span id="cb56-3"><a href="#cb56-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb56-4"><a href="#cb56-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">xlim</span>(<span class="dv">0</span>, <span class="dv">1</span>) <span class="sc">+</span> <span class="fu">ylim</span>(<span class="dv">0</span>, <span class="dv">1</span>) <span class="sc">+</span></span>
<span id="cb56-5"><a href="#cb56-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_abline</span>(<span class="at">lty=</span><span class="dv">3</span>) <span class="sc">+</span></span>
<span id="cb56-6"><a href="#cb56-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">coord_fixed</span>() <span class="sc">+</span></span>
<span id="cb56-7"><a href="#cb56-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">xlab</span>(<span class="st">'preoral BA'</span>) <span class="sc">+</span> <span class="fu">ylab</span>(<span class="st">'prenasal BA'</span>)</span>
<span id="cb56-8"><a href="#cb56-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb56-9"><a href="#cb56-9" aria-hidden="true" tabindex="-1"></a>p_BA_emp <span class="ot"><-</span> BA_emp_sp_wide <span class="sc">%>%</span></span>
<span id="cb56-10"><a href="#cb56-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>oral, <span class="at">y=</span>nasal, <span class="at">color=</span>speaker)) <span class="sc">+</span></span>
<span id="cb56-11"><a href="#cb56-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb56-12"><a href="#cb56-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">xlim</span>(<span class="dv">0</span>, <span class="dv">1</span>) <span class="sc">+</span> <span class="fu">ylim</span>(<span class="dv">0</span>, <span class="dv">1</span>) <span class="sc">+</span></span>
<span id="cb56-13"><a href="#cb56-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_abline</span>(<span class="at">lty=</span><span class="dv">3</span>) <span class="sc">+</span></span>
<span id="cb56-14"><a href="#cb56-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">coord_fixed</span>() <span class="sc">+</span></span>
<span id="cb56-15"><a href="#cb56-15" aria-hidden="true" tabindex="-1"></a> <span class="fu">xlab</span>(<span class="st">'preoral BA'</span>) <span class="sc">+</span> <span class="fu">ylab</span>(<span class="st">'prenasal BA'</span>)</span>
<span id="cb56-16"><a href="#cb56-16" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb56-17"><a href="#cb56-17" aria-hidden="true" tabindex="-1"></a>p_ED_mod <span class="ot"><-</span> ED_mod_sp_wide <span class="sc">%>%</span></span>
<span id="cb56-18"><a href="#cb56-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x=</span>oral, <span class="at">y=</span>nasal, <span class="at">color=</span>speaker)) <span class="sc">+</span></span>
<span id="cb56-19"><a href="#cb56-19" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb56-20"><a href="#cb56-20" aria-hidden="true" tabindex="-1"></a> <span class="fu">xlim</span>(<span class="dv">0</span>, <span class="fl">1.5</span>) <span class="sc">+</span> <span class="fu">ylim</span>(<span class="dv">0</span>, <span class="fl">1.5</span>) <span class="sc">+</span></span>
<span id="cb56-21"><a href="#cb56-21" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_abline</span>(<span class="at">lty=</span><span class="dv">3</span>) <span class="sc">+</span></span>