<!-- bayes_simulation.html -->

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>

<meta charset="utf-8">
<meta name="generator" content="quarto-1.6.1">

<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">


<title>31&nbsp; Bayesian Analysis by Simulation – Resampling statistics</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
  width: 0.8em;
  margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ 
  vertical-align: middle;
}
/* CSS for syntax highlighting */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { display: inline-block; text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
  { counter-reset: source-line 0; }
pre.numberSource code > span
  { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
  { content: counter(source-line);
    position: relative; left: -1em; text-align: right; vertical-align: baseline;
    border: none; display: inline-block;
    -webkit-touch-callout: none; -webkit-user-select: none;
    -khtml-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;
    padding: 0 4px; width: 4em;
  }
pre.numberSource { margin-left: 3em;  padding-left: 4px; }
div.sourceCode
  {   }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* CSS for citations */
div.csl-bib-body { }
div.csl-entry {
  clear: both;
  margin-bottom: 0em;
}
.hanging-indent div.csl-entry {
  margin-left:2em;
  text-indent:-2em;
}
div.csl-left-margin {
  min-width:2em;
  float:left;
}
div.csl-right-inline {
  margin-left:2em;
  padding-left:1em;
}
div.csl-indent {
  margin-left: 2em;
}</style>


<script src="site_libs/quarto-nav/quarto-nav.js"></script>
<script src="site_libs/quarto-nav/headroom.min.js"></script>
<script src="site_libs/clipboard/clipboard.min.js"></script>
<script src="site_libs/quarto-search/autocomplete.umd.js"></script>
<script src="site_libs/quarto-search/fuse.min.js"></script>
<script src="site_libs/quarto-search/quarto-search.js"></script>
<meta name="quarto:offset" content="./">
<link href="./references.html" rel="next">
<link href="./how_big_sample.html" rel="prev">
<script src="site_libs/quarto-html/quarto.js"></script>
<script src="site_libs/quarto-html/popper.min.js"></script>
<script src="site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="site_libs/quarto-html/anchor.min.js"></script>
<link href="site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="site_libs/bootstrap/bootstrap.min.js"></script>
<link href="site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script id="quarto-search-options" type="application/json">{
  "location": "sidebar",
  "copy-button": false,
  "collapse-after": 3,
  "panel-placement": "start",
  "type": "textbox",
  "limit": 50,
  "keyboard-shortcut": [
    "f",
    "/",
    "s"
  ],
  "show-item-context": false,
  "language": {
    "search-no-results-text": "No results",
    "search-matching-documents-text": "matching documents",
    "search-copy-link-title": "Copy link to search",
    "search-hide-matches-text": "Hide additional matches",
    "search-more-match-text": "more match in this document",
    "search-more-matches-text": "more matches in this document",
    "search-clear-button-title": "Clear",
    "search-text-placeholder": "",
    "search-detached-cancel-button-title": "Cancel",
    "search-submit-button-title": "Submit",
    "search-label": "Search"
  }
}</script>
<script type="text/javascript">
  $(document).ready(function() {
    $("table").addClass('lightable-paper lightable-striped lightable-hover')
  });
</script>

  <script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
  <script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>

<script type="text/javascript">
const typesetMath = (el) => {
  if (window.MathJax) {
    // MathJax Typeset
    window.MathJax.typeset([el]);
  } else if (window.katex) {
    // KaTeX Render
    var mathElements = el.getElementsByClassName("math");
    var macros = [];
    for (var i = 0; i < mathElements.length; i++) {
      var texText = mathElements[i].firstChild;
      if (mathElements[i].tagName == "SPAN") {
        window.katex.render(texText.data, mathElements[i], {
          displayMode: mathElements[i].classList.contains('display'),
          throwOnError: false,
          macros: macros,
          fleqn: false
        });
      }
    }
  }
}
window.Quarto = {
  typesetMath
};
</script>

<link rel="stylesheet" href="style.css">
<link rel="stylesheet" href="font-awesome.min.css">
</head>

<body class="nav-sidebar floating">

<div id="quarto-search-results"></div>
  <header id="quarto-header" class="headroom fixed-top">
  <nav class="quarto-secondary-nav">
    <div class="container-fluid d-flex">
      <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
        <i class="bi bi-layout-text-sidebar-reverse"></i>
      </button>
        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./bayes_simulation.html"><span class="chapter-number">31</span>&nbsp; <span class="chapter-title">Bayesian Analysis by Simulation</span></a></li></ol></nav>
        <a class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">      
        </a>
      <button type="button" class="btn quarto-search-button" aria-label="Search" onclick="window.quartoOpenSearch();">
        <i class="bi bi-search"></i>
      </button>
    </div>
  </nav>
</header>
<!-- content -->
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article">
<!-- sidebar -->
  <nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation floating overflow-auto">
    <div class="pt-lg-2 mt-2 text-left sidebar-header">
    <div class="sidebar-title mb-0 py-0">
      <a href="./">Resampling statistics</a> 
    </div>
      </div>
        <div class="mt-2 flex-shrink-0 align-items-center">
        <div class="sidebar-search">
        <div id="quarto-search" class="" title="Search"></div>
        </div>
        </div>
    <div class="sidebar-menu-container"> 
    <ul class="list-unstyled mt-1">
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./index.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text">R version</span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./preface_third.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text">Preface to the third edition</span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./preface_second.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text">Preface to the second edition</span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./intro.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./resampling_method.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">The resampling method</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./what_is_probability.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">What is probability?</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./about_technology.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Introducing R and the Jupyter notebook</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./resampling_with_code.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Resampling with code</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./resampling_with_code2.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">More resampling with code</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./sampling_tools.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">7</span>&nbsp; <span class="chapter-title">Tools for samples and sampling</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./probability_theory_1a.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">8</span>&nbsp; <span class="chapter-title">Probability Theory, Part 1</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./probability_theory_1b.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">9</span>&nbsp; <span class="chapter-title">Probability Theory Part I (continued)</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./more_sampling_tools.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">10</span>&nbsp; <span class="chapter-title">Two puzzles and more tools</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./probability_theory_2_compound.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">11</span>&nbsp; <span class="chapter-title">Probability Theory, Part 2: Compound Probability</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./probability_theory_3.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">12</span>&nbsp; <span class="chapter-title">Probability Theory, Part 3</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./probability_theory_4_finite.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">13</span>&nbsp; <span class="chapter-title">Probability Theory, Part 4: Estimating Probabilities from Finite Universes</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./sampling_variability.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">14</span>&nbsp; <span class="chapter-title">On Variability in Sampling</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./monte_carlo.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">15</span>&nbsp; <span class="chapter-title">The Procedures of Monte Carlo Simulation (and Resampling)</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./standard_scores.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">16</span>&nbsp; <span class="chapter-title">Ranks, Quantiles and Standard Scores</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./inference_ideas.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">17</span>&nbsp; <span class="chapter-title">The Basic Ideas in Statistical Inference</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./inference_intro.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">18</span>&nbsp; <span class="chapter-title">Introduction to Statistical Inference</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./point_estimation.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">19</span>&nbsp; <span class="chapter-title">Point Estimation</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./framing_questions.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">20</span>&nbsp; <span class="chapter-title">Framing Statistical Questions</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./testing_counts_1.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">21</span>&nbsp; <span class="chapter-title">Hypothesis-Testing with Counted Data, Part 1</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./significance.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">22</span>&nbsp; <span class="chapter-title">The Concept of Statistical Significance in Testing Hypotheses</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./testing_counts_2.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">23</span>&nbsp; <span class="chapter-title">The Statistics of Hypothesis-Testing with Counted Data, Part 2</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./testing_measured.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">24</span>&nbsp; <span class="chapter-title">The Statistics of Hypothesis-Testing With Measured Data</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./testing_procedures.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">25</span>&nbsp; <span class="chapter-title">General Procedures for Testing Hypotheses</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./confidence_1.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">26</span>&nbsp; <span class="chapter-title">Confidence Intervals, Part 1: Assessing the Accuracy of Samples</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./confidence_2.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">27</span>&nbsp; <span class="chapter-title">Confidence Intervals, Part 2: The Two Approaches to Estimating Confidence Intervals</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./reliability_average.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">28</span>&nbsp; <span class="chapter-title">Some Last Words About the Reliability of Sample Averages</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./correlation_causation.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">29</span>&nbsp; <span class="chapter-title">Correlation and Causation</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./how_big_sample.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">30</span>&nbsp; <span class="chapter-title">How Large a Sample?</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./bayes_simulation.html" class="sidebar-item-text sidebar-link active">
 <span class="menu-text"><span class="chapter-number">31</span>&nbsp; <span class="chapter-title">Bayesian Analysis by Simulation</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./references.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text">References</span></a>
  </div>
</li>
        <li class="sidebar-item sidebar-item-section">
      <div class="sidebar-item-container"> 
            <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="button" aria-expanded="true">
 <span class="menu-text">Appendices</span></a>
          <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="button" aria-expanded="true" aria-label="Toggle section">
            <i class="bi bi-chevron-right ms-2"></i>
          </a> 
      </div>
      <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">  
          <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./exercise_solutions.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">A</span>&nbsp; <span class="chapter-title">Exercise Solutions</span></span></a>
  </div>
</li>
          <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./technical_note.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">B</span>&nbsp; <span class="chapter-title">Technical Note to the Professional Reader</span></span></a>
  </div>
</li>
          <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./acknowlegements.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">C</span>&nbsp; <span class="chapter-title">Acknowledgements</span></span></a>
  </div>
</li>
          <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./code_topics.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">D</span>&nbsp; <span class="chapter-title">Code topics</span></span></a>
  </div>
</li>
          <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./errors_suggestions.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">E</span>&nbsp; <span class="chapter-title">Errors and suggestions</span></span></a>
  </div>
</li>
      </ul>
  </li>
    </ul>
    </div>
</nav>
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
<!-- margin-sidebar -->
    <div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
        <nav id="TOC" role="doc-toc" class="toc-active">
    <h2 id="toc-title">Table of contents</h2>
   
  <ul>
  <li><a href="#simple-decision-problems" id="toc-simple-decision-problems" class="nav-link active" data-scroll-target="#simple-decision-problems"><span class="header-section-number">31.1</span> Simple decision problems</a>
  <ul class="collapse">
  <li><a href="#assessing-the-likelihood-that-a-used-car-will-be-sound" id="toc-assessing-the-likelihood-that-a-used-car-will-be-sound" class="nav-link" data-scroll-target="#assessing-the-likelihood-that-a-used-car-will-be-sound"><span class="header-section-number">31.1.1</span> Assessing the Likelihood That a Used Car Will Be Sound</a></li>
  <li><a href="#calculation-without-simulation" id="toc-calculation-without-simulation" class="nav-link" data-scroll-target="#calculation-without-simulation"><span class="header-section-number">31.1.2</span> Calculation without simulation</a></li>
  </ul></li>
  <li><a href="#probability-interpretation" id="toc-probability-interpretation" class="nav-link" data-scroll-target="#probability-interpretation"><span class="header-section-number">31.2</span> Probability interpretation</a>
  <ul class="collapse">
  <li><a href="#probability-from-proportion" id="toc-probability-from-proportion" class="nav-link" data-scroll-target="#probability-from-proportion"><span class="header-section-number">31.2.1</span> Probability from proportion</a></li>
  <li><a href="#probability-relationships-conditional-probability" id="toc-probability-relationships-conditional-probability" class="nav-link" data-scroll-target="#probability-relationships-conditional-probability"><span class="header-section-number">31.2.2</span> Probability relationships: conditional probability</a></li>
  <li><a href="#example-conditional-probability" id="toc-example-conditional-probability" class="nav-link" data-scroll-target="#example-conditional-probability"><span class="header-section-number">31.2.3</span> Example: conditional probability</a></li>
  <li><a href="#estimating-driving-risk-for-insurance-purposes" id="toc-estimating-driving-risk-for-insurance-purposes" class="nav-link" data-scroll-target="#estimating-driving-risk-for-insurance-purposes"><span class="header-section-number">31.2.4</span> Estimating Driving Risk for Insurance Purposes</a></li>
  <li><a href="#screening-for-disease" id="toc-screening-for-disease" class="nav-link" data-scroll-target="#screening-for-disease"><span class="header-section-number">31.2.5</span> Screening for Disease</a></li>
  </ul></li>
  <li><a href="#fundamental-problems-in-statistical-practice" id="toc-fundamental-problems-in-statistical-practice" class="nav-link" data-scroll-target="#fundamental-problems-in-statistical-practice"><span class="header-section-number">31.3</span> Fundamental problems in statistical practice</a></li>
  <li><a href="#problems-based-on-normal-and-other-distributions" id="toc-problems-based-on-normal-and-other-distributions" class="nav-link" data-scroll-target="#problems-based-on-normal-and-other-distributions"><span class="header-section-number">31.4</span> Problems based on normal and other distributions</a>
  <ul class="collapse">
  <li><a href="#an-intermediate-problem-in-conditional-probability" id="toc-an-intermediate-problem-in-conditional-probability" class="nav-link" data-scroll-target="#an-intermediate-problem-in-conditional-probability"><span class="header-section-number">31.4.1</span> An Intermediate Problem in Conditional Probability</a></li>
  <li><a href="#estimating-the-posterior-distribution" id="toc-estimating-the-posterior-distribution" class="nav-link" data-scroll-target="#estimating-the-posterior-distribution"><span class="header-section-number">31.4.2</span> Estimating the Posterior Distribution</a></li>
  </ul></li>
  <li><a href="#conclusion" id="toc-conclusion" class="nav-link" data-scroll-target="#conclusion"><span class="header-section-number">31.5</span> Conclusion</a></li>
  </ul>
</nav>
    </div>
<!-- main -->
<main class="content" id="quarto-document-content">

<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title"><span id="sec-bayes-simulation" class="quarto-section-identifier"><span class="chapter-number">31</span>&nbsp; <span class="chapter-title">Bayesian Analysis by Simulation</span></span></h1>
</div>


<div class="quarto-title-meta">

    
  </div>
  

</header>


<blockquote class="blockquote">
<p>This branch of mathematics [probability] is the only one, I believe, in which good writers frequently get results entirely erroneous. <span class="citation" data-cites="peirce1923chance">(<a href="references.html#ref-peirce1923chance" role="doc-biblioref">Peirce 1923</a>, Doctrine of Chances, II)</span></p>
</blockquote>
<p>Bayesian analysis is a way of thinking about problems in probability and statistics that can help one reach otherwise-difficult decisions. It also can sometimes be used in science. The range of its recommended uses is controversial, but this chapter deals only with those uses of Bayesian analysis that are uncontroversial.</p>
<p>Better than defining Bayesian analysis in formal terms is to demonstrate its use. We shall start with the simplest sort of problem, and proceed gradually from there.</p>
<section id="simple-decision-problems" class="level2" data-number="31.1">
<h2 data-number="31.1" class="anchored" data-anchor-id="simple-decision-problems"><span class="header-section-number">31.1</span> Simple decision problems</h2>
<section id="assessing-the-likelihood-that-a-used-car-will-be-sound" class="level3" data-number="31.1.1">
<h3 data-number="31.1.1" class="anchored" data-anchor-id="assessing-the-likelihood-that-a-used-car-will-be-sound"><span class="header-section-number">31.1.1</span> Assessing the Likelihood That a Used Car Will Be Sound</h3>
<p>Consider a problem in estimating the soundness of a used car one considers purchasing (after <span class="citation" data-cites="wonnacott1990introductory">(<a href="references.html#ref-wonnacott1990introductory" role="doc-biblioref">Wonnacott and Wonnacott 1990, 93–94</a>)</span>). Seventy percent of the cars are known to be OK on average, and 30 percent are faulty. Of the cars that <em>are</em> really OK, a mechanic correctly identifies 70 percent as “OK” but says that 30 percent are “faulty”; of those that are faulty, the mechanic correctly identifies 90 percent as faulty and says (incorrectly) that 10 percent are OK.</p>
<p>We wish to know the probability that if the mechanic <em>says</em> a car is “OK,” it <em>really</em> is faulty. Phrased differently, what is the probability of a car being faulty if the mechanic said it was OK?</p>
<p>We can get the desired probabilities directly by simulation without knowing Bayes’ rule, as we shall see. But one must be able to model the physical problem correctly in order to proceed with the simulation; this requirement of a clearly visualized model is a strong point in favor of simulation.</p>
<ol type="1">
<li>Note that we are only interested in outcomes where the mechanic approved a car.</li>
<li>For each car, generate a label of either “faulty” or “working” with probabilities of 0.3 and 0.7, respectively.</li>
<li>For each <em>faulty car</em>, we generate one of two labels, “approved” or “not approved” with probabilities 0.1 and 0.9, respectively.</li>
<li>For each <em>working car</em>, we generate one of two labels, “approved” or “not approved” with probabilities 0.7 and 0.3, respectively.</li>
<li>Out of all cars “approved”, count how many are “faulty”. The ratio between these numbers is our answer.</li>
</ol>
<p>Here is the whole simulation of the car / mechanic problem:</p>
<div id="nte-bayes_cars" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;31.1: Notebook: Bayesian analysis of cars and mechanics
</div>
</div>
<div class="callout-body-container callout-body">
<div class="nb-links">
<p><a class="notebook-link" href="notebooks/bayes_cars.Rmd">Download notebook</a> <a class="interact-button" href="./interact/lab/index.html?path=bayes_cars.ipynb">Interact</a></p>
</div>
</div>
</div>
<div class="nb-start" name="bayes_cars" title="Bayesian analysis of cars and mechanics">

</div>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a>n_trials <span class="ot">&lt;-</span> <span class="dv">10000</span>  <span class="co"># number of cars</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Counters for number of approved, number of approved and faulty</span></span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a>approved <span class="ot">&lt;-</span> <span class="dv">0</span></span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a>approved_and_faulty <span class="ot">&lt;-</span> <span class="dv">0</span></span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> (i <span class="cf">in</span> <span class="dv">1</span><span class="sc">:</span>n_trials) {</span>
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Decide whether the car is faulty or working, with a probability of</span></span>
<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a>    <span class="co"># 0.3 and 0.7 respectively</span></span>
<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a>    car <span class="ot">&lt;-</span> <span class="fu">sample</span>(<span class="fu">c</span>(<span class="st">'faulty'</span>, <span class="st">'working'</span>), <span class="at">size=</span><span class="dv">1</span>, <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.3</span>, <span class="fl">0.7</span>))</span>
<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a>    <span class="cf">if</span> (car <span class="sc">==</span> <span class="st">'faulty'</span>) {</span>
<span id="cb1-14"><a href="#cb1-14" aria-hidden="true" tabindex="-1"></a>        <span class="co"># What the mechanic says of a faulty car</span></span>
<span id="cb1-15"><a href="#cb1-15" aria-hidden="true" tabindex="-1"></a>        mechanic_says <span class="ot">&lt;-</span> <span class="fu">sample</span>(<span class="fu">c</span>(<span class="st">'approved'</span>, <span class="st">'not approved'</span>),</span>
<span id="cb1-16"><a href="#cb1-16" aria-hidden="true" tabindex="-1"></a>                                <span class="at">size=</span><span class="dv">1</span>,</span>
<span id="cb1-17"><a href="#cb1-17" aria-hidden="true" tabindex="-1"></a>                                <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.1</span>, <span class="fl">0.9</span>))</span>
<span id="cb1-18"><a href="#cb1-18" aria-hidden="true" tabindex="-1"></a>    } <span class="cf">else</span> {</span>
<span id="cb1-19"><a href="#cb1-19" aria-hidden="true" tabindex="-1"></a>        <span class="co"># What the mechanic says of a working car</span></span>
<span id="cb1-20"><a href="#cb1-20" aria-hidden="true" tabindex="-1"></a>        mechanic_says <span class="ot">&lt;-</span> <span class="fu">sample</span>(<span class="fu">c</span>(<span class="st">'approved'</span>, <span class="st">'not approved'</span>),</span>
<span id="cb1-21"><a href="#cb1-21" aria-hidden="true" tabindex="-1"></a>                                <span class="at">size=</span><span class="dv">1</span>,</span>
<span id="cb1-22"><a href="#cb1-22" aria-hidden="true" tabindex="-1"></a>                                <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.8</span>, <span class="fl">0.2</span>))</span>
<span id="cb1-23"><a href="#cb1-23" aria-hidden="true" tabindex="-1"></a>    }</span>
<span id="cb1-24"><a href="#cb1-24" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-25"><a href="#cb1-25" aria-hidden="true" tabindex="-1"></a>    <span class="cf">if</span> (mechanic_says <span class="sc">==</span> <span class="st">'approved'</span>) {</span>
<span id="cb1-26"><a href="#cb1-26" aria-hidden="true" tabindex="-1"></a>        approved <span class="ot">&lt;-</span>  approved <span class="sc">+</span> <span class="dv">1</span></span>
<span id="cb1-27"><a href="#cb1-27" aria-hidden="true" tabindex="-1"></a>        <span class="cf">if</span> (car <span class="sc">==</span> <span class="st">'faulty'</span>) {</span>
<span id="cb1-28"><a href="#cb1-28" aria-hidden="true" tabindex="-1"></a>            approved_and_faulty <span class="ot">&lt;-</span> approved_and_faulty <span class="sc">+</span> <span class="dv">1</span></span>
<span id="cb1-29"><a href="#cb1-29" aria-hidden="true" tabindex="-1"></a>        }</span>
<span id="cb1-30"><a href="#cb1-30" aria-hidden="true" tabindex="-1"></a>    }</span>
<span id="cb1-31"><a href="#cb1-31" aria-hidden="true" tabindex="-1"></a>}</span>
<span id="cb1-32"><a href="#cb1-32" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-33"><a href="#cb1-33" aria-hidden="true" tabindex="-1"></a>k <span class="ot">&lt;-</span> approved_and_faulty <span class="sc">/</span> approved</span>
<span id="cb1-34"><a href="#cb1-34" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-35"><a href="#cb1-35" aria-hidden="true" tabindex="-1"></a><span class="fu">message</span>(<span class="st">'Proportion of faulty cars of cars approved: '</span>, <span class="fu">round</span>(k, <span class="dv">2</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>Proportion of faulty cars of cars approved: 0.06</code></pre>
</div>
</div>
<p>The answer looks to be somewhere between 5 and 6%. The code clearly follows the description step by step, but it is also quite slow. If we can improve the code, we may be able to do our simulation with more cars, and get a more accurate answer.</p>
<p>Let’s use arrays to store the states of all cars in the lot simultaneously:</p>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Number of cars; we made this number larger by a factor of 100</span></span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>n_trials <span class="ot">&lt;-</span> <span class="dv">1000000</span></span>
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Generate an array with as many entries as there are cars, each</span></span>
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a><span class="co"># being either 'working' or 'faulty'.</span></span>
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a><span class="co"># We are taking a sample _with_ replacement.</span></span>
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a>cars <span class="ot">&lt;-</span> <span class="fu">sample</span>(<span class="fu">c</span>(<span class="st">'working'</span>, <span class="st">'faulty'</span>),</span>
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a>               <span class="at">size=</span>n_trials,</span>
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a>               <span class="at">replace=</span><span class="cn">TRUE</span>,</span>
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a>               <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.7</span>, <span class="fl">0.3</span>))</span>
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Count how many cars are working</span></span>
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a>n_working <span class="ot">&lt;-</span> <span class="fu">sum</span>(cars <span class="sc">==</span> <span class="st">'working'</span>)</span>
<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a><span class="co"># All the rest are faulty</span></span>
<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a>n_faulty <span class="ot">&lt;-</span> n_trials <span class="sc">-</span> n_working</span>
<span id="cb3-17"><a href="#cb3-17" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-18"><a href="#cb3-18" aria-hidden="true" tabindex="-1"></a><span class="co"># Create a new vector in which to store what a mechanic says</span></span>
<span id="cb3-19"><a href="#cb3-19" aria-hidden="true" tabindex="-1"></a><span class="co"># about the car: 'approved' or 'not approved'. We use</span></span>
<span id="cb3-20"><a href="#cb3-20" aria-hidden="true" tabindex="-1"></a><span class="co"># "character" to tell R these are strings.</span></span>
<span id="cb3-21"><a href="#cb3-21" aria-hidden="true" tabindex="-1"></a>mechanic_says <span class="ot">&lt;-</span> <span class="fu">character</span>(n_trials)</span>
<span id="cb3-22"><a href="#cb3-22" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-23"><a href="#cb3-23" aria-hidden="true" tabindex="-1"></a><span class="co"># We start with the working cars; what does the mechanic say about them?</span></span>
<span id="cb3-24"><a href="#cb3-24" aria-hidden="true" tabindex="-1"></a><span class="co"># Generate 'approved' or 'not approved' labels with the given probabilities.</span></span>
<span id="cb3-25"><a href="#cb3-25" aria-hidden="true" tabindex="-1"></a>mechanic_says[cars <span class="sc">==</span> <span class="st">'working'</span>] <span class="ot">&lt;-</span> <span class="fu">sample</span>(</span>
<span id="cb3-26"><a href="#cb3-26" aria-hidden="true" tabindex="-1"></a>    <span class="fu">c</span>(<span class="st">'approved'</span>, <span class="st">'not approved'</span>),</span>
<span id="cb3-27"><a href="#cb3-27" aria-hidden="true" tabindex="-1"></a>    <span class="at">size=</span>n_working,</span>
<span id="cb3-28"><a href="#cb3-28" aria-hidden="true" tabindex="-1"></a>    <span class="at">replace=</span><span class="cn">TRUE</span>,</span>
<span id="cb3-29"><a href="#cb3-29" aria-hidden="true" tabindex="-1"></a>    <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.8</span>, <span class="fl">0.2</span>)</span>
<span id="cb3-30"><a href="#cb3-30" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb3-31"><a href="#cb3-31" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-32"><a href="#cb3-32" aria-hidden="true" tabindex="-1"></a><span class="co"># Similarly, for each faulty car, generate 'approved'/'not approved'</span></span>
<span id="cb3-33"><a href="#cb3-33" aria-hidden="true" tabindex="-1"></a><span class="co"># labels with the given probabilities.</span></span>
<span id="cb3-34"><a href="#cb3-34" aria-hidden="true" tabindex="-1"></a>mechanic_says[cars <span class="sc">==</span> <span class="st">'faulty'</span>] <span class="ot">&lt;-</span> <span class="fu">sample</span>(</span>
<span id="cb3-35"><a href="#cb3-35" aria-hidden="true" tabindex="-1"></a>    <span class="fu">c</span>(<span class="st">'approved'</span>, <span class="st">'not approved'</span>),</span>
<span id="cb3-36"><a href="#cb3-36" aria-hidden="true" tabindex="-1"></a>    <span class="at">size=</span>n_faulty,</span>
<span id="cb3-37"><a href="#cb3-37" aria-hidden="true" tabindex="-1"></a>    <span class="at">replace=</span><span class="cn">TRUE</span>,</span>
<span id="cb3-38"><a href="#cb3-38" aria-hidden="true" tabindex="-1"></a>    <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.1</span>, <span class="fl">0.9</span>)</span>
<span id="cb3-39"><a href="#cb3-39" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb3-40"><a href="#cb3-40" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-41"><a href="#cb3-41" aria-hidden="true" tabindex="-1"></a><span class="co"># Identify all cars that were approved</span></span>
<span id="cb3-42"><a href="#cb3-42" aria-hidden="true" tabindex="-1"></a><span class="co"># This produces a binary mask, an array that looks like:</span></span>
<span id="cb3-43"><a href="#cb3-43" aria-hidden="true" tabindex="-1"></a><span class="co"># [TRUE, FALSE, FALSE, TRUE, ... ]</span></span>
<span id="cb3-44"><a href="#cb3-44" aria-hidden="true" tabindex="-1"></a>approved <span class="ot">&lt;-</span> (mechanic_says <span class="sc">==</span> <span class="st">'approved'</span>)</span>
<span id="cb3-45"><a href="#cb3-45" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-46"><a href="#cb3-46" aria-hidden="true" tabindex="-1"></a><span class="co"># Identify cars that are faulty AND were approved</span></span>
<span id="cb3-47"><a href="#cb3-47" aria-hidden="true" tabindex="-1"></a>faulty_but_approved <span class="ot">&lt;-</span> (cars <span class="sc">==</span> <span class="st">'faulty'</span>) <span class="sc">&amp;</span> approved</span>
<span id="cb3-48"><a href="#cb3-48" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-49"><a href="#cb3-49" aria-hidden="true" tabindex="-1"></a><span class="co"># Count the number of cars that are faulty but approved, as well as</span></span>
<span id="cb3-50"><a href="#cb3-50" aria-hidden="true" tabindex="-1"></a><span class="co"># the total number of cars that were approved</span></span>
<span id="cb3-51"><a href="#cb3-51" aria-hidden="true" tabindex="-1"></a>n_faulty_but_approved <span class="ot">&lt;-</span> <span class="fu">sum</span>(faulty_but_approved)</span>
<span id="cb3-52"><a href="#cb3-52" aria-hidden="true" tabindex="-1"></a>n_approved <span class="ot">&lt;-</span> <span class="fu">sum</span>(approved)</span>
<span id="cb3-53"><a href="#cb3-53" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-54"><a href="#cb3-54" aria-hidden="true" tabindex="-1"></a><span class="co"># Calculate the ratio, which is the answer we seek</span></span>
<span id="cb3-55"><a href="#cb3-55" aria-hidden="true" tabindex="-1"></a>k <span class="ot">&lt;-</span> n_faulty_but_approved <span class="sc">/</span> n_approved</span>
<span id="cb3-56"><a href="#cb3-56" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-57"><a href="#cb3-57" aria-hidden="true" tabindex="-1"></a><span class="fu">message</span>(<span class="st">'Proportion of faulty cars of cars approved: '</span>, <span class="fu">round</span>(k, <span class="dv">2</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>Proportion of faulty cars of cars approved: 0.05</code></pre>
</div>
</div>
<p>The code now runs much faster, and with a larger number of cars we see that the answer is closer to a 5% chance of a car being broken after it has been approved by a mechanic.</p>
<div class="nb-end">

</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
End of notebook: Bayesian analysis of cars and mechanics
</div>
</div>
<div class="callout-body-container callout-body">
<p><code>bayes_cars</code> starts at <a href="#nte-bayes_cars" class="quarto-xref">Note&nbsp;<span>31.1</span></a>.</p>
</div>
</div>
<!---
End of notebook
-->
</section>
<section id="calculation-without-simulation" class="level3" data-number="31.1.2">
<h3 data-number="31.1.2" class="anchored" data-anchor-id="calculation-without-simulation"><span class="header-section-number">31.1.2</span> Calculation without simulation</h3>
<p>Simulation forces us to model our problem clearly and concretely in code. Such code is most often easier to reason about than opaque statistical methods. Running the simulation gives a good sense of what the correct answer should be. Thereafter, we can still look into different — sometimes more elegant or accurate — ways of modeling and solving the problem.</p>
<p>Let’s examine the following diagram of our car selection:</p>
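<p><img src="diagrams/car-tree.png" class="img-fluid" alt="Probability tree for the car example: a car is either working (70%) or faulty (30%), and in each case the mechanic either approves it or does not; the two paths that end in approval are highlighted."></p>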
<p>We see that there are two paths, highlighted, that result in a car being approved by a mechanic. Either a car can be working, and correctly identified as such by a mechanic; or the car can be broken, while the mechanic mistakenly determines it to be working. Our question only pertains to these two paths, so we do not need to study the rest of the tree.</p>
<p>In the long run, in our simulation, about 70% of the cars will end with the label “working”, and about 30% will end up with the label “faulty”. We just took 10000 sample cars above but, in fact, the larger the number of cars we take, the closer we will get to 70% “working” and 30% “faulty”. So, with many samples, we can think of 70% of these samples flowing down the “working” path, and 30% flowing along the “faulty” path.</p>
<p>Now, we want to know, of all the cars approved by a mechanic, how many are faulty:</p>
<p><span class="math display">\[ \frac{\mathrm{cars_{\mathrm{faulty}}}}{\mathrm{cars}_{\mathrm{approved}}} \]</span></p>
<p>We follow the two highlighted paths in the tree:</p>
<ol type="1">
<li>Of a large sample of cars, 30% are faulty. Of these, 10% are approved by a mechanic. That is, 30% * 10% = 3% of all cars.</li>
<li>Of all cars, 70% work. Of these, 80% are approved by a mechanic. That is, 70% * 80% = 56% of all cars.</li>
</ol>
<p>The percentage of faulty cars, out of approved cars, becomes:</p>
<p><span class="math display">\[
3\% / (56\% + 3\%) = 5.08\%
\]</span></p>
<p>Notation-wise, it is a bit easier to calculate these sums using proportions rather than percentages:</p>
<ol type="1">
<li>Faulty cars approved by a mechanic: 0.3 * 0.1 = 0.03</li>
<li>Working cars approved by a mechanic: 0.7 * 0.8 = 0.56</li>
</ol>
<p>Fraction of faulty cars out of approved cars: 0.03 / (0.03 + 0.56) = 0.0508</p>
<p>We see that every time the tree branches, it filters the cars: some go to one branch, the rest to another. In our code, we used the AND (<code>&amp;</code>) operator to find the intersection between faulty AND approved cars, i.e., to filter out from all faulty cars only the cars that were ALSO approved.</p>
</section>
</section>
<section id="probability-interpretation" class="level2" data-number="31.2">
<h2 data-number="31.2" class="anchored" data-anchor-id="probability-interpretation"><span class="header-section-number">31.2</span> Probability interpretation</h2>
<section id="probability-from-proportion" class="level3" data-number="31.2.1">
<h3 data-number="31.2.1" class="anchored" data-anchor-id="probability-from-proportion"><span class="header-section-number">31.2.1</span> Probability from proportion</h3>
<p>In these examples, we often calculate proportions. In the given simulation:</p>
<ul>
<li>How many cars are approved by a mechanic? 59/100.</li>
<li>How many of those 59 were faulty? 3/59.</li>
</ul>
<p>We often also count how commonly events occur: “it rained 4 out of the 10 days”.</p>
<p>An extension of this idea is to <em>predict</em> the probability of an event occurring, based on what we had seen in the past. We can say “out of 100 days, there was some rain on 20 of them; we therefore estimate that the probability of rain occurring is 20/100”. Of course, this is not a complex or very accurate weather model; for that, we’d need to take other factors—such as season—into consideration. Overall, the more observations we have, the better our probability estimates become. We discussed this idea previously in “The Law of Large Numbers”.</p>
<!---
** TODO: REFERENCE SECTION ON LARGE NUMBERS **
-->
<section id="ratios-of-proportions" class="level4" data-number="31.2.1.1">
<h4 data-number="31.2.1.1" class="anchored" data-anchor-id="ratios-of-proportions"><span class="header-section-number">31.2.1.1</span> Ratios of proportions</h4>
<p>At our mechanic’s yard, we can ask “how many red cars here are faulty”? To calculate that, we’d first count the number of red cars, then the number of those red cars that are also broken, then calculate the ratio: <code>red_cars_faulty / red_cars</code>.</p>
<p>We could just as well have worked in percentages: <code>percentage_of_red_cars_broken / percentage_of_cars_that_are_red</code>, since that is <code>(red_cars_broken / 100) / (red_cars / 100)</code>—the same ratio calculated before.</p>
<p>Our point is that the denominator doesn’t matter when calculating ratios, so we could just as well have written:</p>
<p><code>(red_cars_broken / all_cars) / (red_cars / all_cars)</code></p>
<p>or</p>
<p><span class="math display">\[
P(\text{cars that are red and that are broken}) / P(\text{red cars})
\]</span></p>
<!---
** TODO: THE ABOVE MAY BE A SUBTLE POINT THAT NEEDS TO EXPANDED, BUT THE TEXT IS GETTING LONG AS-IS **
-->
</section>
</section>
<section id="probability-relationships-conditional-probability" class="level3" data-number="31.2.2">
<h3 data-number="31.2.2" class="anchored" data-anchor-id="probability-relationships-conditional-probability"><span class="header-section-number">31.2.2</span> Probability relationships: conditional probability</h3>
<p>Here’s one way of writing the probability that a car is broken:</p>
<p><span class="math display">\[
P(\text{car is broken})
\]</span></p>
<p>We can shorten “car is broken” to B, and write the same thing as:</p>
<p><span class="math display">\[
P(B)
\]</span></p>
<p>Similarly, we could write the probability that a car is red as:</p>
<p><span class="math display">\[
P(R)
\]</span></p>
<p>We might also want to express the <em>conditional probability</em>, as in the probability that the car is broken, <em>given that</em> we already know that the car is red:</p>
<p><span class="math display">\[
P(\text{car is broken GIVEN THAT car is red})
\]</span></p>
<p>That is getting pretty verbose, so we will shorten this as we did above:</p>
<p><span class="math display">\[
P(B \text{ GIVEN THAT } R)
\]</span></p>
<p>To make things even more compact, we write “GIVEN THAT” as a vertical bar <code>|</code> — so the whole thing becomes:</p>
<p><span class="math display">\[
P(B | R)
\]</span></p>
<p>We read this as “the probability that the car is broken given that the car is red”. Such a probability is known as a <em>conditional probability</em>. We discuss these in more detail in <a href="probability_theory_1a.html#sec-cond-uncond" class="quarto-xref"><span>Section 8.13</span></a>.</p>
<p>In our original problem, we ask what the chance is of a car being broken given that a mechanic approved it. As discussed under “Ratios of proportions”, it can be calculated with:</p>
<p><span class="math display">\[\begin{align*}
P(\text{car broken | mechanic approved}) = \\
P(\text{car broken and mechanic approved}) / P(\text{mechanic approved})
\end{align*}\]</span></p>
<p>We have already used <span class="math inline">\(B\)</span> to mean “broken” (above), so let us use <span class="math inline">\(A\)</span> to mean “mechanic approved”. Then we can write the statement above in a more compact way:</p>
<p><span class="math display">\[
P(B | A) = P(B \text{ and } A) / P(A)
\]</span></p>
<p>To put this generally, conditional probabilities for two events <span class="math inline">\(X\)</span> and <span class="math inline">\(Y\)</span> can be written as:</p>
<p><span class="math inline">\(P(X | Y) = P(X \text{ and } Y) / P(Y)\)</span></p>
<p>Where (again) <span class="math inline">\(\text{ and }\)</span> means that <em>both</em> events occur.</p>
</section>
<section id="example-conditional-probability" class="level3" data-number="31.2.3">
<h3 data-number="31.2.3" class="anchored" data-anchor-id="example-conditional-probability"><span class="header-section-number">31.2.3</span> Example: conditional probability</h3>
<p>Let’s discuss a very relevant example. You get a Covid test, and the test is negative. Now, you would like to know what the chance is of you having Covid.</p>
<p>We have the following information:</p>
<ul>
<li>1.5% of people in your area have Covid</li>
<li>The false positive rate of the tests (i.e., that they detect Covid when it is absent) is very low at 0.5%</li>
<li>The false negative rate (i.e., that they fail to detect Covid when it is present) is quite high at 40%</li>
</ul>
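<p><img src="diagrams/covid-tree.png" class="img-fluid" alt="Probability tree for the Covid example: a person either has Covid (1.5%) or does not (98.5%), and in each case the test is either positive or negative."></p>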
<p>Again, we start with our simulation.</p>
<div id="nte-bayes_covid" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;31.2: Notebook: Bayesian analysis of Covid test result
</div>
</div>
<div class="callout-body-container callout-body">
<div class="nb-links">
<p><a class="notebook-link" href="notebooks/bayes_covid.Rmd">Download notebook</a> <a class="interact-button" href="./interact/lab/index.html?path=bayes_covid.ipynb">Interact</a></p>
</div>
</div>
</div>
<div class="nb-start" name="bayes_covid" title="Bayesian analysis of Covid test result">

</div>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># The number of people.</span></span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>n_trials <span class="ot">&lt;-</span> <span class="dv">1000000</span></span>
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a><span class="co"># For each person, generate a TRUE or FALSE label,</span></span>
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a><span class="co"># indicating that they have / don't have Covid.</span></span>
<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a>person_has_covid <span class="ot">&lt;-</span> <span class="fu">sample</span>(</span>
<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a>    <span class="fu">c</span>(<span class="cn">TRUE</span>, <span class="cn">FALSE</span>),</span>
<span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a>    <span class="at">size=</span>n_trials,</span>
<span id="cb5-9"><a href="#cb5-9" aria-hidden="true" tabindex="-1"></a>    <span class="at">replace=</span><span class="cn">TRUE</span>,</span>
<span id="cb5-10"><a href="#cb5-10" aria-hidden="true" tabindex="-1"></a>    <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.015</span>, <span class="fl">0.985</span>)</span>
<span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a><span class="co"># Calculate the numbers of people with and without Covid.</span></span>
<span id="cb5-14"><a href="#cb5-14" aria-hidden="true" tabindex="-1"></a>n_with_covid <span class="ot">&lt;-</span> <span class="fu">sum</span>(person_has_covid)</span>
<span id="cb5-15"><a href="#cb5-15" aria-hidden="true" tabindex="-1"></a>n_without_covid <span class="ot">&lt;-</span> n_trials <span class="sc">-</span> n_with_covid</span>
<span id="cb5-16"><a href="#cb5-16" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-17"><a href="#cb5-17" aria-hidden="true" tabindex="-1"></a><span class="co"># In this array, we will store, for each person, whether they</span></span>
<span id="cb5-18"><a href="#cb5-18" aria-hidden="true" tabindex="-1"></a><span class="co"># had a positive or a negative test.</span></span>
<span id="cb5-19"><a href="#cb5-19" aria-hidden="true" tabindex="-1"></a>test_result <span class="ot">&lt;-</span> <span class="fu">logical</span>(n_trials)</span>
<span id="cb5-20"><a href="#cb5-20" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-21"><a href="#cb5-21" aria-hidden="true" tabindex="-1"></a><span class="co"># Draw test results for people with Covid.</span></span>
<span id="cb5-22"><a href="#cb5-22" aria-hidden="true" tabindex="-1"></a>test_result[person_has_covid] <span class="ot">&lt;-</span> <span class="fu">sample</span>(</span>
<span id="cb5-23"><a href="#cb5-23" aria-hidden="true" tabindex="-1"></a>    <span class="fu">c</span>(<span class="cn">TRUE</span>, <span class="cn">FALSE</span>),</span>
<span id="cb5-24"><a href="#cb5-24" aria-hidden="true" tabindex="-1"></a>    <span class="at">size=</span>n_with_covid,</span>
<span id="cb5-25"><a href="#cb5-25" aria-hidden="true" tabindex="-1"></a>    <span class="at">replace=</span><span class="cn">TRUE</span>,</span>
<span id="cb5-26"><a href="#cb5-26" aria-hidden="true" tabindex="-1"></a>    <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.6</span>, <span class="fl">0.4</span>)</span>
<span id="cb5-27"><a href="#cb5-27" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb5-28"><a href="#cb5-28" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-29"><a href="#cb5-29" aria-hidden="true" tabindex="-1"></a><span class="co"># Draw test results for people without Covid.</span></span>
<span id="cb5-30"><a href="#cb5-30" aria-hidden="true" tabindex="-1"></a><span class="co"># `!person_has_covid` flips all Boolean values from FALSE to TRUE</span></span>
<span id="cb5-31"><a href="#cb5-31" aria-hidden="true" tabindex="-1"></a><span class="co"># and from TRUE to FALSE.</span></span>
<span id="cb5-32"><a href="#cb5-32" aria-hidden="true" tabindex="-1"></a>test_result[<span class="sc">!</span>person_has_covid] <span class="ot">&lt;-</span> <span class="fu">sample</span>(</span>
<span id="cb5-33"><a href="#cb5-33" aria-hidden="true" tabindex="-1"></a>    <span class="fu">c</span>(<span class="cn">TRUE</span>, <span class="cn">FALSE</span>),</span>
<span id="cb5-34"><a href="#cb5-34" aria-hidden="true" tabindex="-1"></a>    <span class="at">size=</span>n_without_covid,</span>
<span id="cb5-35"><a href="#cb5-35" aria-hidden="true" tabindex="-1"></a>    <span class="at">replace=</span><span class="cn">TRUE</span>,</span>
<span id="cb5-36"><a href="#cb5-36" aria-hidden="true" tabindex="-1"></a>    <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.005</span>, <span class="fl">0.995</span>)</span>
<span id="cb5-37"><a href="#cb5-37" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb5-38"><a href="#cb5-38" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-39"><a href="#cb5-39" aria-hidden="true" tabindex="-1"></a><span class="co"># Get the Covid statuses of all those with negative tests</span></span>
<span id="cb5-40"><a href="#cb5-40" aria-hidden="true" tabindex="-1"></a><span class="co"># (`test_result` is a Boolean mask, like `[TRUE, FALSE, FALSE, TRUE, ...]`,</span></span>
<span id="cb5-41"><a href="#cb5-41" aria-hidden="true" tabindex="-1"></a><span class="co"># and `!test_result` flips all Boolean values to `[FALSE, TRUE, TRUE, FALSE, ...]`).</span></span>
<span id="cb5-42"><a href="#cb5-42" aria-hidden="true" tabindex="-1"></a>covid_status_negative_test <span class="ot">&lt;-</span> person_has_covid[<span class="sc">!</span>test_result]</span>
<span id="cb5-43"><a href="#cb5-43" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-44"><a href="#cb5-44" aria-hidden="true" tabindex="-1"></a><span class="co"># Now, count how many with Covid had a negative test result.</span></span>
<span id="cb5-45"><a href="#cb5-45" aria-hidden="true" tabindex="-1"></a>n_with_covid_and_negative_test <span class="ot">&lt;-</span> <span class="fu">sum</span>(covid_status_negative_test)</span>
<span id="cb5-46"><a href="#cb5-46" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-47"><a href="#cb5-47" aria-hidden="true" tabindex="-1"></a><span class="co"># And how many people, overall, had negative test results.</span></span>
<span id="cb5-48"><a href="#cb5-48" aria-hidden="true" tabindex="-1"></a>n_with_negative_test <span class="ot">&lt;-</span> <span class="fu">length</span>(covid_status_negative_test)</span>
<span id="cb5-49"><a href="#cb5-49" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-50"><a href="#cb5-50" aria-hidden="true" tabindex="-1"></a>k <span class="ot">&lt;-</span> n_with_covid_and_negative_test <span class="sc">/</span> n_with_negative_test</span>
<span id="cb5-51"><a href="#cb5-51" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-52"><a href="#cb5-52" aria-hidden="true" tabindex="-1"></a><span class="fu">message</span>(<span class="st">'Proportion with Covid of those with negative test: '</span>, <span class="fu">round</span>(k, <span class="dv">4</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>Proportion with Covid of those with negative test: 0.0061</code></pre>
</div>
</div>
<p>This gives around 0.006 or 0.6%.</p>
<div class="nb-end">

</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
End of notebook: Bayesian analysis of Covid test result
</div>
</div>
<div class="callout-body-container callout-body">
<p><code>bayes_covid</code> starts at <a href="#nte-bayes_covid" class="quarto-xref">Note&nbsp;<span>31.2</span></a>.</p>
</div>
</div>
<!---
End of notebook.
-->
<p>Now that we have a rough indication of what the answer should be, let’s try and calculate it directly, based on the tree of information shown earlier.</p>
<p>We will use these abbreviations:</p>
<ul>
<li><span class="math inline">\(C^+\)</span> means Covid positive (you do actually have Covid).</li>
<li><span class="math inline">\(C^-\)</span> means Covid negative (you do <em>not</em> actually have Covid).</li>
<li><span class="math inline">\(T^+\)</span> means the Covid <em>test</em> was positive.</li>
<li><span class="math inline">\(T^-\)</span> means the Covid <em>test</em> was negative.</li>
</ul>
<p>For example <span class="math inline">\(P(C^+ | T^-)\)</span> is the probability (<span class="math inline">\(P\)</span>) that you do actually have Covid (<span class="math inline">\(C^+\)</span>) <em>given that</em> (<span class="math inline">\(|\)</span>) the test was negative (<span class="math inline">\(T^-\)</span>).</p>
<p>We would like to know the probability of having Covid <em>given that</em> your test was negative (<span class="math inline">\(P(C^+ | T^-)\)</span>). Using the conditional probability relationship from above, we can write:</p>
<p><span class="math display">\[
P(C^+ | T^-) = P(C^+ \text{ and } T^-) / P(T^-)
\]</span></p>
<p>We see from the tree diagram that <span class="math inline">\(P(C^+ \text{ and } T^-) = P(T^- | C^+) * P(C^+) = .4 * .015 = 0.006\)</span>.</p>
<!---
**TODO: ADD REFERENCE TO SUMMATION OF MUTUALLY EXCLUSIVE PROBABILITIES**
-->
<p>We observe that <span class="math inline">\(P(T^-) = P(T^- \text{ and } C^-) + P(T^- \text{ and } C^+)\)</span>, i.e.&nbsp;that we can obtain a negative test result through two paths, having Covid or not having Covid. We expand these further as conditional probabilities:</p>
<p><span class="math inline">\(P(T^- \text{ and } C^-) = P(T^- | C^-) * P(C^-)\)</span></p>
<p>and</p>
<p><span class="math inline">\(P(T^- \text{ and } C^+) = P(T^- | C^+) * P(C^+)\)</span>.</p>
<p>We can now calculate</p>
<p><span class="math display">\[
P(T^-) = P(T^- | C^-) * P(C^-) + P(T^- | C^+) * P(C^+)
\]</span></p>
<p><span class="math display">\[
= .995 * .985 + .4 * .015 = 0.986
\]</span></p>
<p>The answer, then, is:</p>
<p><span class="math inline">\(P(C^+ | T^-) = 0.006 / 0.986 = 0.0061\)</span> or 0.61%.</p>
<p>This matches very closely our simulation result, so we have some confidence that we have done the calculation correctly.</p>
</section>
<section id="estimating-driving-risk-for-insurance-purposes" class="level3" data-number="31.2.4">
<h3 data-number="31.2.4" class="anchored" data-anchor-id="estimating-driving-risk-for-insurance-purposes"><span class="header-section-number">31.2.4</span> Estimating Driving Risk for Insurance Purposes</h3>
<p>Another sort of introductory problem, following after <span class="citation" data-cites="feller1968introduction">(<a href="references.html#ref-feller1968introduction" role="doc-biblioref">Feller 1968</a>, p 122)</span>:</p>
<div id="nte-bayes_accidents" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;31.3: Notebook: Bayesian analysis for insurance premium
</div>
</div>
<div class="callout-body-container callout-body">
<div class="nb-links">
<p><a class="notebook-link" href="notebooks/bayes_accidents.Rmd">Download notebook</a> <a class="interact-button" href="./interact/lab/index.html?path=bayes_accidents.ipynb">Interact</a></p>
</div>
</div>
</div>
<div class="nb-start" name="bayes_accidents" title="Bayesian analysis for insurance premium">

</div>
<p>A mutual insurance company charges its members according to the risk of having a car accident. It is known that there are two classes of people — 80 percent of the population with good driving judgment and with a probability of .06 of having an accident each year, and 20 percent with poor judgment and a probability of .6 of having an accident each year. The company’s policy is to charge $100 for each percent of risk, i.e., a driver with a probability of .6 should pay 60*$100 = $6000.</p>
<p>If nothing is known of a driver except that they had an accident last year, what fee should they pay?</p>
<p>Another way to phrase this question is: given that a driver had an accident last year, what is the probability of them having an accident overall?</p>
<p>We will proceed as follows:</p>
<ol type="1">
<li>Generate a population of N people. Label each as <code>good driver</code> or <code>poor driver</code>.</li>
<li>Simulate the last year for each person: did they have an accident or not?</li>
<li>Select only the ones that had an accident last year.</li>
<li>Among those, calculate their average risk of having an accident. This will indicate the appropriate insurance premium.</li>
</ol>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>n_trials <span class="ot">&lt;-</span> <span class="dv">100000</span></span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a>cost_per_percent <span class="ot">&lt;-</span> <span class="dv">100</span></span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a>people <span class="ot">&lt;-</span> <span class="fu">sample</span>(</span>
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a>    <span class="fu">c</span>(<span class="st">'good driver'</span>, <span class="st">'poor driver'</span>),</span>
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a>    <span class="at">size=</span>n_trials,</span>
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a>    <span class="at">replace=</span><span class="cn">TRUE</span>,</span>
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a>    <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.8</span>, <span class="fl">0.2</span>)</span>
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a>good_driver <span class="ot">&lt;-</span> (people <span class="sc">==</span> <span class="st">'good driver'</span>)</span>
<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a>poor_driver <span class="ot">&lt;-</span> <span class="sc">!</span>good_driver</span>
<span id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-14"><a href="#cb7-14" aria-hidden="true" tabindex="-1"></a><span class="co"># Did they have an accident last year?</span></span>
<span id="cb7-15"><a href="#cb7-15" aria-hidden="true" tabindex="-1"></a>had_accident <span class="ot">&lt;-</span> <span class="fu">logical</span>(n_trials)</span>
<span id="cb7-16"><a href="#cb7-16" aria-hidden="true" tabindex="-1"></a>had_accident[good_driver] <span class="ot">&lt;-</span> <span class="fu">sample</span>(</span>
<span id="cb7-17"><a href="#cb7-17" aria-hidden="true" tabindex="-1"></a>    <span class="fu">c</span>(<span class="cn">TRUE</span>, <span class="cn">FALSE</span>),</span>
<span id="cb7-18"><a href="#cb7-18" aria-hidden="true" tabindex="-1"></a>    <span class="at">size=</span><span class="fu">sum</span>(good_driver),</span>
<span id="cb7-19"><a href="#cb7-19" aria-hidden="true" tabindex="-1"></a>    <span class="at">replace=</span><span class="cn">TRUE</span>,</span>
<span id="cb7-20"><a href="#cb7-20" aria-hidden="true" tabindex="-1"></a>    <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.06</span>, <span class="fl">0.94</span>)</span>
<span id="cb7-21"><a href="#cb7-21" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb7-22"><a href="#cb7-22" aria-hidden="true" tabindex="-1"></a>had_accident[poor_driver] <span class="ot">&lt;-</span> <span class="fu">sample</span>(</span>
<span id="cb7-23"><a href="#cb7-23" aria-hidden="true" tabindex="-1"></a>    <span class="fu">c</span>(<span class="cn">TRUE</span>, <span class="cn">FALSE</span>),</span>
<span id="cb7-24"><a href="#cb7-24" aria-hidden="true" tabindex="-1"></a>    <span class="at">size=</span><span class="fu">sum</span>(poor_driver),</span>
<span id="cb7-25"><a href="#cb7-25" aria-hidden="true" tabindex="-1"></a>    <span class="at">replace=</span><span class="cn">TRUE</span>,</span>
<span id="cb7-26"><a href="#cb7-26" aria-hidden="true" tabindex="-1"></a>    <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.6</span>, <span class="fl">0.4</span>)</span>
<span id="cb7-27"><a href="#cb7-27" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb7-28"><a href="#cb7-28" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-29"><a href="#cb7-29" aria-hidden="true" tabindex="-1"></a>ppl_with_accidents <span class="ot">&lt;-</span> people[had_accident]</span>
<span id="cb7-30"><a href="#cb7-30" aria-hidden="true" tabindex="-1"></a>n_good_driver_accidents <span class="ot">&lt;-</span> <span class="fu">sum</span>(ppl_with_accidents <span class="sc">==</span> <span class="st">'good driver'</span>)</span>
<span id="cb7-31"><a href="#cb7-31" aria-hidden="true" tabindex="-1"></a>n_poor_driver_accidents <span class="ot">&lt;-</span> <span class="fu">sum</span>(ppl_with_accidents <span class="sc">==</span> <span class="st">'poor driver'</span>)</span>
<span id="cb7-32"><a href="#cb7-32" aria-hidden="true" tabindex="-1"></a>n_all_with_accidents <span class="ot">&lt;-</span> n_good_driver_accidents <span class="sc">+</span> n_poor_driver_accidents</span>
<span id="cb7-33"><a href="#cb7-33" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-34"><a href="#cb7-34" aria-hidden="true" tabindex="-1"></a>avg_risk_percent <span class="ot">&lt;-</span> (n_good_driver_accidents <span class="sc">*</span> <span class="fl">0.06</span> <span class="sc">+</span></span>
<span id="cb7-35"><a href="#cb7-35" aria-hidden="true" tabindex="-1"></a>                     n_poor_driver_accidents <span class="sc">*</span> <span class="fl">0.6</span>) <span class="sc">/</span></span>
<span id="cb7-36"><a href="#cb7-36" aria-hidden="true" tabindex="-1"></a>                     n_all_with_accidents <span class="sc">*</span> <span class="dv">100</span></span>
<span id="cb7-37"><a href="#cb7-37" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-38"><a href="#cb7-38" aria-hidden="true" tabindex="-1"></a>premium <span class="ot">&lt;-</span> avg_risk_percent <span class="sc">*</span> cost_per_percent</span>
<span id="cb7-39"><a href="#cb7-39" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-40"><a href="#cb7-40" aria-hidden="true" tabindex="-1"></a><span class="fu">message</span>(<span class="st">'Premium is: '</span>, <span class="fu">round</span>(premium))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>Premium is: 4418</code></pre>
</div>
</div>
<p>The answer should be around 4450 USD.</p>
<div class="nb-end">

</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
End of notebook: Bayesian analysis for insurance premium
</div>
</div>
<div class="callout-body-container callout-body">
<p><code>bayes_accidents</code> starts at <a href="#nte-bayes_accidents" class="quarto-xref">Note&nbsp;<span>31.3</span></a>.</p>
</div>
</div>
<!---
End of notebook.
-->
</section>
<section id="screening-for-disease" class="level3" data-number="31.2.5">
<h3 data-number="31.2.5" class="anchored" data-anchor-id="screening-for-disease"><span class="header-section-number">31.2.5</span> Screening for Disease</h3>
<!---
**TODO: SHALL WE REMOVE THIS PROBLEM, OR INTEGRATE PARTS OF ITS DESCRIPTION WITH THE COVID EXAMPLE?**
-->
<p>This is a classic Bayesian problem (quoted by Tversky and Kahneman <span class="citation" data-cites="tversky1982evidential">(<a href="references.html#ref-tversky1982evidential" role="doc-biblioref">1982, 154</a>)</span>, from Casscells <em>et al.</em> <span class="citation" data-cites="cascells1978interpretation">(<a href="references.html#ref-cascells1978interpretation" role="doc-biblioref">1978, 999</a>)</span>):</p>
<blockquote class="blockquote">
<p>If a test to detect a disease whose prevalence is 1/1000 has a false positive rate of 5%, what is the chance that a person found to have a positive result actually has the disease, assuming you know nothing about the person’s symptoms or signs?</p>
</blockquote>
<p>Tversky and Kahneman note that among the respondents — students and staff at Harvard Medical School — “the most common response, given by almost half of the participants, was 95%” — very much the wrong answer.</p>
<p>To obtain an answer by simulation, we may rephrase the question above with (hypothetical) absolute numbers as follows:</p>
<p>If a test to detect a disease whose prevalence has been estimated to be about 100,000 in the population of 100 million persons over age 40 (that is, about 1 in a thousand) has been observed to have a false positive rate of 60 in 1200 observations, and never gives a negative result if a person really has the disease, what is the chance that a person found to have a positive result actually has the disease, assuming you know nothing about the person’s symptoms or signs?</p>
<p>If the raw numbers are not available, the problem can be phrased in such terms as “about 1 case in 1000” and “about 5 false positives in 100 cases.”</p>
<p>One may obtain an answer as follows:</p>
<ol type="1">
<li><p>Construct bucket A with 999 white beads and 1 black bead, and bucket B with 95 green beads and 5 red beads. A more complete problem that also discusses false negatives would need a third bucket.</p></li>
<li><p>Pick a bead from bucket A. If black, record “T,” replace the bead, and end the trial. If white, continue to step 3.</p></li>
<li><p>If a white bead is drawn from bucket A, select a bead from bucket B. If red, record “F” and replace the bead, and if green record “N” and replace the bead.</p></li>
<li><p>Repeat steps 2-3 perhaps 10,000 times, and in the results count the proportion of “T”s to (“T”s plus “F”s), ignoring the “N”s.</p>
<p>Of course 10,000 draws would be tedious, but even after a few hundred draws a person would be likely to draw the correct conclusion that the proportion of “T”s to (“T”s plus “F”s) would be small. And it is easy with a computer to do 10,000 trials very quickly.</p>
<p>Note that the respondents in the Casscells <em>et al.</em> study were not naive; the medical staff members were supposed to understand statistics. Yet most doctors and other personnel offered wrong answers. If simulation can do better than the standard deductive method, then simulation would seem to be the method of choice. And only one piece of training for simulation is required: Teach the habit of saying “I’ll simulate it” and then actually doing so.</p></li>
</ol>
</section>
</section>
<section id="fundamental-problems-in-statistical-practice" class="level2" data-number="31.3">
<h2 data-number="31.3" class="anchored" data-anchor-id="fundamental-problems-in-statistical-practice"><span class="header-section-number">31.3</span> Fundamental problems in statistical practice</h2>
<p>Box and Tiao <span class="citation" data-cites="box1992bayesian">(<a href="references.html#ref-box1992bayesian" role="doc-biblioref">1992</a>)</span> begin their classic exposition of Bayesian statistics with the analysis of a famous problem first published by Fisher <span class="citation" data-cites="fisher1959statistical">(<a href="references.html#ref-fisher1959statistical" role="doc-biblioref">1959, 18</a>)</span>.</p>
<blockquote class="blockquote">
<p>…there are mice of two colors, black and brown. The black mice are of two genetic kinds, homozygotes (<em>BB</em>) and heterozygotes (<em>Bb</em>), and the brown mice are of one kind (<em>bb</em>). It is known from established genetic theory that the probabilities associated with offspring from various matings are as listed in <a href="#tbl-mice-genetics" class="quarto-xref">Table&nbsp;<span>31.1</span></a>.</p>
</blockquote>
<p>(See <span class="citation" data-cites="box1992bayesian">(<a href="references.html#ref-box1992bayesian" role="doc-biblioref">Box and Tiao 1992, 12–14</a>)</span>).</p>
<div id="tbl-mice-genetics" class="quarto-float quarto-figure quarto-figure-center anchored">
<figure class="quarto-float quarto-float-tbl figure">
<figcaption class="quarto-float-caption-top quarto-float-caption quarto-float-tbl" id="tbl-mice-genetics-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
Table&nbsp;31.1: Probabilities for Genetic Character of Mice Offspring
</figcaption>
<div aria-describedby="tbl-mice-genetics-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
<table class="caption-top table">
<thead>
<tr class="header">
<th></th>
<th style="text-align: left;">BB (black)</th>
<th style="text-align: left;">Bb (black)</th>
<th>bb (brown)</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>BB mated with bb</td>
<td style="text-align: left;">0</td>
<td style="text-align: left;">1</td>
<td>0</td>
</tr>
<tr class="even">
<td>Bb mated with bb</td>
<td style="text-align: left;">0</td>
<td style="text-align: left;">½</td>
<td>½</td>
</tr>
<tr class="odd">
<td>Bb mated with Bb</td>
<td style="text-align: left;">¼</td>
<td style="text-align: left;">½</td>
<td>¼</td>
</tr>
</tbody>
</table>
</div>
</figure>
</div>
<p>Suppose we have a “test” mouse which has been produced by a mating between two (<em>Bb</em>) mice and is black. What is the genetic kind of this mouse?</p>
<p>To answer that, we look at the information in the last line of the table: it shows that the probabilities of a test mouse being of kind <em>BB</em> and <em>Bb</em> are precisely known, and are 1/3 and 2/3 respectively ((1/4)/(1/4 + 1/2) vs (1/2)/(1/4 + 1/2)). We call this our “prior” estimate — in other words, our estimate before seeing data.</p>
<p>Suppose the test mouse is now mated with a brown mouse (of kind <em>bb</em>) and produces seven black offspring. Before, we thought that it was more likely for the parent to be of kind <em>Bb</em> than of kind <em>BB</em>. But if that were true, then we would have expected to have seen some brown offspring (the probability of mating <em>Bb</em> with <em>bb</em> resulting in brown offspring is given as 0.5). Therefore, we sense that it may now be more likely that the parent was of type <em>BB</em> instead. How do we quantify that?</p>
<p>One can calculate, as Fisher <span class="citation" data-cites="fisher1959statistical">(<a href="references.html#ref-fisher1959statistical" role="doc-biblioref">1959, 19</a>)</span> did, the probabilities after seeing the data (we call this the <em>posterior</em> probability). This is typically done using Bayes’ rule.</p>
<p>But instead of doing that, let’s take the easy route out and simulate the situation instead.</p>
<ol type="1">
<li><p>We begin, as do Box and Tiao, by restricting our attention to the third line in <a href="#tbl-mice-genetics" class="quarto-xref">Table&nbsp;<span>31.1</span></a>. We draw a mouse with label ‘BB’, ‘Bb’, or ‘bb’, using those probabilities. We were told that the “test mouse” is black, so if we draw ‘bb’, we try again. (Alternatively, we could draw ‘BB’ and ‘Bb’ with probabilities of 1/3 and 2/3 respectively.)</p></li>
<li><p>We now want to examine the offspring of the test mouse when mated with a brown “bb” mouse. Specifically, we are only interested in cases where all offspring were black. We will store the genetic kind of the parents of such offspring so that we can count them later.</p>
<p>If our test mouse is “BB”, we already know that all their offspring will be black (“Bb”). Thus, store “BB” in the parent list.</p></li>
<li><p>If our test mouse is “Bb”, we have a bit more work to do. Draw seven offspring from the middle row of <a href="#tbl-mice-genetics" class="quarto-xref">Table&nbsp;<span>31.1</span></a>. If all the offspring are black, store “Bb” in the parent list.</p></li>
<li><p>Repeat steps 1-3 perhaps 10000 times.</p></li>
<li><p>Now, out of all parents count the numbers of “BB” vs “Bb”.</p></li>
</ol>
<p>We will do a naïve implementation that closely follows the logic described above, followed by a slightly optimized version.</p>
<!---
TODO - explain continue in more detail?  Have we covered `np.all`?
-->
<div id="nte-box_tao_mice" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;31.4: Notebook: A problem of black and brown mice
</div>
</div>
<div class="callout-body-container callout-body">
<div class="nb-links">
<p><a class="notebook-link" href="notebooks/box_tao_mice.Rmd">Download notebook</a> <a class="interact-button" href="./interact/lab/index.html?path=box_tao_mice.ipynb">Interact</a></p>
</div>
</div>
</div>
<div class="nb-start" name="box_tao_mice" title="A problem of black and brown mice">

</div>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>n_trials <span class="ot">&lt;-</span> <span class="dv">100000</span></span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Make a vector to store results for each trial.</span></span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a><span class="co"># The results are strings, so use "character" type.</span></span>
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Many of these we will not set, for example, for brown mice (see below).</span></span>
<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a>parents <span class="ot">&lt;-</span> <span class="fu">character</span>(n_trials)</span>
<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-8"><a href="#cb9-8" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> (i <span class="cf">in</span> <span class="dv">1</span><span class="sc">:</span>n_trials) {</span>
<span id="cb9-9"><a href="#cb9-9" aria-hidden="true" tabindex="-1"></a>    test_mouse <span class="ot">&lt;-</span> <span class="fu">sample</span>(<span class="fu">c</span>(<span class="st">'BB'</span>, <span class="st">'Bb'</span>, <span class="st">'bb'</span>),</span>
<span id="cb9-10"><a href="#cb9-10" aria-hidden="true" tabindex="-1"></a>                         <span class="at">size=</span><span class="dv">1</span>,</span>
<span id="cb9-11"><a href="#cb9-11" aria-hidden="true" tabindex="-1"></a>                         <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.25</span>, <span class="fl">0.5</span>, <span class="fl">0.25</span>))</span>
<span id="cb9-12"><a href="#cb9-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-13"><a href="#cb9-13" aria-hidden="true" tabindex="-1"></a>    <span class="co"># The test mouse is black; since we drew a brown mouse skip this trial.</span></span>
<span id="cb9-14"><a href="#cb9-14" aria-hidden="true" tabindex="-1"></a>    <span class="cf">if</span> (test_mouse <span class="sc">==</span> <span class="st">'bb'</span>) {</span>
<span id="cb9-15"><a href="#cb9-15" aria-hidden="true" tabindex="-1"></a>        <span class="co"># "next" has the effect of aborting this iteration of the loop</span></span>
<span id="cb9-16"><a href="#cb9-16" aria-hidden="true" tabindex="-1"></a>        <span class="co"># and going back to start the next iteration.  If the code gets</span></span>
<span id="cb9-17"><a href="#cb9-17" aria-hidden="true" tabindex="-1"></a>        <span class="co"># to "next", none of the rest of the loop (within the curly brackets)</span></span>
<span id="cb9-18"><a href="#cb9-18" aria-hidden="true" tabindex="-1"></a>        <span class="co"># code will run.</span></span>
<span id="cb9-19"><a href="#cb9-19" aria-hidden="true" tabindex="-1"></a>        <span class="cf">next</span></span>
<span id="cb9-20"><a href="#cb9-20" aria-hidden="true" tabindex="-1"></a>    }</span>
<span id="cb9-21"><a href="#cb9-21" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-22"><a href="#cb9-22" aria-hidden="true" tabindex="-1"></a>    <span class="co"># If the test mouse is 'BB', all 7 children are guaranteed to</span></span>
<span id="cb9-23"><a href="#cb9-23" aria-hidden="true" tabindex="-1"></a>    <span class="co"># be 'Bb' black.</span></span>
<span id="cb9-24"><a href="#cb9-24" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Therefore, add 'BB' to the parent list.</span></span>
<span id="cb9-25"><a href="#cb9-25" aria-hidden="true" tabindex="-1"></a>    <span class="cf">if</span> (test_mouse <span class="sc">==</span> <span class="st">'BB'</span>) {</span>
<span id="cb9-26"><a href="#cb9-26" aria-hidden="true" tabindex="-1"></a>        parents[i] <span class="ot">&lt;-</span> <span class="st">'BB'</span></span>
<span id="cb9-27"><a href="#cb9-27" aria-hidden="true" tabindex="-1"></a>    }</span>
<span id="cb9-28"><a href="#cb9-28" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-29"><a href="#cb9-29" aria-hidden="true" tabindex="-1"></a>    <span class="co"># If the parent mouse is 'Bb', we draw 7 children to</span></span>
<span id="cb9-30"><a href="#cb9-30" aria-hidden="true" tabindex="-1"></a>    <span class="co"># see whether all of them are black ('Bb').</span></span>
<span id="cb9-31"><a href="#cb9-31" aria-hidden="true" tabindex="-1"></a>    <span class="co"># The probabilities come from the middle row of the table.</span></span>
<span id="cb9-32"><a href="#cb9-32" aria-hidden="true" tabindex="-1"></a>    <span class="cf">if</span> (test_mouse <span class="sc">==</span> <span class="st">'Bb'</span>) {</span>
<span id="cb9-33"><a href="#cb9-33" aria-hidden="true" tabindex="-1"></a>        children <span class="ot">&lt;-</span> <span class="fu">sample</span>(<span class="fu">c</span>(<span class="st">'Bb'</span>, <span class="st">'bb'</span>),</span>
<span id="cb9-34"><a href="#cb9-34" aria-hidden="true" tabindex="-1"></a>                            <span class="at">size=</span><span class="dv">7</span>,</span>
<span id="cb9-35"><a href="#cb9-35" aria-hidden="true" tabindex="-1"></a>                            <span class="at">replace=</span><span class="cn">TRUE</span>,</span>
<span id="cb9-36"><a href="#cb9-36" aria-hidden="true" tabindex="-1"></a>                            <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.5</span>, <span class="fl">0.5</span>))</span>
<span id="cb9-37"><a href="#cb9-37" aria-hidden="true" tabindex="-1"></a>        <span class="cf">if</span> (<span class="fu">all</span>(children <span class="sc">==</span> <span class="st">'Bb'</span>)) {</span>
<span id="cb9-38"><a href="#cb9-38" aria-hidden="true" tabindex="-1"></a>            parents[i] <span class="ot">&lt;-</span> <span class="st">'Bb'</span></span>
<span id="cb9-39"><a href="#cb9-39" aria-hidden="true" tabindex="-1"></a>        }</span>
<span id="cb9-40"><a href="#cb9-40" aria-hidden="true" tabindex="-1"></a>    }</span>
<span id="cb9-41"><a href="#cb9-41" aria-hidden="true" tabindex="-1"></a>}</span>
<span id="cb9-42"><a href="#cb9-42" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-43"><a href="#cb9-43" aria-hidden="true" tabindex="-1"></a><span class="co"># Now, count how many parents were 'BB' vs 'Bb'</span></span>
<span id="cb9-44"><a href="#cb9-44" aria-hidden="true" tabindex="-1"></a>n_parents_BB <span class="ot">&lt;-</span> <span class="fu">sum</span>(parents <span class="sc">==</span> <span class="st">'BB'</span>)</span>
<span id="cb9-45"><a href="#cb9-45" aria-hidden="true" tabindex="-1"></a>n_parents_Bb <span class="ot">&lt;-</span> <span class="fu">sum</span>(parents <span class="sc">==</span> <span class="st">'Bb'</span>)</span>
<span id="cb9-46"><a href="#cb9-46" aria-hidden="true" tabindex="-1"></a>n_B <span class="ot">&lt;-</span> n_parents_BB <span class="sc">+</span> n_parents_Bb</span>
<span id="cb9-47"><a href="#cb9-47" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-48"><a href="#cb9-48" aria-hidden="true" tabindex="-1"></a>p_BB <span class="ot">&lt;-</span> n_parents_BB <span class="sc">/</span> n_B</span>
<span id="cb9-49"><a href="#cb9-49" aria-hidden="true" tabindex="-1"></a>p_Bb <span class="ot">&lt;-</span> n_parents_Bb <span class="sc">/</span> n_B</span>
<span id="cb9-50"><a href="#cb9-50" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-51"><a href="#cb9-51" aria-hidden="true" tabindex="-1"></a><span class="fu">message</span>(<span class="st">'p_BB = '</span>, <span class="fu">round</span>(p_BB, <span class="dv">3</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>p_BB = 0.985</code></pre>
</div>
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="fu">message</span>(<span class="st">'p_Bb = '</span>, <span class="fu">round</span>(p_Bb, <span class="dv">3</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>p_Bb = 0.015</code></pre>
</div>
<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="fu">message</span>(<span class="st">'Ratio = '</span>, <span class="fu">round</span>(p_BB <span class="sc">/</span> p_Bb, <span class="dv">1</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>Ratio = 64.8</code></pre>
</div>
</div>
<p>We see that all the offspring being black considerably changes the situation! We started with the odds being 2:1 in favor of Bb vs BB. The “posterior” or “after the evidence” ratio is closer to 64:1 in favor of <em>BB</em>! (Box and Tiao 1992, 12–14)</p>
<p>Let’s tune the code a bit to run faster. Instead of doing the trials one mouse at a time, we will do the whole bunch together.</p>
<p>To do this, we will use matrices.</p>
<div class="r">
<p>So far, we have used one-dimensional <em>vectors</em> in R. A vector is a sequence of values. Let us generate a vector with <code>sample</code>, as we have many times in this book, and in this chapter.</p>
<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="co"># A vector with five elements.</span></span>
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a>a_vector <span class="ot">&lt;-</span> <span class="fu">sample</span>(<span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>), <span class="at">size=</span><span class="dv">5</span>, <span class="at">replace=</span><span class="cn">TRUE</span>)</span>
<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a>a_vector</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>However, we can also generate <em>matrices</em> in R. Matrices have two dimensions; they have rows and columns, much like a data frame. Here is a matrix we create with <code>sample</code>, by first making a vector, and then reshaping the vector into a matrix.</p>
<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="co"># A vector with 15 values.</span></span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>another_vector <span class="ot">&lt;-</span> <span class="fu">sample</span>(<span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>), <span class="at">size=</span><span class="dv">15</span>, <span class="at">replace=</span><span class="cn">TRUE</span>)</span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a><span class="co"># A matrix with five rows and three columns.</span></span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a>a_matrix <span class="ot">&lt;-</span> <span class="fu">matrix</span>(another_vector, <span class="at">ncol=</span><span class="dv">3</span>)</span>
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a>a_matrix</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>As usual, we can apply Boolean comparison operations to this matrix, to get a Boolean matrix:</p>
<div class="sourceCode cell-code" id="cb17"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a>is_2 <span class="ot">&lt;-</span> a_matrix <span class="sc">==</span> <span class="dv">2</span></span>
<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a>is_2</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>R has functions to operate over rows and columns of a matrix. In particular, it has a function <code>rowSums</code> that gives the sum of values in each row (and therefore, the sum over the columns, for each row). For example, to see how many of the values in each row are equal to 2, we can do:</p>
<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>n_2s_in_rows <span class="ot">&lt;-</span> <span class="fu">rowSums</span>(is_2)</span>
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a>n_2s_in_rows</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Notice that we get one answer for each row, where the answer is the <code>sum</code> across the columns, for that row.</p>
</div>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb19"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a>n_trials <span class="ot">&lt;-</span> <span class="dv">1000000</span></span>
<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a><span class="co"># In n_trials trials, pair two Bb mice and generate a child.</span></span>
<span id="cb19-4"><a href="#cb19-4" aria-hidden="true" tabindex="-1"></a>test_mice <span class="ot">&lt;-</span> <span class="fu">sample</span>(<span class="fu">c</span>(<span class="st">'BB'</span>, <span class="st">'Bb'</span>, <span class="st">'bb'</span>),</span>
<span id="cb19-5"><a href="#cb19-5" aria-hidden="true" tabindex="-1"></a>                    <span class="at">size=</span>n_trials,</span>
<span id="cb19-6"><a href="#cb19-6" aria-hidden="true" tabindex="-1"></a>                    <span class="at">replace=</span><span class="cn">TRUE</span>,</span>
<span id="cb19-7"><a href="#cb19-7" aria-hidden="true" tabindex="-1"></a>                    <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.25</span>, <span class="fl">0.5</span>, <span class="fl">0.25</span>))</span>
<span id="cb19-8"><a href="#cb19-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-9"><a href="#cb19-9" aria-hidden="true" tabindex="-1"></a><span class="co"># The resulting test mouse is black, so filter out all brown ones.</span></span>
<span id="cb19-10"><a href="#cb19-10" aria-hidden="true" tabindex="-1"></a>test_mice <span class="ot">&lt;-</span> test_mice[test_mice <span class="sc">!=</span> <span class="st">'bb'</span>]</span>
<span id="cb19-11"><a href="#cb19-11" aria-hidden="true" tabindex="-1"></a>n_test_mice <span class="ot">&lt;-</span> <span class="fu">length</span>(test_mice)</span>
<span id="cb19-12"><a href="#cb19-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-13"><a href="#cb19-13" aria-hidden="true" tabindex="-1"></a><span class="co"># Each test mouse will now be mated with a brown mouse, producing 7 offspring.</span></span>
<span id="cb19-14"><a href="#cb19-14" aria-hidden="true" tabindex="-1"></a><span class="co"># We then store whether all the offspring were black or not.</span></span>
<span id="cb19-15"><a href="#cb19-15" aria-hidden="true" tabindex="-1"></a>all_offspring_black <span class="ot">&lt;-</span> <span class="fu">logical</span>(n_test_mice)</span>
<span id="cb19-16"><a href="#cb19-16" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-17"><a href="#cb19-17" aria-hidden="true" tabindex="-1"></a><span class="co"># If a test mouse is 'BB', we are assured that all its offspring</span></span>
<span id="cb19-18"><a href="#cb19-18" aria-hidden="true" tabindex="-1"></a><span class="co"># will be black.</span></span>
<span id="cb19-19"><a href="#cb19-19" aria-hidden="true" tabindex="-1"></a>all_offspring_black[test_mice <span class="sc">==</span> <span class="st">'BB'</span>] <span class="ot">&lt;-</span> <span class="cn">TRUE</span></span>
<span id="cb19-20"><a href="#cb19-20" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-21"><a href="#cb19-21" aria-hidden="true" tabindex="-1"></a><span class="co"># If a test mouse is 'Bb', we have to generate its offspring and</span></span>
<span id="cb19-22"><a href="#cb19-22" aria-hidden="true" tabindex="-1"></a><span class="co"># see whether they are all black or not</span></span>
<span id="cb19-23"><a href="#cb19-23" aria-hidden="true" tabindex="-1"></a>test_mice_Bb <span class="ot">&lt;-</span> (test_mice <span class="sc">==</span> <span class="st">'Bb'</span>)</span>
<span id="cb19-24"><a href="#cb19-24" aria-hidden="true" tabindex="-1"></a>n_test_mice_Bb <span class="ot">&lt;-</span> <span class="fu">sum</span>(test_mice_Bb)</span>
<span id="cb19-25"><a href="#cb19-25" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-26"><a href="#cb19-26" aria-hidden="true" tabindex="-1"></a><span class="co"># Generate all offspring of all 'Bb' test mice</span></span>
<span id="cb19-27"><a href="#cb19-27" aria-hidden="true" tabindex="-1"></a><span class="co"># This gives 7 children for each Bb mouse.</span></span>
<span id="cb19-28"><a href="#cb19-28" aria-hidden="true" tabindex="-1"></a>offspring <span class="ot">&lt;-</span> <span class="fu">sample</span>(<span class="fu">c</span>(<span class="st">'Bb'</span>, <span class="st">'bb'</span>),</span>
<span id="cb19-29"><a href="#cb19-29" aria-hidden="true" tabindex="-1"></a>                    <span class="at">size=</span>n_test_mice_Bb <span class="sc">*</span> <span class="dv">7</span>,</span>
<span id="cb19-30"><a href="#cb19-30" aria-hidden="true" tabindex="-1"></a>                    <span class="at">replace=</span><span class="cn">TRUE</span>,</span>
<span id="cb19-31"><a href="#cb19-31" aria-hidden="true" tabindex="-1"></a>                    <span class="at">prob=</span><span class="fu">c</span>(<span class="fl">0.5</span>, <span class="fl">0.5</span>))</span>
<span id="cb19-32"><a href="#cb19-32" aria-hidden="true" tabindex="-1"></a><span class="co"># This gives a 2-dimensional matrix, with one row per Bb mouse,</span></span>
<span id="cb19-33"><a href="#cb19-33" aria-hidden="true" tabindex="-1"></a><span class="co"># and 7 columns, one for each child.</span></span>
<span id="cb19-34"><a href="#cb19-34" aria-hidden="true" tabindex="-1"></a>offspring_mat <span class="ot">=</span> <span class="fu">matrix</span>(offspring, <span class="at">ncol=</span><span class="dv">7</span>)</span>
<span id="cb19-35"><a href="#cb19-35" aria-hidden="true" tabindex="-1"></a><span class="co"># Check whether all 7 children (columns) are Bb, for each row.</span></span>
<span id="cb19-36"><a href="#cb19-36" aria-hidden="true" tabindex="-1"></a>all_offspring_black[test_mice_Bb] <span class="ot">&lt;-</span> <span class="fu">rowSums</span>(offspring_mat <span class="sc">==</span> <span class="st">'Bb'</span>) <span class="sc">==</span> <span class="dv">7</span></span>
<span id="cb19-37"><a href="#cb19-37" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-38"><a href="#cb19-38" aria-hidden="true" tabindex="-1"></a><span class="co"># Find the genetic types of the parents of all-black offspring.</span></span>
<span id="cb19-39"><a href="#cb19-39" aria-hidden="true" tabindex="-1"></a>parents <span class="ot">&lt;-</span> test_mice[all_offspring_black]</span>
<span id="cb19-40"><a href="#cb19-40" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-41"><a href="#cb19-41" aria-hidden="true" tabindex="-1"></a><span class="co"># Calculate what fraction of parents were 'BB' vs 'Bb'.</span></span>
<span id="cb19-42"><a href="#cb19-42" aria-hidden="true" tabindex="-1"></a>parents_BB <span class="ot">&lt;-</span> (parents <span class="sc">==</span> <span class="st">'BB'</span>)</span>
<span id="cb19-43"><a href="#cb19-43" aria-hidden="true" tabindex="-1"></a>parents_Bb <span class="ot">&lt;-</span> (parents <span class="sc">==</span> <span class="st">'Bb'</span>)</span>
<span id="cb19-44"><a href="#cb19-44" aria-hidden="true" tabindex="-1"></a>n_B <span class="ot">&lt;-</span> <span class="fu">sum</span>(all_offspring_black)</span>
<span id="cb19-45"><a href="#cb19-45" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-46"><a href="#cb19-46" aria-hidden="true" tabindex="-1"></a>p_BB <span class="ot">&lt;-</span> <span class="fu">sum</span>(parents_BB) <span class="sc">/</span> n_B</span>
<span id="cb19-47"><a href="#cb19-47" aria-hidden="true" tabindex="-1"></a>p_Bb <span class="ot">&lt;-</span> <span class="fu">sum</span>(parents_Bb) <span class="sc">/</span> n_B</span>
<span id="cb19-48"><a href="#cb19-48" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-49"><a href="#cb19-49" aria-hidden="true" tabindex="-1"></a><span class="fu">message</span>(<span class="st">'p_BB = '</span>, <span class="fu">round</span>(p_BB, <span class="dv">3</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>p_BB = 0.985</code></pre>
</div>
<div class="sourceCode cell-code" id="cb21"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="fu">message</span>(<span class="st">'p_Bb = '</span>, <span class="fu">round</span>(p_Bb, <span class="dv">3</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>p_Bb = 0.015</code></pre>
</div>
<div class="sourceCode cell-code" id="cb23"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="fu">message</span>(<span class="st">'Ratio = '</span>, <span class="fu">round</span>(p_BB <span class="sc">/</span> p_Bb, <span class="dv">1</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>Ratio = 64.3</code></pre>
</div>
</div>
<p>This yields a similar result, but in much shorter time — which means we can increase the number of trials and get a more accurate result.</p>
<div class="nb-end">

</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
End of notebook: A problem of black and brown mice
</div>
</div>
<div class="callout-body-container callout-body">
<p><code>box_tao_mice</code> starts at <a href="#nte-box_tao_mice" class="quarto-xref">Note&nbsp;<span>31.4</span></a>.</p>
</div>
</div>
<!---
End of notebook.
-->
<!---
XXX TODO: How can we show how to derive this quantity using a filter tree type approach? XXX
-->
<p>Creating the correct simulation procedure is not trivial, because Bayesian reasoning is subtle — a reason it has been the cause of controversy for more than two centuries. But it certainly is not easier to create a correct procedure using analytic tools (except in the cookbook sense of plug-and-pray). And the difficult mathematics that underlie the analytic method (see e.g. <span class="citation" data-cites="box1992bayesian">(<a href="references.html#ref-box1992bayesian" role="doc-biblioref">Box and Tiao 1992</a>, Appendix A1.1)</span>) make it almost impossible for the statistician to fully understand the procedure from beginning to end. If one is interested in insight, the simulation procedure might well be preferred.<a href="#fn1" class="footnote-ref" id="fnref1" role="doc-noteref"><sup>1</sup></a></p>
</section>
<section id="problems-based-on-normal-and-other-distributions" class="level2" data-number="31.4">
<h2 data-number="31.4" class="anchored" data-anchor-id="problems-based-on-normal-and-other-distributions"><span class="header-section-number">31.4</span> Problems based on normal and other distributions</h2>
<p>This section should be skipped by all except advanced practitioners of statistics.</p>
<p>Much of the work in Bayesian analysis for scientific purposes treats the combining of prior distributions having Normal and other standard shapes with sample evidence which may also be represented with such standard functions. The mathematics involved often is formidable, though some of the calculational formulas are fairly simple and even intuitive.</p>
<p>These problems may be handled with simulation by replacing the Normal (or other) distribution with the original raw data when data are available, or by a set of discrete sub-universes when distributions are subjective.</p>
<p>Measured data from a continuous distribution present a special problem because the probability of any one observed value is very low, often approaching zero, and hence the probability of a given set of observed values usually cannot be estimated sensibly; this is the reason for the conventional practice of working with a continuous distribution itself, of course. But a simulation necessarily works with discrete values. A feasible procedure must bridge this gulf.</p>
<p>The logic for a problem of Schlaifer’s <span class="citation" data-cites="schlaifer1961introduction">(<a href="references.html#ref-schlaifer1961introduction" role="doc-biblioref">1961</a>, example 17.1)</span> will only be sketched out. The procedure is rather novel, but it has not heretofore been published and therefore must be considered tentative and requiring particular scrutiny.</p>
<section id="an-intermediate-problem-in-conditional-probability" class="level3" data-number="31.4.1">
<h3 data-number="31.4.1" class="anchored" data-anchor-id="an-intermediate-problem-in-conditional-probability"><span class="header-section-number">31.4.1</span> An Intermediate Problem in Conditional Probability</h3>
<p>Schlaifer employs a quality-control problem for his leading example of Bayesian estimation with Normal sampling. A chemical manufacturer wants to estimate the amount of yield of a crucial ingredient X in a batch of raw material in order to decide whether it should receive special handling. The yield ranges between 2 and 3 pounds (per gallon), and the manufacturer has compiled the distribution of the last 100 batches.</p>
<p>The manufacturer currently uses the decision rule that if the mean of nine samples from the batch (which vary only because of measurement error, which is the reason that he takes nine samples rather than just one) indicates that the batch mean is greater than 2.5 pounds per gallon, the batch is accepted. The first question Schlaifer asks, as a sampling-theory waystation to the more general question, is the likelihood that a given batch with any given yield — say 2.3 pounds per gallon — will produce a set of samples with a mean as great as or greater than 2.5 pounds per gallon.</p>
<p>We are told that the manufacturer has in hand nine samples from a given batch; they are 1.84, 1.75, 1.39, 1.65, 3.53, 1.03, 2.73, 2.86, and 1.96, with a mean of 2.08. Because we are also told that the manufacturer considers the extent of sample variation to be the same at all yield levels, we may — if we are again working with 2.3 as our example of a possible universe — therefore add (2.3 minus 2.08 =) 0.22 to each of these nine observations, so as to constitute a bootstrap-type universe; we do this on the grounds that this is our best guess about the constitution of that distribution with a mean at (say) 2.3.</p>
<p>We then repeatedly draw samples of nine observations from this distribution (centered at 2.3) to see how frequently its mean exceeds 2.5. This work is so straightforward that we need not even state the steps in the procedure.</p>
</section>
<section id="estimating-the-posterior-distribution" class="level3" data-number="31.4.2">
<h3 data-number="31.4.2" class="anchored" data-anchor-id="estimating-the-posterior-distribution"><span class="header-section-number">31.4.2</span> Estimating the Posterior Distribution</h3>
<p>Next we estimate the posterior distribution. <a href="#fig-batch_posterior" class="quarto-xref">Figure&nbsp;<span>31.1</span></a> shows the prior distribution of batch yields, based on 100 previous batches.</p>
<div class="cell" data-layout-align="center">
<div class="cell-output-display">
<div id="fig-batch_posterior" class="quarto-float quarto-figure quarto-figure-center anchored" data-fig-align="center">
<figure class="quarto-float quarto-float-fig figure">
<div aria-describedby="fig-batch_posterior-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
<img src="diagrams/batch_posterior.svg" class="img-fluid quarto-figure quarto-figure-center figure-img" style="width:70.0%" alt="Posterior distribution of batch yields">
</div>
<figcaption class="quarto-float-caption-bottom quarto-float-caption quarto-float-fig" id="fig-batch_posterior-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
Figure&nbsp;31.1: Posterior distribution of batch yields
</figcaption>
</figure>
</div>
</div>
</div>
<p>Notation: S <sub>m</sub> = set of batches (where total S = 100) with a particular mean m (say, m = 2.1). x <sub>i</sub> = particular observation (say, x <sub>3</sub> = 1.03). s = the set of x <sub>i</sub> .</p>
<p>We now perform for each of the S <sub>m</sub> (categorized into tenth-of-a-pound divisions between 2.1 and 3.0 pounds per gallon), each corresponding to one of the yields ranging from 2.1 to 3.0, the same sort of sampling operation performed for S <sub>m=2.3</sub> in the previous problem. But now, instead of using the manufacturer’s decision criterion of 2.5, we construct an interval of arbitrary width around the sample mean of 2.08 — say an interval of width .1, from 2.03 to 2.13 — and then work with the weighted proportions of sample means that fall into this interval.</p>
<ol type="1">
<li>Using a bootstrap-like approach, we presume that the sub-universe of observations related to each S <sub>m</sub> consists of the nine x <sub>i</sub> , each shifted by the difference between the mean of that S <sub>m</sub> (say, 2.1) and the mean of the x <sub>i</sub> (which equals 2.08); the difference is added to (or, if negative, subtracted from) each of the nine x <sub>i</sub> — say, 1.03 + .02 = 1.05. For a distribution centered at 2.3, the values would be (1.84 + .22 = 2.06, 1.75 + .22 = 1.97…).</li>
<li>Working with the distribution centered at 2.3 as an example: Constitute a universe of the values (1.84+.22=2.06, 1.75 + .22 = 1.97…). Here we may notice that the variability in the sample enters into the analysis at this point, rather than when the sample evidence is combined with the prior distribution; this is in contrast to conventional Bayesian practice where the posterior is the result of the prior and sample means weighted by the reciprocals of the variances (see e.g. <span class="citation" data-cites="box1992bayesian">(<a href="references.html#ref-box1992bayesian" role="doc-biblioref">Box and Tiao 1992, 17</a> and Appendix A1.1)</span>).</li>
<li>Draw nine observations from this universe (with replacement, of course), compute the mean, and record.</li>
<li>Repeat step 3 perhaps 1000 times and plot the distribution of outcomes.</li>
<li>Compute the percentage of the means within (say) .05 on each side of the sample mean, i.e. from 2.03 to 2.13. The resulting number — call it UP <sup>i</sup> — is the un-standardized (un-normalized) effect of this sub-distribution in the posterior distribution.</li>
<li>Repeat steps 1-5 to cover each other possible batch yield from 2.0 to 3.0 (2.3 was just done).</li>
<li>Weight each of these sub-distributions — actually, its UP <sup>i</sup> — by its prior probability, and call that WP <sup>i</sup> .</li>
<li>Standardize the WP <sup>i</sup> s to a total probability of 1.0. The result is the posterior distribution. The value found is 2.283, which the reader may wish to compare with a theoretically-obtained result (which Schlaifer does not give).</li>
</ol>
<p>This procedure must be biased because the numbers of “hits” will differ between the two sides of the mean for all sub-distributions except that one centered at the same point as the sample, but the extent and properties of this bias are as-yet unknown. The bias would seem to be smaller as the interval is smaller, but a small interval requires a large number of simulations; a satisfactorily narrow interval surely will contain relatively few trials, which is a practical problem of still-unknown dimensions.</p>
<p>Another procedure — less theoretically justified and probably more biased — intended to get around the problem of the narrowness of the interval, is as follows:</p>
<ol start="5" type="1">
<li>(<strong>5a.</strong>) Compute the percentages of the means on each side of the sample mean, and note the smaller of the two (or in another possible process, the difference of the two). The resulting number — call it UP <sup>i</sup> — is the un-standardized (un-normalized) weight of this sub-distribution in the posterior distribution.</li>
</ol>
<p>Another possible criterion — a variation on the procedure in 5a — is the <em>difference</em> between the two tails; for a universe with the same mean as the sample, this difference would be zero.</p>
</section>
</section>
<section id="conclusion" class="level2" data-number="31.5">
<h2 data-number="31.5" class="anchored" data-anchor-id="conclusion"><span class="header-section-number">31.5</span> Conclusion</h2>
<p>All but the simplest problems in conditional probability are confusing to the intuition even if not difficult mathematically. But when one tackles Bayesian and other problems in probability with experimental simulation methods rather than with logic, neither simple nor complex problems need be difficult for experts or beginners.</p>
<p>This chapter shows how simulation can be a helpful and illuminating way to approach problems in Bayesian analysis.</p>
<p>Simulation has two valuable properties for Bayesian analysis:</p>
<ol type="1">
<li>It can provide an effective way to handle problems whose analytic solution may be difficult or impossible.</li>
<li>Simulation can provide insight to problems that otherwise are difficult to understand fully, as is peculiarly the case with Bayesian analysis.</li>
</ol>
<p>Bayesian problems of updating estimates can be handled easily and straightforwardly with simulation, whether the data are discrete or continuous. The process and the results tend to be intuitive and transparent. Simulation works best with the original raw data rather than with abstractions from them via percentages and distributions. This can aid the understanding as well as facilitate computation.</p>


<div id="refs" class="references csl-bib-body hanging-indent" data-entry-spacing="0" role="list" style="display: none">
<div id="ref-box1992bayesian" class="csl-entry" role="listitem">
Box, George E. P., and George C. Tiao. 1992. <em>Bayesian Inference in Statistical Analysis</em>. New <span>Y</span>ork: Wiley &amp; Sons, Inc. <a href="https://www.google.co.uk/books/edition/Bayesian_Inference_in_Statistical_Analys/T8Askeyk1k4C">https://www.google.co.uk/books/edition/Bayesian_Inference_in_Statistical_Analys/T8Askeyk1k4C</a>.
</div>
<div id="ref-cascells1978interpretation" class="csl-entry" role="listitem">
Cascells, Ward, Arno Schoenberger, and Thomas B. Grayboys. 1978. <span>“Interpretation by Physicians of Clinical Laboratory Results.”</span> <em>New England Journal of Medicine</em> 299: 999–1001. <a href="https://www.nejm.org/doi/full/10.1056/NEJM197811022991808">https://www.nejm.org/doi/full/10.1056/NEJM197811022991808</a>.
</div>
<div id="ref-feller1968introduction" class="csl-entry" role="listitem">
Feller, William. 1968. <em>An Introduction to Probability Theory and Its Applications: Volume i</em>. 3rd ed. Vol. 1. New York: John Wiley &amp; Sons. <a href="https://www.google.co.uk/books/edition/An_Introduction_to_Probability_Theory_an/jbkdAQAAMAAJ">https://www.google.co.uk/books/edition/An_Introduction_to_Probability_Theory_an/jbkdAQAAMAAJ</a>.
</div>
<div id="ref-fisher1959statistical" class="csl-entry" role="listitem">
Fisher, Ronald Aylmer. 1959. <span>“Statistical Methods and Scientific Inference.”</span> <a href="https://archive.org/details/statisticalmetho0000fish">https://archive.org/details/statisticalmetho0000fish</a>.
</div>
<div id="ref-peirce1923chance" class="csl-entry" role="listitem">
Peirce, Charles Sanders. 1923. <em>Chance, Love, and Logic: Philosophical Essays</em>. New York: Harcourt Brace &amp; Company, Inc. <a href="https://www.gutenberg.org/files/65274/65274-h/65274-h.htm">https://www.gutenberg.org/files/65274/65274-h/65274-h.htm</a>.
</div>
<div id="ref-schlaifer1961introduction" class="csl-entry" role="listitem">
Schlaifer, Robert. 1961. <em>Introduction to Statistics for Business Decisions</em>. New York: McGraw-Hill. <a href="https://archive.org/details/introductiontost00schl">https://archive.org/details/introductiontost00schl</a>.
</div>
<div id="ref-tversky1982evidential" class="csl-entry" role="listitem">
Tversky, Amos, and Daniel Kahneman. 1982. <span>“Evidential Impact of Base Rates.”</span> In <em>Judgment Under Uncertainty: Heuristics and Biases</em>, edited by Daniel Kahneman, Paul Slovic, and Amos Tversky. Cambridge: Cambridge University Press. <a href="https://www.google.co.uk/books/edition/Judgment_Under_Uncertainty/_0H8gwj4a1MC">https://www.google.co.uk/books/edition/Judgment_Under_Uncertainty/_0H8gwj4a1MC</a>.
</div>
<div id="ref-wonnacott1990introductory" class="csl-entry" role="listitem">
Wonnacott, Thomas H, and Ronald J Wonnacott. 1990. <em>Introductory Statistics</em>. 5th ed. New York: John Wiley &amp; Sons.
</div>
</div>
</section>
<section id="footnotes" class="footnotes footnotes-end-of-document" role="doc-endnotes">
<hr>
<ol>
<li id="fn1"><p>We can use a similar procedure to illustrate an aspect of the Bayesian procedure that Box and Tiao emphasize, its sequentially-consistent character. First let us carry out the above procedure but observe only three black balls in a row. The program to be used is the same except for the insertion of “3” for “7” where “7” appears. We then estimate the probability for Bb, which turns out to be about 1/5 instead of about 1/65. We then substitute for bucket A a bucket A’ with appropriate numbers of black Bb’s and black BB’s, to represent the “updated” prior probability. We may then continue by substituting “4” for “3” above (to attain a total of seven observed black balls), and find that the probability is about what it was when we observed 7 black balls in a single sample (1/65). This shows that the Bayesian procedure accumulates information without “leakage” and with consistency.<a href="#fnref1" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
</ol>
</section>

</main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
  // Mirror the stylesheet element's `data-mode` onto <body>: a value of "dark"
  // selects the `quarto-dark` class, any other value selects `quarto-light`.
  // The class for the opposite mode is removed.
  const toggleBodyColorMode = (bsSheetEl) => {
    const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
    const { classList } = window.document.querySelector("body");
    classList.add(isDark ? "quarto-dark" : "quarto-light");
    classList.remove(isDark ? "quarto-light" : "quarto-dark");
  }
  // Look up the `link#quarto-bootstrap` stylesheet element and, when the page
  // has one, sync the body colour class with it. Pages without that element
  // are left untouched.
  const toggleBodyColorPrimary = () => {
    const primarySheet = window.document.querySelector("link#quarto-bootstrap");
    if (!primarySheet) {
      return;
    }
    toggleBodyColorMode(primarySheet);
  }
  toggleBodyColorPrimary();  
  const icon = "";
  // Set up AnchorJS. The constructor is read from `window`, so the library
  // must already have been loaded by another script on the page.
  const anchorJS = new window.AnchorJS();
  // Pass `placement: 'right'` and the `icon` string defined above as options.
  anchorJS.options = {
    placement: 'right',
    icon: icon
  };
  // Register every element carrying the `anchored` class.
  anchorJS.add('.anchored');
  // True when at least one of the element's classes begins with
  // `code-annotation-`; false otherwise (including an empty class list).
  const isCodeAnnotation = (el) =>
    [...el.classList].some((className) => className.startsWith('code-annotation-'));
  // Success handler for a copy event `e` (uses `e.trigger` and
  // `e.clearSelection()`). It flashes the clicked button as "Copied!" via a
  // CSS class, the title and, when `window.bootstrap` exists, a manually
  // triggered tooltip. Everything is reverted after one second, and the
  // selection is cleared immediately.
  const onCopySuccess = function(e) {
    const copyButton = e.trigger;
    // Drop focus, then switch the button into its "checked" look.
    copyButton.blur();
    copyButton.classList.add('code-copy-button-checked');
    // Remember the title so it can be put back once the flash is over.
    const previousTitle = copyButton.getAttribute("title");
    copyButton.setAttribute("title", "Copied!");
    // Bootstrap is optional; without it only the class/title flash happens.
    let copiedTooltip;
    if (window.bootstrap) {
      const tooltipAttributes = [
        ["data-bs-toggle", "tooltip"],
        ["data-bs-placement", "left"],
        ["data-bs-title", "Copied!"],
      ];
      for (const [attrName, attrValue] of tooltipAttributes) {
        copyButton.setAttribute(attrName, attrValue);
      }
      copiedTooltip = new bootstrap.Tooltip(copyButton, {
        trigger: "manual",
        customClass: "code-copy-button-tooltip",
        offset: [0, -8],
      });
      copiedTooltip.show();
    }
    // Undo the flash: tooltip first (if one was shown), then title and class.
    const restoreButton = () => {
      if (copiedTooltip) {
        copiedTooltip.hide();
        for (const attrName of ["data-bs-title", "data-bs-toggle", "data-bs-placement"]) {
          copyButton.removeAttribute(attrName);
        }
      }
      copyButton.setAttribute("title", previousTitle);
      copyButton.classList.remove('code-copy-button-checked');
    };
    setTimeout(restoreButton, 1000);
    // clear code selection
    e.clearSelection();
  }
  // Produce the text to place on the clipboard for a copy button.
  // `trigger` is the clicked button; the element immediately before it is
  // deep-cloned, every direct child that `isCodeAnnotation` recognises is
  // removed from the clone, and the clone's `innerText` is returned. The
  // document itself is never modified.
  const getTextToCopy = function(trigger) {
      const codeEl = trigger.previousElementSibling.cloneNode(true);
      // `children` is a live collection: removing a node while iterating it
      // shifts the remaining entries down and the sibling right after each
      // removed node is skipped. Iterate over a static snapshot instead so
      // adjacent annotation elements are all removed.
      for (const childEl of Array.from(codeEl.children)) {
        if (isCodeAnnotation(childEl)) {
          childEl.remove();
        }
      }
      return codeEl.innerText;
  }
  // Wire ClipboardJS (read from `window`) to every copy button that is not
  // flagged `data-in-quarto-modal`. The copied text comes from
  // `getTextToCopy`, and a successful copy runs `onCopySuccess`.
  const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
    text: getTextToCopy
  });
  clipboard.on('success', onCopySuccess);
  // Only when the page contains the embedded-source-code modal: create a
  // second ClipboardJS instance for the buttons inside it, with the modal
  // element as its container.
  if (window.document.getElementById('quarto-embedded-source-code-modal')) {
    // For code content inside modals, clipBoardJS needs to be initialized with a container option
    // TODO: Check when it could be a function (https://github.com/zenorocha/clipboard.js/issues/860)
    const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
      text: getTextToCopy,
      container: window.document.getElementById('quarto-embedded-source-code-modal')
    });
    clipboardModal.on('success', onCopySuccess);
  }
    // A link counts as "internal" when its href contains `/<current host>/`,
    // points at http(s)://localhost (any port), or is a mailto: link.
    var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
    var mailtoRegex = new RegExp(/^mailto:/);
    // The host is matched literally, so regex metacharacters in it must be
    // escaped first. Otherwise `.` matches any character and an IPv6 host
    // such as `[::1]:4000` is parsed as a character class.
    var filterRegex = new RegExp('/' + window.location.host.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '/');
    var isInternal = (href) => {
        return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
    }
    // Inspect non-navigation links and adorn them if external
    var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
    for (var i=0; i<links.length; i++) {
      const link = links[i];
      if (!isInternal(link.href)) {
        // undo the damage that might have been done by quarto-nav.js in the case of
        // links that we want to consider external
        if (link.dataset.originalHref !== undefined) {
          link.href = link.dataset.originalHref;
        }
      }
    }
  // Attaches a hover popup to `el` by calling window.tippy with a fixed base
  // configuration.
  //   contentFn     - optional; passed as the `content` option
  //   onTriggerFn   - optional; passed as the `onTrigger` option
  //   onUntriggerFn - optional; passed as the `onUntrigger` option
  // An option is only set when its argument is truthy.
  function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
    const settings = {
      allowHTML: true,
      maxWidth: 500,
      delay: 100,
      arrow: false,
      // Mount the popup inside the parent of the element it is given.
      appendTo: (reference) => reference.parentElement,
      interactive: true,
      interactiveBorder: 10,
      theme: 'quarto',
      placement: 'bottom-start',
    };
    const optionalHooks = [
      ['content', contentFn],
      ['onTrigger', onTriggerFn],
      ['onUntrigger', onUntriggerFn],
    ];
    for (const [option, hook] of optionalHooks) {
      if (hook) {
        settings[option] = hook;
      }
    }
    window.tippy(el, settings);
  }
  // Note reference links: the popup shows the inner HTML of the element whose
  // id the link points at, or nothing when no such element exists.
  const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
  for (const ref of noterefs) {
    const noteHtml = () => {
      // use id or data attribute instead here
      let target = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
      // Reduce an absolute URL to its fragment; when URL() rejects the value
      // it is used unchanged.
      try { target = new URL(target).hash; } catch {}
      const noteEl = window.document.getElementById(target.replace(/^#\/?/, ""));
      return noteEl ? noteEl.innerHTML : "";
    };
    tippyHover(ref, noteHtml);
  }
  const xrefs = window.document.querySelectorAll('a.quarto-xref');
  // Produces the HTML string shown in a cross-reference popup.
  //   id   - id of the referenced element, or null when there is no id
  //   note - the referenced element; it is modified in place (classes and
  //          anchor links are removed from it)
  // For a null id or an id starting with "sec-", only the first child plus the
  // first following child that is not an empty paragraph are returned when the
  // element has more than two children.
  const processXRef = (id, note) => {
    // Runs Quarto's math typesetting on `el` when that hook is available.
    const typeset = (el) => {
      if (window.Quarto?.typesetMath) {
        window.Quarto.typesetMath(el);
      }
    };
    // Strip column container classes
    const stripColumnClz = (el) => {
      el.classList.remove("page-full", "page-columns");
      if (el.children) {
        for (const child of el.children) {
          stripColumnClz(child);
        }
      }
    };
    stripColumnClz(note);

    const isSectionLike = id === null || id.startsWith('sec-');
    if (!isSectionLike) {
      // Remove any anchor links if they are present
      const anchorLink = note.querySelector('a.anchorjs-link');
      if (anchorLink) {
        anchorLink.remove();
      }
      typeset(note);
      // TODO in 1.5, we should make sure this works without a callout special case
      return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
    }

    const hasEnoughChildren = note.children && note.children.length > 2;
    if (!hasEnoughChildren) {
      typeset(note);
      return note.innerHTML;
    }

    // Special case sections, only their first couple elements
    const container = document.createElement("div");
    container.appendChild(note.children[0].cloneNode(true));
    const firstBody = Array.from(note.children)
      .slice(1)
      .find((child) => !(child.tagName === "P" && child.innerText === ""));
    if (firstBody) {
      container.appendChild(firstBody.cloneNode(true));
    }
    typeset(container);
    return container.innerHTML;
  }
  // Cross-reference links: the popup content is resolved when the popup is
  // triggered, from this document when the target id exists here and
  // otherwise by fetching and parsing the linked page.
  for (const xref of xrefs) {
    const loadPreview = (instance) => {
      // The popup stays disabled while its content is resolved; each branch
      // below re-enables and shows it once it is done (or has failed).
      instance.disable();
      const reveal = () => {
        instance.enable();
        instance.show();
      };

      const url = xref.getAttribute('href');
      let hash = undefined;
      if (url.startsWith('#')) {
        hash = url;
      } else {
        try { hash = new URL(url).hash; } catch {}
      }

      if (!hash) {
        // See if we can fetch a full url (with no hash to target)
        // This is a special case and we should probably do some content thinning / targeting
        fetch(url)
          .then((res) => res.text())
          .then((pageHtml) => {
            const remoteDoc = new DOMParser().parseFromString(pageHtml, "text/html");
            const main = remoteDoc.querySelector('main.content');
            if (main === null) {
              return;
            }
            // This should only happen for chapter cross references
            // (since there is no id in the URL)
            // remove the first header
            if (main.children.length > 0 && main.children[0].tagName === "HEADER") {
              main.children[0].remove();
            }
            instance.setContent(processXRef(null, main));
          })
          .finally(reveal);
        return;
      }

      const id = hash.replace(/^#\/?/, "");
      const localNote = window.document.getElementById(id);
      if (localNote !== null) {
        // Work on a clone: processXRef modifies the node it is given.
        try {
          instance.setContent(processXRef(id, localNote.cloneNode(true)));
        } finally {
          reveal();
        }
        return;
      }

      // See if we can fetch this
      fetch(url.split('#')[0])
        .then((res) => res.text())
        .then((pageHtml) => {
          const remoteDoc = new DOMParser().parseFromString(pageHtml, "text/html");
          const remoteNote = remoteDoc.getElementById(id);
          if (remoteNote !== null) {
            instance.setContent(processXRef(id, remoteNote));
          }
        })
        .finally(reveal);
    };
    tippyHover(xref, undefined, loadPreview, function(instance) {
    });
  }
      // Annotation element whose code lines are currently highlighted;
      // undefined when nothing is selected.
      let selectedAnnoteEl;
      // Returns the CSS selector for the span carrying the given
      // data-code-cell and data-code-annotation attribute values.
      const selectorForAnnotation = ( cell, annotation) =>
        `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
      // Highlights the code lines referenced by an annotation element.
      //   annoteEl - element carrying data-target-cell and
      //              data-target-annotation attributes
      // Two absolutely positioned overlay divs are created on demand (one over
      // the code, one in the annotation gutter) and sized to span from the
      // first to the last referenced line. annoteEl is then recorded as the
      // current selection.
      const selectCodeLines = (annoteEl) => {
        const cellId = annoteEl.getAttribute("data-target-cell");
        const annotationId = annoteEl.getAttribute("data-target-annotation");
        const annoteSpan = window.document.querySelector(selectorForAnnotation(cellId, annotationId));
        // Line element ids have the form "<cell id>-<line>".
        const lineIds = annoteSpan
          .getAttribute("data-code-lines")
          .split(",")
          .map((line) => `${cellId}-${line}`);
        if (lineIds.length === 0) {
          return;
        }

        // Returns the overlay with the given id, creating it and appending it
        // to the host supplied by findHost when it does not exist yet.
        const ensureOverlay = (overlayId, findHost) => {
          let overlay = window.document.getElementById(overlayId);
          if (overlay === null) {
            overlay = window.document.createElement("div");
            overlay.setAttribute("id", overlayId);
            overlay.style.position = 'absolute';
            findHost().appendChild(overlay);
          }
          return overlay;
        };

        //compute the position of the single el (top and bottom and make a div)
        const firstLine = window.document.getElementById(lineIds[0]);
        const top = firstLine.offsetTop;
        let height = firstLine.offsetHeight;
        const parent = firstLine.parentElement.parentElement;
        if (lineIds.length > 1) {
          const lastLine = window.document.getElementById(lineIds[lineIds.length - 1]);
          const bottom = lastLine.offsetTop + lastLine.offsetHeight;
          height = bottom - top;
        }

        if (top !== null && height !== null && parent !== null) {
          // cook up a div (if necessary) and position it
          const lineOverlay = ensureOverlay("code-annotation-line-highlight", () => parent);
          lineOverlay.style.top = `${top - 2}px`;
          lineOverlay.style.height = `${height + 4}px`;
          lineOverlay.style.left = 0;

          const gutterOverlay = ensureOverlay("code-annotation-line-highlight-gutter", () => {
            const codeCell = window.document.getElementById(cellId);
            return codeCell.querySelector('.code-annotation-gutter');
          });
          gutterOverlay.style.top = `${top - 2}px`;
          gutterOverlay.style.height = `${height + 4}px`;
        }
        selectedAnnoteEl = annoteEl;
      };
      // Removes both highlight overlays from the document (when present) and
      // clears the recorded selection.
      const unselectCodeLines = () => {
        const overlayIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
        for (const overlayId of overlayIds) {
          window.document.getElementById(overlayId)?.remove();
        }
        selectedAnnoteEl = undefined;
      };
        // Handle positioning of the toggle
    // On window resize, recompute the highlight overlays for the selected
    // annotation (if any), rate-limited through throttle().
    window.addEventListener("resize", throttle(() => {
      // NOTE(review): elRect is not declared in the visible part of this
      // script; confirm it is declared elsewhere.
      elRect = undefined;
      if (!selectedAnnoteEl) {
        return;
      }
      selectCodeLines(selectedAnnoteEl);
    }, 10));
    // Wraps fn in a rate-limited function.
    //   fn - function to call
    //   ms - delay in milliseconds for deferred calls
    // The first call runs fn immediately and closes the gate. While the gate
    // is closed, each call (re)starts an ms timer; when the timer fires, fn
    // runs with the arguments of the latest call and the gate reopens.
    function throttle(fn, ms) {
      let gateClosed = false;
      let pending;
      return (...args) => {
        if (!gateClosed) {
          // first call gets through
          fn.apply(this, args);
          gateClosed = true;
          return;
        }
        // all the others get throttled: drop any scheduled call and reschedule
        if (pending) {
          clearTimeout(pending);
        }
        pending = setTimeout(() => {
          fn.apply(this, args);
          pending = false;
          gateClosed = false;
        }, ms);
      };
    }
      // Attach click handler to the DT
      // Clicking an annotation toggles its highlight: a different annotation
      // replaces the current selection, the same one clears it.
      const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
      for (const annoteDlNode of annoteDls) {
        annoteDlNode.addEventListener('click', () => {
          // Act on the <dt> the listener is bound to rather than event.target:
          // the target can be an element nested inside the <dt>, which does
          // not carry the data-target-* attributes selectCodeLines reads.
          const clickedEl = annoteDlNode;
          if (clickedEl !== selectedAnnoteEl) {
            unselectCodeLines();
            const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
            if (activeEl) {
              activeEl.classList.remove('code-annotation-active');
            }
            selectCodeLines(clickedEl);
            clickedEl.classList.add('code-annotation-active');
          } else {
            // Unselect the line
            unselectCodeLines();
            clickedEl.classList.remove('code-annotation-active');
          }
        });
      }
  // Walks up from `el` looking for the nearest ancestor with a non-empty
  // data-cites attribute.
  // Returns { el, cites } where `el` is the child of that ancestor on the path
  // walked and `cites` is the attribute value split on spaces, or undefined
  // when no ancestor has the attribute.
  const findCites = (el) => {
    let node = el;
    while (node.parentElement) {
      const citeList = node.parentElement.dataset.cites;
      if (citeList) {
        return {
          el: node,
          cites: citeList.split(' ')
        };
      }
      node = node.parentElement;
    }
    return undefined;
  };
  // Bibliography reference links: the popup lists one entry per cited key,
  // filled from the element with id "ref-<key>" when it exists.
  var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
  for (const ref of bibliorefs) {
    const citeInfo = findCites(ref);
    if (!citeInfo) {
      continue;
    }
    tippyHover(citeInfo.el, () => {
      const popup = window.document.createElement('div');
      for (const cite of citeInfo.cites) {
        const entry = window.document.createElement('div');
        entry.classList.add('hanging-indent', 'csl-entry');
        const source = window.document.getElementById('ref-' + cite);
        if (source) {
          entry.innerHTML = source.innerHTML;
        }
        popup.appendChild(entry);
      }
      return popup.innerHTML;
    });
  }
});
</script>
<!-- Previous/next chapter links. The nav is labelled so it can be told apart
     from other navigation landmarks; the arrow icons are decorative. -->
<nav class="page-navigation" aria-label="Chapter navigation">
  <div class="nav-page nav-page-previous">
      <a href="./how_big_sample.html" class="pagination-link" aria-label="How Large a Sample?">
        <i class="bi bi-arrow-left-short" aria-hidden="true"></i> <span class="nav-page-text"><span class="chapter-number">30</span>&nbsp; <span class="chapter-title">How Large a Sample?</span></span>
      </a>
  </div>
  <div class="nav-page nav-page-next">
      <a href="./references.html" class="pagination-link" aria-label="References">
        <span class="nav-page-text">References</span> <i class="bi bi-arrow-right-short" aria-hidden="true"></i>
      </a>
  </div>
</nav>
</div> <!-- /content -->


</body></html>