diff --git a/_book/Introduction-to-R-with-Tidyverse.pdf b/_book/Introduction-to-R-with-Tidyverse.pdf
index 29835f1..6ffe8d8 100644
Binary files a/_book/Introduction-to-R-with-Tidyverse.pdf and b/_book/Introduction-to-R-with-Tidyverse.pdf differ
diff --git a/_book/data_description.html b/_book/data_description.html
new file mode 100644
index 0000000..35381ef
--- /dev/null
+++ b/_book/data_description.html
@@ -0,0 +1,725 @@
+<!DOCTYPE html>
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
+
+<meta charset="utf-8">
+<meta name="generator" content="quarto-1.4.553">
+
+<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
+
+
+<title>Introduction to R with Tidyverse - Data description</title>
+<style>
+code{white-space: pre-wrap;}
+span.smallcaps{font-variant: small-caps;}
+div.columns{display: flex; gap: min(4vw, 1.5em);}
+div.column{flex: auto; overflow-x: auto;}
+div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
+ul.task-list{list-style: none;}
+ul.task-list li input[type="checkbox"] {
+  width: 0.8em;
+  margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ 
+  vertical-align: middle;
+}
+</style>
+
+
+<script src="site_libs/quarto-nav/quarto-nav.js"></script>
+<script src="site_libs/quarto-nav/headroom.min.js"></script>
+<script src="site_libs/clipboard/clipboard.min.js"></script>
+<script src="site_libs/quarto-search/autocomplete.umd.js"></script>
+<script src="site_libs/quarto-search/fuse.min.js"></script>
+<script src="site_libs/quarto-search/quarto-search.js"></script>
+<meta name="quarto:offset" content="./">
+<link href="./exercise_solutions.html" rel="next">
+<link href="./session5_notes.html" rel="prev">
+<script src="site_libs/quarto-html/quarto.js"></script>
+<script src="site_libs/quarto-html/popper.min.js"></script>
+<script src="site_libs/quarto-html/tippy.umd.min.js"></script>
+<script src="site_libs/quarto-html/anchor.min.js"></script>
+<link href="site_libs/quarto-html/tippy.css" rel="stylesheet">
+<link href="site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
+<script src="site_libs/bootstrap/bootstrap.min.js"></script>
+<link href="site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
+<link href="site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
+<script id="quarto-search-options" type="application/json">{
+  "location": "navbar",
+  "copy-button": false,
+  "collapse-after": 3,
+  "panel-placement": "end",
+  "type": "overlay",
+  "limit": 50,
+  "keyboard-shortcut": [
+    "f",
+    "/",
+    "s"
+  ],
+  "show-item-context": false,
+  "language": {
+    "search-no-results-text": "No results",
+    "search-matching-documents-text": "matching documents",
+    "search-copy-link-title": "Copy link to search",
+    "search-hide-matches-text": "Hide additional matches",
+    "search-more-match-text": "more match in this document",
+    "search-more-matches-text": "more matches in this document",
+    "search-clear-button-title": "Clear",
+    "search-text-placeholder": "",
+    "search-detached-cancel-button-title": "Cancel",
+    "search-submit-button-title": "Submit",
+    "search-label": "Search"
+  }
+}</script>
+
+
+</head>
+
+<body class="nav-sidebar floating nav-fixed">
+
+<div id="quarto-search-results"></div>
+  <header id="quarto-header" class="headroom fixed-top">
+    <nav class="navbar navbar-expand-lg " data-bs-theme="dark">
+      <div class="navbar-container container-fluid">
+      <div class="navbar-brand-container mx-auto">
+    <a class="navbar-brand" href="./index.html">
+    <span class="navbar-title">Introduction to R with Tidyverse</span>
+    </a>
+  </div>
+            <div id="quarto-search" class="" title="Search"></div>
+          <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarCollapse" aria-controls="navbarCollapse" aria-expanded="false" aria-label="Toggle navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
+  <span class="navbar-toggler-icon"></span>
+</button>
+          <div class="collapse navbar-collapse" id="navbarCollapse">
+            <ul class="navbar-nav navbar-nav-scroll ms-auto">
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://github.com/sophie-a-lee/"> <i class="bi bi-github" role="img" aria-label="Github">
+</i> 
+<span class="menu-text"></span></a>
+  </li>  
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://scubed.rbind.io/"> <i class="bi bi-book" role="img" aria-label="Website">
+</i> 
+<span class="menu-text"></span></a>
+  </li>  
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://x.com/SophieStats10"> <i class="bi bi-twitter" role="img" aria-label="Twitter">
+</i> 
+<span class="menu-text"></span></a>
+  </li>  
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://buymeacoffee.com/sophie_a_lee"> <i class="bi bi-cup-hot" role="img" aria-label="Buy me a coffee">
+</i> 
+<span class="menu-text"></span></a>
+  </li>  
+</ul>
+          </div> <!-- /navcollapse -->
+          <div class="quarto-navbar-tools">
+    <a href="./Introduction-to-R-with-Tidyverse.pdf" title="Download PDF" class="quarto-navigation-tool px-1" aria-label="Download PDF"><i class="bi bi-file-pdf"></i></a>
+</div>
+      </div> <!-- /container-fluid -->
+    </nav>
+  <nav class="quarto-secondary-nav">
+    <div class="container-fluid d-flex">
+      <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
+        <i class="bi bi-layout-text-sidebar-reverse"></i>
+      </button>
+        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./data_description.html">Appendices</a></li><li class="breadcrumb-item"><a href="./data_description.html">Data description</a></li></ol></nav>
+        <a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">      
+        </a>
+    </div>
+  </nav>
+</header>
+<!-- content -->
+<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
+<!-- sidebar -->
+  <nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation floating overflow-auto">
+    <div class="sidebar-menu-container"> 
+    <ul class="list-unstyled mt-1">
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./index.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Welcome!</span></a>
+  </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./session1_notes.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction to R and RStudio</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./session2_notes.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Introduction to tidyverse and data wrangling</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./session3_notes.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Data preparation and manipulation</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./session4_notes.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Data visualisation with ggplot2</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./session5_notes.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item sidebar-item-section">
+      <div class="sidebar-item-container"> 
+            <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
+ <span class="menu-text">Appendices</span></a>
+          <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
+            <i class="bi bi-chevron-right ms-2"></i>
+          </a> 
+      </div>
+      <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">  
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./data_description.html" class="sidebar-item-text sidebar-link active">
+ <span class="menu-text">Data description</span></a>
+  </div>
+</li>
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./exercise_solutions.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Exercise solutions</span></a>
+  </div>
+</li>
+      </ul>
+  </li>
+    </ul>
+    </div>
+</nav>
+<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
+<!-- margin-sidebar -->
+    <div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
+        <nav id="TOC" role="doc-toc" class="toc-active">
+    <h2 id="toc-title">Table of contents</h2>
+   
+  <ul>
+  <li><a href="#what-is-csp" id="toc-what-is-csp" class="nav-link active" data-scroll-target="#what-is-csp">What is ‘CSP’?</a></li>
+  <li><a href="#descriptions-of-variables" id="toc-descriptions-of-variables" class="nav-link" data-scroll-target="#descriptions-of-variables">Descriptions of variables</a>
+  <ul class="collapse">
+  <li><a href="#identifier-variables" id="toc-identifier-variables" class="nav-link" data-scroll-target="#identifier-variables">Identifier variables</a></li>
+  <li><a href="#regions-of-england" id="toc-regions-of-england" class="nav-link" data-scroll-target="#regions-of-england">Regions of England</a></li>
+  <li><a href="#settlement-funding-assessment-sfa" id="toc-settlement-funding-assessment-sfa" class="nav-link" data-scroll-target="#settlement-funding-assessment-sfa">Settlement Funding Assessment (SFA)</a></li>
+  <li><a href="#under-indexing-business-rate-multipliers" id="toc-under-indexing-business-rate-multipliers" class="nav-link" data-scroll-target="#under-indexing-business-rate-multipliers">Under-indexing business rate multipliers</a></li>
+  <li><a href="#council-tax" id="toc-council-tax" class="nav-link" data-scroll-target="#council-tax">Council tax</a></li>
+  <li><a href="#new-homes-bonus" id="toc-new-homes-bonus" class="nav-link" data-scroll-target="#new-homes-bonus">New Homes Bonus</a></li>
+  <li><a href="#rural-services-delivery-grant" id="toc-rural-services-delivery-grant" class="nav-link" data-scroll-target="#rural-services-delivery-grant">Rural Services Delivery Grant</a></li>
+  </ul></li>
+  </ul>
+</nav>
+    </div>
+<!-- main -->
+<main class="content" id="quarto-document-content">
+
+<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./data_description.html">Appendices</a></li><li class="breadcrumb-item"><a href="./data_description.html">Data description</a></li></ol></nav>
+<div class="quarto-title">
+<h1 class="title">Data description</h1>
+</div>
+
+
+
+<div class="quarto-title-meta">
+
+    
+  
+    
+  </div>
+  
+
+
+</header>
+
+
+<section id="what-is-csp" class="level2">
+<h2 class="anchored" data-anchor-id="what-is-csp">What is ‘CSP’?</h2>
+<p>The data we will be using throughout this course relates to the Core Spending Power (CSP) of English local authorities between 2015 and 2020. This is a measure of the resources available to local authorities in England to fund service delivery. The CSP is broken down into several components, presented as variables in the data. These components include:</p>
+<ul>
+<li>Settlement Funding Assessment (<code>sfa</code>)</li>
+<li>Compensation for under-indexing the business rates multiplier (<code>under_index</code>)</li>
+<li>Income from council tax (<code>ct_total</code>)</li>
+<li>New Homes Bonus (<code>nhb</code>)</li>
+<li>Rural Services Delivery Grant (<code>rsdg</code>)</li>
+</ul>
+<p>Spending power is given in millions of pounds (£). The data were provided by the UK government’s Department for Levelling Up, Housing and Communities. Full guidance on the data can be found on the <a href="https://www.gov.uk/government/publications/explanatory-note-on-core-spending-power-provisional-local-government-finance-settlement-2024-to-2025/explanatory-note-on-core-spending-power-provisional-local-government-finance-settlement-2024-to-2025">Department’s website</a>. A brief description of the variables included in the data is given below.</p>
+</section>
+<section id="descriptions-of-variables" class="level2">
+<h2 class="anchored" data-anchor-id="descriptions-of-variables">Descriptions of variables</h2>
+<section id="identifier-variables" class="level3">
+<h3 class="anchored" data-anchor-id="identifier-variables">Identifier variables</h3>
+<p>Each dataset contains a unique identifier code variable, <code>ons_code</code>. This is a code given by the Government’s Office for National Statistics (ONS), and is used to join different datasets. There is also an <code>authority</code> variable which contains the local authority name (to see where each local authority lies on a map, you can visit the <a href="https://geoportal.statistics.gov.uk/documents/cb64eeb1b0a74e5ca277f9fac58500f4/explore">Government’s geoportal website</a>).</p>
+</section>
+<section id="regions-of-england" class="level3">
+<h3 class="anchored" data-anchor-id="regions-of-england">Regions of England</h3>
+<p>In addition to each local authority’s unique code and name, we are given the region that they lie within. England is separated into 9 regions (shown on <a href="https://geoportal.statistics.gov.uk/documents/bfe6f7099f9e4cc59f8064962959bfb5/explore">this map</a>) which are given as acronyms in the data. These are:</p>
+<ul>
+<li><code>L</code> = London</li>
+<li><code>NW</code> = North West</li>
+<li><code>NE</code> = North East</li>
+<li><code>YH</code> = Yorkshire and the Humber</li>
+<li><code>WM</code> = West Midlands</li>
+<li><code>EM</code> = East Midlands</li>
+<li><code>EE</code> = East of England</li>
+<li><code>SW</code> = South West</li>
+<li><code>SE</code> = South East</li>
+</ul>
+</section>
+<section id="settlement-funding-assessment-sfa" class="level3">
+<h3 class="anchored" data-anchor-id="settlement-funding-assessment-sfa">Settlement Funding Assessment (SFA)</h3>
+<p>The Settlement Funding Assessment (<code>sfa</code> in the data) is the baseline funding level of local authorities, and includes the Revenue Support Grant (a central government grant given to local authorities).</p>
+</section>
+<section id="under-indexing-business-rate-multipliers" class="level3">
+<h3 class="anchored" data-anchor-id="under-indexing-business-rate-multipliers">Under-indexing business rate multipliers</h3>
+<p>The <code>under_index</code> variable is given to compensate local authorities that under-indexed business rate multipliers in previous years (i.e.&nbsp;those that used a measure of inflation that was lower than the one that should have been used).</p>
+</section>
+<section id="council-tax" class="level3">
+<h3 class="anchored" data-anchor-id="council-tax">Council tax</h3>
+<p>Council tax (<code>ct_total</code>) is the income made by each local authority from council tax. In England, the amount of council tax charged to residents is set by each local authority to make up additional revenue needed to cover planned spending.</p>
+</section>
+<section id="new-homes-bonus" class="level3">
+<h3 class="anchored" data-anchor-id="new-homes-bonus">New Homes Bonus</h3>
+<p>The <code>nhb</code> variable is the funding received as part of the New Homes Bonus, a government incentive to encourage local authorities to promote new housing development.</p>
+</section>
+<section id="rural-services-delivery-grant" class="level3">
+<h3 class="anchored" data-anchor-id="rural-services-delivery-grant">Rural Services Delivery Grant</h3>
+<p>The <code>rsdg</code> variable is funding received as part of the Rural Services Delivery Grant, provided to rural councils to recognise additional costs in these areas.</p>
+
+
+</section>
+</section>
+
+</main> <!-- /main -->
+<script id="quarto-html-after-body" type="application/javascript">
+window.document.addEventListener("DOMContentLoaded", function (event) {
+  const toggleBodyColorMode = (bsSheetEl) => {
+    const mode = bsSheetEl.getAttribute("data-mode");
+    const bodyEl = window.document.querySelector("body");
+    if (mode === "dark") {
+      bodyEl.classList.add("quarto-dark");
+      bodyEl.classList.remove("quarto-light");
+    } else {
+      bodyEl.classList.add("quarto-light");
+      bodyEl.classList.remove("quarto-dark");
+    }
+  }
+  const toggleBodyColorPrimary = () => {
+    const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
+    if (bsSheetEl) {
+      toggleBodyColorMode(bsSheetEl);
+    }
+  }
+  toggleBodyColorPrimary();  
+  const icon = "";
+  const anchorJS = new window.AnchorJS();
+  anchorJS.options = {
+    placement: 'right',
+    icon: icon
+  };
+  anchorJS.add('.anchored');
+  const isCodeAnnotation = (el) => {
+    for (const clz of el.classList) {
+      if (clz.startsWith('code-annotation-')) {                     
+        return true;
+      }
+    }
+    return false;
+  }
+  const clipboard = new window.ClipboardJS('.code-copy-button', {
+    text: function(trigger) {
+      const codeEl = trigger.previousElementSibling.cloneNode(true);
+      for (const childEl of codeEl.children) {
+        if (isCodeAnnotation(childEl)) {
+          childEl.remove();
+        }
+      }
+      return codeEl.innerText;
+    }
+  });
+  clipboard.on('success', function(e) {
+    // button target
+    const button = e.trigger;
+    // don't keep focus
+    button.blur();
+    // flash "checked"
+    button.classList.add('code-copy-button-checked');
+    var currentTitle = button.getAttribute("title");
+    button.setAttribute("title", "Copied!");
+    let tooltip;
+    if (window.bootstrap) {
+      button.setAttribute("data-bs-toggle", "tooltip");
+      button.setAttribute("data-bs-placement", "left");
+      button.setAttribute("data-bs-title", "Copied!");
+      tooltip = new bootstrap.Tooltip(button, 
+        { trigger: "manual", 
+          customClass: "code-copy-button-tooltip",
+          offset: [0, -8]});
+      tooltip.show();    
+    }
+    setTimeout(function() {
+      if (tooltip) {
+        tooltip.hide();
+        button.removeAttribute("data-bs-title");
+        button.removeAttribute("data-bs-toggle");
+        button.removeAttribute("data-bs-placement");
+      }
+      button.setAttribute("title", currentTitle);
+      button.classList.remove('code-copy-button-checked');
+    }, 1000);
+    // clear code selection
+    e.clearSelection();
+  });
+    var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
+    var mailtoRegex = new RegExp(/^mailto:/);
+      var filterRegex = new RegExp('/' + window.location.host + '/');
+    var isInternal = (href) => {
+        return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
+    }
+    // Inspect non-navigation links and adorn them if external
+ 	var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool)');
+    for (var i=0; i<links.length; i++) {
+      const link = links[i];
+      if (!isInternal(link.href)) {
+        // undo the damage that might have been done by quarto-nav.js in the case of
+        // links that we want to consider external
+        if (link.dataset.originalHref !== undefined) {
+          link.href = link.dataset.originalHref;
+        }
+      }
+    }
+  function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
+    const config = {
+      allowHTML: true,
+      maxWidth: 500,
+      delay: 100,
+      arrow: false,
+      appendTo: function(el) {
+          return el.parentElement;
+      },
+      interactive: true,
+      interactiveBorder: 10,
+      theme: 'quarto',
+      placement: 'bottom-start',
+    };
+    if (contentFn) {
+      config.content = contentFn;
+    }
+    if (onTriggerFn) {
+      config.onTrigger = onTriggerFn;
+    }
+    if (onUntriggerFn) {
+      config.onUntrigger = onUntriggerFn;
+    }
+    window.tippy(el, config); 
+  }
+  const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
+  for (var i=0; i<noterefs.length; i++) {
+    const ref = noterefs[i];
+    tippyHover(ref, function() {
+      // use id or data attribute instead here
+      let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
+      try { href = new URL(href).hash; } catch {}
+      const id = href.replace(/^#\/?/, "");
+      const note = window.document.getElementById(id);
+      if (note) {
+        return note.innerHTML;
+      } else {
+        return "";
+      }
+    });
+  }
+  const xrefs = window.document.querySelectorAll('a.quarto-xref');
+  const processXRef = (id, note) => {
+    // Strip column container classes
+    const stripColumnClz = (el) => {
+      el.classList.remove("page-full", "page-columns");
+      if (el.children) {
+        for (const child of el.children) {
+          stripColumnClz(child);
+        }
+      }
+    }
+    stripColumnClz(note)
+    if (id === null || id.startsWith('sec-')) {
+      // Special case sections, only their first couple elements
+      const container = document.createElement("div");
+      if (note.children && note.children.length > 2) {
+        container.appendChild(note.children[0].cloneNode(true));
+        for (let i = 1; i < note.children.length; i++) {
+          const child = note.children[i];
+          if (child.tagName === "P" && child.innerText === "") {
+            continue;
+          } else {
+            container.appendChild(child.cloneNode(true));
+            break;
+          }
+        }
+        if (window.Quarto?.typesetMath) {
+          window.Quarto.typesetMath(container);
+        }
+        return container.innerHTML
+      } else {
+        if (window.Quarto?.typesetMath) {
+          window.Quarto.typesetMath(note);
+        }
+        return note.innerHTML;
+      }
+    } else {
+      // Remove any anchor links if they are present
+      const anchorLink = note.querySelector('a.anchorjs-link');
+      if (anchorLink) {
+        anchorLink.remove();
+      }
+      if (window.Quarto?.typesetMath) {
+        window.Quarto.typesetMath(note);
+      }
+      // TODO in 1.5, we should make sure this works without a callout special case
+      if (note.classList.contains("callout")) {
+        return note.outerHTML;
+      } else {
+        return note.innerHTML;
+      }
+    }
+  }
+  for (var i=0; i<xrefs.length; i++) {
+    const xref = xrefs[i];
+    tippyHover(xref, undefined, function(instance) {
+      instance.disable();
+      let url = xref.getAttribute('href');
+      let hash = undefined; 
+      if (url.startsWith('#')) {
+        hash = url;
+      } else {
+        try { hash = new URL(url).hash; } catch {}
+      }
+      if (hash) {
+        const id = hash.replace(/^#\/?/, "");
+        const note = window.document.getElementById(id);
+        if (note !== null) {
+          try {
+            const html = processXRef(id, note.cloneNode(true));
+            instance.setContent(html);
+          } finally {
+            instance.enable();
+            instance.show();
+          }
+        } else {
+          // See if we can fetch this
+          fetch(url.split('#')[0])
+          .then(res => res.text())
+          .then(html => {
+            const parser = new DOMParser();
+            const htmlDoc = parser.parseFromString(html, "text/html");
+            const note = htmlDoc.getElementById(id);
+            if (note !== null) {
+              const html = processXRef(id, note);
+              instance.setContent(html);
+            } 
+          }).finally(() => {
+            instance.enable();
+            instance.show();
+          });
+        }
+      } else {
+        // See if we can fetch a full url (with no hash to target)
+        // This is a special case and we should probably do some content thinning / targeting
+        fetch(url)
+        .then(res => res.text())
+        .then(html => {
+          const parser = new DOMParser();
+          const htmlDoc = parser.parseFromString(html, "text/html");
+          const note = htmlDoc.querySelector('main.content');
+          if (note !== null) {
+            // This should only happen for chapter cross references
+            // (since there is no id in the URL)
+            // remove the first header
+            if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
+              note.children[0].remove();
+            }
+            const html = processXRef(null, note);
+            instance.setContent(html);
+          } 
+        }).finally(() => {
+          instance.enable();
+          instance.show();
+        });
+      }
+    }, function(instance) {
+    });
+  }
+      let selectedAnnoteEl;
+      const selectorForAnnotation = ( cell, annotation) => {
+        let cellAttr = 'data-code-cell="' + cell + '"';
+        let lineAttr = 'data-code-annotation="' +  annotation + '"';
+        const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
+        return selector;
+      }
+      const selectCodeLines = (annoteEl) => {
+        const doc = window.document;
+        const targetCell = annoteEl.getAttribute("data-target-cell");
+        const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
+        const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
+        const lines = annoteSpan.getAttribute("data-code-lines").split(",");
+        const lineIds = lines.map((line) => {
+          return targetCell + "-" + line;
+        })
+        let top = null;
+        let height = null;
+        let parent = null;
+        if (lineIds.length > 0) {
+            //compute the position of the single el (top and bottom and make a div)
+            const el = window.document.getElementById(lineIds[0]);
+            top = el.offsetTop;
+            height = el.offsetHeight;
+            parent = el.parentElement.parentElement;
+          if (lineIds.length > 1) {
+            const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
+            const bottom = lastEl.offsetTop + lastEl.offsetHeight;
+            height = bottom - top;
+          }
+          if (top !== null && height !== null && parent !== null) {
+            // cook up a div (if necessary) and position it 
+            let div = window.document.getElementById("code-annotation-line-highlight");
+            if (div === null) {
+              div = window.document.createElement("div");
+              div.setAttribute("id", "code-annotation-line-highlight");
+              div.style.position = 'absolute';
+              parent.appendChild(div);
+            }
+            div.style.top = top - 2 + "px";
+            div.style.height = height + 4 + "px";
+            div.style.left = 0;
+            let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
+            if (gutterDiv === null) {
+              gutterDiv = window.document.createElement("div");
+              gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
+              gutterDiv.style.position = 'absolute';
+              const codeCell = window.document.getElementById(targetCell);
+              const gutter = codeCell.querySelector('.code-annotation-gutter');
+              gutter.appendChild(gutterDiv);
+            }
+            gutterDiv.style.top = top - 2 + "px";
+            gutterDiv.style.height = height + 4 + "px";
+          }
+          selectedAnnoteEl = annoteEl;
+        }
+      };
+      const unselectCodeLines = () => {
+        const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
+        elementsIds.forEach((elId) => {
+          const div = window.document.getElementById(elId);
+          if (div) {
+            div.remove();
+          }
+        });
+        selectedAnnoteEl = undefined;
+      };
+        // On window resize (throttled with ms = 10), re-apply the highlight for the selected annotation, if any
+    window.addEventListener(
+      "resize",
+      throttle(() => {
+        elRect = undefined; // NOTE(review): elRect is not declared anywhere in the visible script; in non-strict code this assignment creates an implicit global — confirm it is still needed
+        if (selectedAnnoteEl) {
+          selectCodeLines(selectedAnnoteEl); // re-run the highlight positioning for the current selection
+        }
+      }, 10)
+    );
+    function throttle(fn, ms) {
+    let throttle = false;
+    let timer;
+      return (...args) => {
+        if(!throttle) { // first call gets through
+            fn.apply(this, args);
+            throttle = true;
+        } else { // all the others get throttled
+            if(timer) clearTimeout(timer); // cancel #2
+            timer = setTimeout(() => {
+              fn.apply(this, args);
+              timer = throttle = false;
+            }, ms);
+        }
+      };
+    }
+      // Attach a click handler to every <dt> that carries data-target-cell; a click toggles that annotation's highlight
+      const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
+      for (const annoteDlNode of annoteDls) {
+        annoteDlNode.addEventListener('click', (event) => {
+          const clickedEl = event.target; // NOTE(review): event.target may be a descendant of the dt rather than the dt itself — confirm the dt has no child elements
+          if (clickedEl !== selectedAnnoteEl) {
+            unselectCodeLines(); // drop any existing highlight before selecting the new one
+            const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
+            if (activeEl) {
+              activeEl.classList.remove('code-annotation-active');
+            }
+            selectCodeLines(clickedEl);
+            clickedEl.classList.add('code-annotation-active');
+          } else {
+            // Clicking the already-selected annotation again unselects it
+            unselectCodeLines();
+            clickedEl.classList.remove('code-annotation-active');
+          }
+        });
+      }
+  const findCites = (el) => {
+    const parentEl = el.parentElement;
+    if (parentEl) {
+      const cites = parentEl.dataset.cites;
+      if (cites) {
+        return {
+          el,
+          cites: cites.split(' ')
+        };
+      } else {
+        return findCites(el.parentElement)
+      }
+    } else {
+      return undefined;
+    }
+  };
+  var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
+  for (var i=0; i<bibliorefs.length; i++) {
+    const ref = bibliorefs[i];
+    const citeInfo = findCites(ref);
+    if (citeInfo) {
+      tippyHover(citeInfo.el, function() {
+        var popup = window.document.createElement('div');
+        citeInfo.cites.forEach(function(cite) {
+          var citeDiv = window.document.createElement('div');
+          citeDiv.classList.add('hanging-indent');
+          citeDiv.classList.add('csl-entry');
+          var biblioDiv = window.document.getElementById('ref-' + cite);
+          if (biblioDiv) {
+            citeDiv.innerHTML = biblioDiv.innerHTML;
+          }
+          popup.appendChild(citeDiv);
+        });
+        return popup.innerHTML;
+      });
+    }
+  }
+});
+</script>
+<nav class="page-navigation">
+  <div class="nav-page nav-page-previous">
+      <a href="./session5_notes.html" class="pagination-link" aria-label="Reproducible research with RMarkdown">
+        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span>
+      </a>          
+  </div>
+  <div class="nav-page nav-page-next">
+      <a href="./exercise_solutions.html" class="pagination-link" aria-label="Exercise solutions">
+        <span class="nav-page-text">Exercise solutions</span> <i class="bi bi-arrow-right-short"></i>
+      </a>
+  </div>
+</nav>
+</div> <!-- /content -->
+
+
+
+
+</body></html>
\ No newline at end of file
diff --git a/_book/data_description.qmd b/_book/data_description.qmd
deleted file mode 100644
index e712980..0000000
--- a/_book/data_description.qmd
+++ /dev/null
@@ -1,45 +0,0 @@
-# Data description {.unnumbered}
-
-## What is 'CSP'?
-The data we will be using throughout this course relates to the Core Spending Power (CSP) of English local authorities between 2015 and 2020. This is a measure of the resources available to local authorities in England to fund service delivery. The CSP is broken down into several components, presented as variables in the data. These components include:
-
-- Settlement Funding Assessment (`sfa`)
-- Compensation for under-indexing the business rates multipler (`under_index`)
-- Income from council tax (`ct_total`)
-- New Homes Bonus (`nhb`)
-- Rural Services Delivery Grant (`rsdg`)
-
-Spending power is given in millions of pounds (£). The data were provided by the UK government's Department for Levelling Up, Housing and Communities. Full guidance on the data can be found on the [Department's website](https://www.gov.uk/government/publications/explanatory-note-on-core-spending-power-provisional-local-government-finance-settlement-2024-to-2025/explanatory-note-on-core-spending-power-provisional-local-government-finance-settlement-2024-to-2025). A brief description of the variables included in the data are given below.
-
-## Descriptions of variables
-
-### Identifier variables
-Each dataset contains a unique identifier code variable, `ons_code`. This is a code given by the Government's Office for National Statistics (ONS), and is used to join different datasets. There is also an `authority` variable which contains the local authority name (to see where each local authority lies on a map, you can visit the [Government's geoportal website](https://geoportal.statistics.gov.uk/documents/cb64eeb1b0a74e5ca277f9fac58500f4/explore)).
-
-### Regions of England
-In addition to each local authority's unique code and name, we are given the region that they lie within. England is separated into 9 regions (shown on [this map](https://geoportal.statistics.gov.uk/documents/bfe6f7099f9e4cc59f8064962959bfb5/explore)) which are given as acronyms in the data. These are:
-
-- `L` = London
-- `NW` = North West
-- `NE` = North East
-- `YH` = Yorkshire and the Humber
-- `WM` = West Midlands
-- `EM` = East Midlands
-- `EE` = East England
-- `SW` = South West
-- `SE` = South East
-
-### Settlement Funding Assessment (SFA)
-The Settlement Funding Assessment (`sfa` in the data) is the baseline funding level of local authorities, and includes the Revenue Support Grant (a central government grant given to local authorities).
-
-### Under-indexing business rate multipliers
-The `under_index` variable is given to compensate local authorities that under-indexed business rate multipliers in previous years (i.e. those that used a measure of inflation that was lower than that should have been used).
-
-### Council tax
-Council tax (`ct_total`) is the income made by each local authority from council tax. In England, the amount of council tax charged to residents is set by each local authority to make up additional revenue needed to cover planned spending.
-
-### New Homes Bonus
-The `nhb` variables is the funding received as part of the New Homes Bonus, a government inncentive to encourage local authorities to promote new housing delevopment.
-
-### Rural Services Delivery Grant 
-The `rsdg` variable is funding received as part of the Rural Services Delivery Grant, provided to rural councils to recognise additional costs in these areas.
diff --git a/_book/exercise_solutions.html b/_book/exercise_solutions.html
new file mode 100644
index 0000000..2d5bf4d
--- /dev/null
+++ b/_book/exercise_solutions.html
@@ -0,0 +1,1165 @@
+<!DOCTYPE html>
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
+
+<meta charset="utf-8">
+<meta name="generator" content="quarto-1.4.553">
+
+<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
+
+
+<title>Introduction to R with Tidyverse - Exercise solutions</title>
+<style>
+code{white-space: pre-wrap;}
+span.smallcaps{font-variant: small-caps;}
+div.columns{display: flex; gap: min(4vw, 1.5em);}
+div.column{flex: auto; overflow-x: auto;}
+div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
+ul.task-list{list-style: none;}
+ul.task-list li input[type="checkbox"] {
+  width: 0.8em;
+  margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ 
+  vertical-align: middle;
+}
+/* CSS for syntax highlighting */
+pre > code.sourceCode { white-space: pre; position: relative; }
+pre > code.sourceCode > span { line-height: 1.25; }
+pre > code.sourceCode > span:empty { height: 1.2em; }
+.sourceCode { overflow: visible; }
+code.sourceCode > span { color: inherit; text-decoration: inherit; }
+div.sourceCode { margin: 1em 0; }
+pre.sourceCode { margin: 0; }
+@media screen {
+div.sourceCode { overflow: auto; }
+}
+@media print {
+pre > code.sourceCode { white-space: pre-wrap; }
+pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
+}
+pre.numberSource code
+  { counter-reset: source-line 0; }
+pre.numberSource code > span
+  { position: relative; left: -4em; counter-increment: source-line; }
+pre.numberSource code > span > a:first-child::before
+  { content: counter(source-line);
+    position: relative; left: -1em; text-align: right; vertical-align: baseline;
+    border: none; display: inline-block;
+    -webkit-touch-callout: none; -webkit-user-select: none;
+    -khtml-user-select: none; -moz-user-select: none;
+    -ms-user-select: none; user-select: none;
+    padding: 0 4px; width: 4em;
+  }
+pre.numberSource { margin-left: 3em;  padding-left: 4px; }
+div.sourceCode
+  {   }
+@media screen {
+pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
+}
+</style>
+
+
+<script src="site_libs/quarto-nav/quarto-nav.js"></script>
+<script src="site_libs/quarto-nav/headroom.min.js"></script>
+<script src="site_libs/clipboard/clipboard.min.js"></script>
+<script src="site_libs/quarto-search/autocomplete.umd.js"></script>
+<script src="site_libs/quarto-search/fuse.min.js"></script>
+<script src="site_libs/quarto-search/quarto-search.js"></script>
+<meta name="quarto:offset" content="./">
+<link href="./data_description.html" rel="prev">
+<script src="site_libs/quarto-html/quarto.js"></script>
+<script src="site_libs/quarto-html/popper.min.js"></script>
+<script src="site_libs/quarto-html/tippy.umd.min.js"></script>
+<script src="site_libs/quarto-html/anchor.min.js"></script>
+<link href="site_libs/quarto-html/tippy.css" rel="stylesheet">
+<link href="site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
+<script src="site_libs/bootstrap/bootstrap.min.js"></script>
+<link href="site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
+<link href="site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
+<script id="quarto-search-options" type="application/json">{
+  "location": "navbar",
+  "copy-button": false,
+  "collapse-after": 3,
+  "panel-placement": "end",
+  "type": "overlay",
+  "limit": 50,
+  "keyboard-shortcut": [
+    "f",
+    "/",
+    "s"
+  ],
+  "show-item-context": false,
+  "language": {
+    "search-no-results-text": "No results",
+    "search-matching-documents-text": "matching documents",
+    "search-copy-link-title": "Copy link to search",
+    "search-hide-matches-text": "Hide additional matches",
+    "search-more-match-text": "more match in this document",
+    "search-more-matches-text": "more matches in this document",
+    "search-clear-button-title": "Clear",
+    "search-text-placeholder": "",
+    "search-detached-cancel-button-title": "Cancel",
+    "search-submit-button-title": "Submit",
+    "search-label": "Search"
+  }
+}</script>
+
+  <script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
+  <script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>
+
+<script type="text/javascript">
+const typesetMath = (el) => { // Typeset the math inside `el` with whichever of MathJax or KaTeX is present on `window`; does nothing if neither is.
+  if (window.MathJax) {
+    // MathJax is checked first: hand it `el`, wrapped in an array, via MathJax.typeset
+    window.MathJax.typeset([el]);
+  } else if (window.katex) {
+    // KaTeX fallback: render each element under `el` that carries the class "math"
+    var mathElements = el.getElementsByClassName("math");
+    var macros = []; // the same container is passed to every render call below; NOTE(review): KaTeX documents `macros` as an object - confirm an array is acceptable
+    for (var i = 0; i < mathElements.length; i++) {
+      var texText = mathElements[i].firstChild; // presumably a text node holding the TeX source; NOTE(review): null for an empty element, in which case `.data` below would throw
+      if (mathElements[i].tagName == "SPAN") { // only matches whose tagName is "SPAN" are rendered; other tags with class "math" are skipped
+        window.katex.render(texText.data, mathElements[i], {
+          displayMode: mathElements[i].classList.contains('display'), // display mode only when the element also has the class "display"
+          throwOnError: false,
+          macros: macros,
+          fleqn: false
+        });
+      }
+    }
+  }
+}
+window.Quarto = {
+  typesetMath
+};
+</script>
+
+</head>
+
+<body class="nav-sidebar floating nav-fixed">
+
+<div id="quarto-search-results"></div>
+  <header id="quarto-header" class="headroom fixed-top">
+    <nav class="navbar navbar-expand-lg " data-bs-theme="dark">
+      <div class="navbar-container container-fluid">
+      <div class="navbar-brand-container mx-auto">
+    <a class="navbar-brand" href="./index.html">
+    <span class="navbar-title">Introduction to R with Tidyverse</span>
+    </a>
+  </div>
+            <div id="quarto-search" class="" title="Search"></div>
+          <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarCollapse" aria-controls="navbarCollapse" aria-expanded="false" aria-label="Toggle navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
+  <span class="navbar-toggler-icon"></span>
+</button>
+          <div class="collapse navbar-collapse" id="navbarCollapse">
+            <ul class="navbar-nav navbar-nav-scroll ms-auto">
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://github.com/sophie-a-lee/"> <i class="bi bi-github" role="img" aria-label="Github">
+</i> 
+<span class="menu-text"></span></a>
+  </li>  
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://scubed.rbind.io/"> <i class="bi bi-book" role="img" aria-label="Website">
+</i> 
+<span class="menu-text"></span></a>
+  </li>  
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://x.com/SophieStats10"> <i class="bi bi-twitter" role="img" aria-label="Twitter">
+</i> 
+<span class="menu-text"></span></a>
+  </li>  
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://buymeacoffee.com/sophie_a_lee"> <i class="bi bi-cup-hot" role="img" aria-label="Buy me a coffee">
+</i> 
+<span class="menu-text"></span></a>
+  </li>  
+</ul>
+          </div> <!-- /navcollapse -->
+          <div class="quarto-navbar-tools">
+    <a href="./Introduction-to-R-with-Tidyverse.pdf" title="Download PDF" class="quarto-navigation-tool px-1" aria-label="Download PDF"><i class="bi bi-file-pdf"></i></a>
+</div>
+      </div> <!-- /container-fluid -->
+    </nav>
+  <nav class="quarto-secondary-nav">
+    <div class="container-fluid d-flex">
+      <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
+        <i class="bi bi-layout-text-sidebar-reverse"></i>
+      </button>
+        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./data_description.html">Appendices</a></li><li class="breadcrumb-item"><a href="./exercise_solutions.html">Exercise solutions</a></li></ol></nav>
+        <a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">      
+        </a>
+    </div>
+  </nav>
+</header>
+<!-- content -->
+<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
+<!-- sidebar -->
+  <nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation floating overflow-auto">
+    <div class="sidebar-menu-container"> 
+    <ul class="list-unstyled mt-1">
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./index.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Welcome!</span></a>
+  </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./session1_notes.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction to R and RStudio</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./session2_notes.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Introduction to tidyverse and data wrangling</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./session3_notes.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Data preparation and manipulation</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./session4_notes.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Data visualisation with ggplot2</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./session5_notes.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item sidebar-item-section">
+      <div class="sidebar-item-container"> 
+            <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
+ <span class="menu-text">Appendices</span></a>
+          <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
+            <i class="bi bi-chevron-right ms-2"></i>
+          </a> 
+      </div>
+      <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">  
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./data_description.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Data description</span></a>
+  </div>
+</li>
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./exercise_solutions.html" class="sidebar-item-text sidebar-link active">
+ <span class="menu-text">Exercise solutions</span></a>
+  </div>
+</li>
+      </ul>
+  </li>
+    </ul>
+    </div>
+</nav>
+<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
+<!-- margin-sidebar -->
+    <div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
+        <nav id="TOC" role="doc-toc" class="toc-active">
+    <h2 id="toc-title">Table of contents</h2>
+   
+  <ul>
+  <li><a href="#exercise-1" id="toc-exercise-1" class="nav-link active" data-scroll-target="#exercise-1">Exercise 1</a></li>
+  <li><a href="#solutions" id="toc-solutions" class="nav-link" data-scroll-target="#solutions">Solutions</a></li>
+  <li><a href="#exercise-2" id="toc-exercise-2" class="nav-link" data-scroll-target="#exercise-2">Exercise 2</a></li>
+  <li><a href="#solutions-1" id="toc-solutions-1" class="nav-link" data-scroll-target="#solutions-1">Solutions</a></li>
+  <li><a href="#exercise-3" id="toc-exercise-3" class="nav-link" data-scroll-target="#exercise-3">Exercise 3</a></li>
+  <li><a href="#solutions-2" id="toc-solutions-2" class="nav-link" data-scroll-target="#solutions-2">Solutions</a></li>
+  <li><a href="#exercise-4" id="toc-exercise-4" class="nav-link" data-scroll-target="#exercise-4">Exercise 4</a></li>
+  <li><a href="#solutions-3" id="toc-solutions-3" class="nav-link" data-scroll-target="#solutions-3">Solutions</a></li>
+  <li><a href="#exercise-5" id="toc-exercise-5" class="nav-link" data-scroll-target="#exercise-5">Exercise 5</a></li>
+  <li><a href="#solutions-4" id="toc-solutions-4" class="nav-link" data-scroll-target="#solutions-4">Solutions</a></li>
+  <li><a href="#exercise-6" id="toc-exercise-6" class="nav-link" data-scroll-target="#exercise-6">Exercise 6</a></li>
+  <li><a href="#solutions-5" id="toc-solutions-5" class="nav-link" data-scroll-target="#solutions-5">Solutions</a></li>
+  <li><a href="#exercise-7" id="toc-exercise-7" class="nav-link" data-scroll-target="#exercise-7">Exercise 7</a></li>
+  <li><a href="#solution" id="toc-solution" class="nav-link" data-scroll-target="#solution">Solution</a></li>
+  <li><a href="#exercise-8" id="toc-exercise-8" class="nav-link" data-scroll-target="#exercise-8">Exercise 8</a></li>
+  <li><a href="#solutions-6" id="toc-solutions-6" class="nav-link" data-scroll-target="#solutions-6">Solutions</a></li>
+  </ul>
+</nav>
+    </div>
+<!-- main -->
+<main class="content" id="quarto-document-content">
+
+<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./data_description.html">Appendices</a></li><li class="breadcrumb-item"><a href="./exercise_solutions.html">Exercise solutions</a></li></ol></nav>
+<div class="quarto-title">
+<h1 class="title">Exercise solutions</h1>
+</div>
+
+
+
+<div class="quarto-title-meta">
+
+    
+  
+    
+  </div>
+  
+
+
+</header>
+
+
+<section id="exercise-1" class="level2">
+<h2 class="anchored" data-anchor-id="exercise-1">Exercise 1</h2>
+<ol type="1">
+<li>Open a new script file if you have not already done so.</li>
+<li>Save this script file into an appropriate location.</li>
+</ol>
+</section>
+<section id="solutions" class="level2">
+<h2 class="anchored" data-anchor-id="solutions">Solutions</h2>
+<ol type="1">
+<li>To open a new R script, click the <img src="images/open_shortcut.png" class="img-fluid" alt="new file icon"> icon and select ‘R script’.</li>
+<li>Save this file by following <strong>File -&gt; Save as…</strong> from the drop-down menu, selecting the folder you are working from in this course, and giving the file an appropriate name.</li>
+</ol>
+</section>
+<section id="exercise-2" class="level2">
+<h2 class="anchored" data-anchor-id="exercise-2">Exercise 2</h2>
+<ol type="1">
+<li>Add your name and the date to the top of your script file (hint: comment this out so R does not try to run it)</li>
+<li>Use R to perform the following calculations. Add the result to the same line of the script file in a way that ensures there are no errors in the code.</li>
+</ol>
+<ol type="a">
+<li><span class="math inline">\(64^2\)</span></li>
+<li><span class="math inline">\(3432 \div 8\)</span></li>
+<li><span class="math inline">\(96 \times 72\)</span></li>
+</ol>
+<p>When you have finished this exercise, select the entire script file (using <code>Ctrl + a</code> on Windows or <code>Command + a</code> on Mac) and run it to ensure there are no errors in the code.</p>
+</section>
+<section id="solutions-1" class="level2">
+<h2 class="anchored" data-anchor-id="solutions-1">Solutions</h2>
+<ol type="1">
+<li><p>Add a <code>#</code> symbol to the script file before writing your name and the date.</p></li>
+<li><p>After running the calculation, copy and paste the result after a <code>#</code> symbol to ensure there are no errors:</p></li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="dv">64</span> <span class="sc">^</span> <span class="dv">2</span> <span class="co"># 4096</span></span>
+<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="do">## [1] 4096</span></span>
+<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="dv">3432</span> <span class="sc">/</span> <span class="dv">8</span> <span class="co"># 429</span></span>
+<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="do">## [1] 429</span></span>
+<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a><span class="dv">96</span> <span class="sc">*</span> <span class="dv">72</span> <span class="co"># 6912</span></span>
+<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a><span class="do">## [1] 6912</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</section>
+<section id="exercise-3" class="level2">
+<h2 class="anchored" data-anchor-id="exercise-3">Exercise 3</h2>
+<ol type="1">
+<li><p>How many local authorities were included in the London region?</p></li>
+<li><p>Give three different ways that it would be possible to select all spend variables (sfa_2020, nhb_2020, etc.) from the CSP_2020 dataset.</p></li>
+<li><p>Create a new tibble, <code>em_2020</code>, that just includes local authorities from the East Midlands (EM) region.</p></li>
+</ol>
+<ol type="a">
+<li>How many local authorities in the East Midlands had an SFA of between £5 million and £10 million?</li>
+<li>Create a new variable with the total overall spend in 2020 for local authorities in the East Midlands.</li>
+</ol>
+</section>
+<section id="solutions-2" class="level2">
+<h2 class="anchored" data-anchor-id="solutions-2">Solutions</h2>
+<ol type="1">
+<li>First <code>filter</code> the data to return only rows which belong to the London region, then count the number of rows in this subgroup:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>csp_2020 <span class="sc">%&gt;%</span> </span>
+<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(region <span class="sc">==</span> <span class="st">"L"</span>) <span class="sc">%&gt;%</span> </span>
+<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">count</span>()</span>
+<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="do">## # A tibble: 1 × 1</span></span>
+<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a><span class="do">##       n</span></span>
+<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a><span class="do">##   &lt;int&gt;</span></span>
+<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a><span class="do">## 1    34</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+<ol start="2" type="1">
+<li>There are many different ways to select variables from a dataset. For a list of selection helpers, check the helpfile <code>?tidyr_tidy_select</code>. Some examples include:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Using the : symbol to return consecutive columns</span></span>
+<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="co"># By variable name:</span></span>
+<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a><span class="fu">select</span>(csp_2020, sfa_2020<span class="sc">:</span>rsdg_2020)</span>
+<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Or by column number:</span></span>
+<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a><span class="fu">select</span>(csp_2020, <span class="dv">4</span><span class="sc">:</span><span class="dv">9</span>)</span>
+<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Returning all variables with names ending "_2020"</span></span>
+<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a><span class="fu">select</span>(csp_2020, <span class="fu">ends_with</span>(<span class="st">"_2020"</span>))</span>
+<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Return all numeric variables</span></span>
+<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a><span class="fu">select</span>(csp_2020, <span class="fu">where</span>(is.numeric))</span>
+<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a><span class="co"># Return all variables that are not character </span></span>
+<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a><span class="fu">select</span>(csp_2020, <span class="sc">!</span><span class="fu">where</span>(is.character))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+<ol start="3" type="1">
+<li>To create a new tibble, use <code>filter</code> to select the subgroup where region is “EM”, and assign it to an object using the <code>&lt;-</code> symbol:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>em_2020 <span class="ot">&lt;-</span> <span class="fu">filter</span>(csp_2020, region <span class="sc">==</span> <span class="st">"EM"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+<ol type="a">
+<li>Use <code>filter</code> to select the subgroup and then <code>count</code> the number of rows:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>em_2020 <span class="sc">%&gt;%</span> </span>
+<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(<span class="fu">between</span>(sfa_2020, <span class="dv">5</span>, <span class="dv">10</span>)) <span class="sc">%&gt;%</span> </span>
+<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">count</span>()</span>
+<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a><span class="do">## # A tibble: 1 × 1</span></span>
+<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a><span class="do">##       n</span></span>
+<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a><span class="do">##   &lt;int&gt;</span></span>
+<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a><span class="do">## 1     3</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+<ol start="2" type="a">
+<li>Use the <code>mutate</code> function to create a new variable from the existing ones. <strong>Hint:</strong> If you are not sure of the variable names in the data, use the <code>names</code> function and copy them from the console:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="fu">names</span>(em_2020)</span>
+<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="do">## [1] "ons_code"         "authority"        "region"           "sfa_2020"        </span></span>
+<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a><span class="do">## [5] "under_index_2020" "ct_total_2020"    "nhb_2020"         "nhb_return_2020" </span></span>
+<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a><span class="do">## [9] "rsdg_2020"</span></span>
+<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a>em_2020 <span class="ot">&lt;-</span> em_2020 <span class="sc">%&gt;%</span> </span>
+<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">total_spend =</span> sfa_2020 <span class="sc">+</span> under_index_2020 <span class="sc">+</span> ct_total_2020 <span class="sc">+</span> </span>
+<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a>           nhb_2020 <span class="sc">+</span> nhb_return_2020 <span class="sc">+</span> rsdg_2020)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</section>
+<section id="exercise-4" class="level2">
+<h2 class="anchored" data-anchor-id="exercise-4">Exercise 4</h2>
+<ol type="1">
+<li>Create a data frame with the minimum, maximum and median total spend per year for each region.</li>
+<li>Produce a frequency table containing the number and percentage of local authorities in each region.</li>
+<li>Convert the data object <code>csp_long2</code> back into wide format, with one row per local authority and one variable per total spend per year (<strong>HINT:</strong> start by selecting only the variables you need from the long data frame). Use the help file <code>?pivot_wider</code> and <code>vignette("pivot")</code> for more hints.</li>
+<li>Using your new wide data frame, calculate the difference in total spending for each local authority between 2015 and 2020. How many local authorities have had an overall reduction in spending since 2015?</li>
+</ol>
+</section>
+<section id="solutions-3" class="level2">
+<h2 class="anchored" data-anchor-id="solutions-3">Solutions</h2>
+<ol type="1">
+<li>Use the <code>summarise</code> function after grouping by the <code>year</code> and <code>region</code> variables:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>csp_long2 <span class="sc">%&gt;%</span> </span>
+<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(region, year) <span class="sc">%&gt;%</span> </span>
+<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(<span class="at">min_spend =</span> <span class="fu">min</span>(total_spend),</span>
+<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a>            <span class="at">max_spend =</span> <span class="fu">max</span>(total_spend),</span>
+<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a>            <span class="at">median_spend =</span> <span class="fu">median</span>(total_spend)) <span class="sc">%&gt;%</span> </span>
+<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>()</span>
+<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a><span class="do">## # A tibble: 54 × 5</span></span>
+<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a><span class="do">##    region  year min_spend max_spend median_spend</span></span>
+<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a><span class="do">##    &lt;chr&gt;  &lt;dbl&gt;     &lt;dbl&gt;     &lt;dbl&gt;        &lt;dbl&gt;</span></span>
+<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a><span class="do">##  1 EE      2015         0      883.         15.9</span></span>
+<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a><span class="do">##  2 EE      2016         0      860.         16.2</span></span>
+<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a><span class="do">##  3 EE      2017         0      845.         15.0</span></span>
+<span id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a><span class="do">##  4 EE      2018         0      860.         14.4</span></span>
+<span id="cb7-14"><a href="#cb7-14" aria-hidden="true" tabindex="-1"></a><span class="do">##  5 EE      2019         0      874.         14.7</span></span>
+<span id="cb7-15"><a href="#cb7-15" aria-hidden="true" tabindex="-1"></a><span class="do">##  6 EE      2020         0      915.         15.2</span></span>
+<span id="cb7-16"><a href="#cb7-16" aria-hidden="true" tabindex="-1"></a><span class="do">##  7 EM      2015         0      492.         12.4</span></span>
+<span id="cb7-17"><a href="#cb7-17" aria-hidden="true" tabindex="-1"></a><span class="do">##  8 EM      2016         0      479.         12.0</span></span>
+<span id="cb7-18"><a href="#cb7-18" aria-hidden="true" tabindex="-1"></a><span class="do">##  9 EM      2017         0      475.         11.1</span></span>
+<span id="cb7-19"><a href="#cb7-19" aria-hidden="true" tabindex="-1"></a><span class="do">## 10 EM      2018         0      483.         11.0</span></span>
+<span id="cb7-20"><a href="#cb7-20" aria-hidden="true" tabindex="-1"></a><span class="do">## # ℹ 44 more rows</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+<ol start="2" type="1">
+<li>To calculate the percentage of local authorities in each region, we need the total number in each region and the overall number of local authorities:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Use the csp_2020 data as only require one row per local authority</span></span>
+<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>csp_2020 <span class="sc">%&gt;%</span> </span>
+<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>  <span class="co"># Begin by calculating number of local authorities per region</span></span>
+<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(region) <span class="sc">%&gt;%</span> </span>
+<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a>  <span class="co"># Count number of rows in each group</span></span>
+<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(<span class="at">n_la_region =</span> <span class="fu">n</span>()) <span class="sc">%&gt;%</span> </span>
+<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span> </span>
+<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a>  <span class="co"># Create a new variable with the total number of local authorities (the sum)</span></span>
+<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">n_la_overall =</span> <span class="fu">sum</span>(n_la_region),</span>
+<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a>         <span class="co"># Calculate the percentage (and round to make easier to read)</span></span>
+<span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a>         <span class="at">perc_la_region =</span> <span class="fu">round</span>((n_la_region <span class="sc">/</span> n_la_overall) <span class="sc">*</span> <span class="dv">100</span>, <span class="dv">2</span>)) <span class="sc">%&gt;%</span> </span>
+<span id="cb8-12"><a href="#cb8-12" aria-hidden="true" tabindex="-1"></a>  <span class="co"># Remove the total local authority column</span></span>
+<span id="cb8-13"><a href="#cb8-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(<span class="sc">-</span>n_la_overall)</span>
+<span id="cb8-14"><a href="#cb8-14" aria-hidden="true" tabindex="-1"></a><span class="do">## # A tibble: 9 × 3</span></span>
+<span id="cb8-15"><a href="#cb8-15" aria-hidden="true" tabindex="-1"></a><span class="do">##   region n_la_region perc_la_region</span></span>
+<span id="cb8-16"><a href="#cb8-16" aria-hidden="true" tabindex="-1"></a><span class="do">##   &lt;chr&gt;        &lt;int&gt;          &lt;dbl&gt;</span></span>
+<span id="cb8-17"><a href="#cb8-17" aria-hidden="true" tabindex="-1"></a><span class="do">## 1 EE              57          14.4 </span></span>
+<span id="cb8-18"><a href="#cb8-18" aria-hidden="true" tabindex="-1"></a><span class="do">## 2 EM              51          12.9 </span></span>
+<span id="cb8-19"><a href="#cb8-19" aria-hidden="true" tabindex="-1"></a><span class="do">## 3 L               34           8.59</span></span>
+<span id="cb8-20"><a href="#cb8-20" aria-hidden="true" tabindex="-1"></a><span class="do">## 4 NE              15           3.79</span></span>
+<span id="cb8-21"><a href="#cb8-21" aria-hidden="true" tabindex="-1"></a><span class="do">## 5 NW              46          11.6 </span></span>
+<span id="cb8-22"><a href="#cb8-22" aria-hidden="true" tabindex="-1"></a><span class="do">## 6 SE              81          20.4 </span></span>
+<span id="cb8-23"><a href="#cb8-23" aria-hidden="true" tabindex="-1"></a><span class="do">## 7 SW              47          11.9 </span></span>
+<span id="cb8-24"><a href="#cb8-24" aria-hidden="true" tabindex="-1"></a><span class="do">## 8 WM              38           9.6 </span></span>
+<span id="cb8-25"><a href="#cb8-25" aria-hidden="true" tabindex="-1"></a><span class="do">## 9 YH              27           6.82</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+<ol start="3" type="1">
+<li>Use the <code>pivot_wider</code> function, use the year to set the new variable names suffix (<code>names_from =</code>), add a prefix to avoid variable names beginning with a number (<code>names_prefix =</code>), and take the <code>values_from</code> the current <code>total_spend</code> column:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>csp_total_wide <span class="ot">&lt;-</span> csp_long2 <span class="sc">%&gt;%</span> </span>
+<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a>  <span class="co"># Select variables to keep</span></span>
+<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(ons_code<span class="sc">:</span>year, total_spend) <span class="sc">%&gt;%</span> </span>
+<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_wider</span>(<span class="at">names_from =</span> year,</span>
+<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a>              <span class="at">names_prefix =</span> <span class="st">"total_spend_"</span>,</span>
+<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a>              <span class="at">values_from =</span> total_spend)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+<ol start="4" type="1">
+<li>Begin by using <code>mutate</code> to create a variable containing the difference in total spend between 2015 and 2020. Use <code>filter</code> to return rows where there is a reduction in spend, then <code>count</code> the number of rows:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>csp_total_wide <span class="sc">%&gt;%</span> </span>
+<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">total_diff =</span> total_spend_2020 <span class="sc">-</span> total_spend_2015) <span class="sc">%&gt;%</span> </span>
+<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(total_diff <span class="sc">&lt;</span> <span class="dv">0</span>) <span class="sc">%&gt;%</span> </span>
+<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">count</span>()</span>
+<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a><span class="do">## # A tibble: 1 × 1</span></span>
+<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a><span class="do">##       n</span></span>
+<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a><span class="do">##   &lt;int&gt;</span></span>
+<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a><span class="do">## 1   234</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</section>
+<section id="exercise-5" class="level2">
+<h2 class="anchored" data-anchor-id="exercise-5">Exercise 5</h2>
+<ol type="1">
+<li>Create a new data object containing the 2020 CSP data without the Greater London Authority observation. Name this data frame <code>csp_nolon_2020</code>.</li>
+<li>Using the <code>csp_nolon_2020</code> data, create a data visualisation to check the distribution (or shape) of the SFA variable.</li>
+<li>Based on the visualisation above, create a summary table for the SFA variable containing the minimum and maximum, and appropriate measures of the centre/average and spread.</li>
+</ol>
+</section>
+<section id="solutions-4" class="level2">
+<h2 class="anchored" data-anchor-id="solutions-4">Solutions</h2>
+<ol type="1">
+<li>Create a new object using the <code>&lt;-</code> symbol, use <code>filter</code> to remove the duplicate row:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>csp_nolon_2020 <span class="ot">&lt;-</span> <span class="fu">filter</span>(csp_2020, authority <span class="sc">!=</span> <span class="st">"Greater London Authority"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+<ol start="2" type="1">
+<li>Histograms are used to check the distribution of numeric variables:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(<span class="at">data =</span> csp_nolon_2020) <span class="sc">+</span></span>
+<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_histogram</span>(<span class="fu">aes</span>(<span class="at">x =</span> sfa_2020))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output-display">
+<div>
+<figure class="figure">
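+<p><img src="exercise_solutions_files/figure-html/sfa histogram-1.png" alt="Histogram of the sfa_2020 variable for local authorities, excluding the Greater London Authority" class="img-fluid figure-img" width="672"></p>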
+</figure>
+</div>
+</div>
+</div>
+<ol start="3" type="1">
+<li>The histogram shows that the <code>sfa_2020</code> variable is very skewed, therefore the <code>median</code> and <code>IQR</code> are the most appropriate measures of centre and spread:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="fu">summarise</span>(csp_nolon_2020,</span>
+<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a>          <span class="at">min_sfa =</span> <span class="fu">min</span>(sfa_2020),</span>
+<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>          <span class="at">max_sfa =</span> <span class="fu">max</span>(sfa_2020),</span>
+<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a>          <span class="at">median_sfa =</span> <span class="fu">median</span>(sfa_2020),</span>
+<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a>          <span class="at">iqr_sfa =</span> <span class="fu">IQR</span>(sfa_2020))</span>
+<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a><span class="do">## # A tibble: 1 × 4</span></span>
+<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a><span class="do">##   min_sfa max_sfa median_sfa iqr_sfa</span></span>
+<span id="cb13-8"><a href="#cb13-8" aria-hidden="true" tabindex="-1"></a><span class="do">##     &lt;dbl&gt;   &lt;dbl&gt;      &lt;dbl&gt;   &lt;dbl&gt;</span></span>
+<span id="cb13-9"><a href="#cb13-9" aria-hidden="true" tabindex="-1"></a><span class="do">## 1       0    470.       4.62    54.7</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</section>
+<section id="exercise-6" class="level2">
+<h2 class="anchored" data-anchor-id="exercise-6">Exercise 6</h2>
+<ol type="1">
+<li>What is the problem with the following code? Fix the code to change the shape of all the points.</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb14"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(csp_nolon_2020) <span class="sc">+</span> </span>
+<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>(<span class="fu">aes</span>(<span class="at">x =</span> sfa_2020, <span class="at">y =</span> ct_total_2020, <span class="at">shape =</span> <span class="st">"*"</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+<ol start="2" type="1">
+<li><p>Add a line of best fit to the scatterplot showing the relationship between SFA and council tax total (hint: use <code>?geom_smooth</code>).</p></li>
+<li><p>Add a line of best fit for each region (hint: make each line a different colour).</p></li>
+</ol>
+</section>
+<section id="solutions-5" class="level2">
+<h2 class="anchored" data-anchor-id="solutions-5">Solutions</h2>
+<ol type="1">
+<li>Only aesthetics determined by variables in the data should lie inside the <code>aes</code> function. Here the shape is a fixed value, so the <code>shape</code> argument should be placed outside of <code>aes</code>:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(csp_nolon_2020) <span class="sc">+</span> </span>
+<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>(<span class="fu">aes</span>(<span class="at">x =</span> sfa_2020, <span class="at">y =</span> ct_total_2020), <span class="at">shape =</span> <span class="st">"*"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output-display">
+<div>
+<figure class="figure">
+<p><img src="exercise_solutions_files/figure-html/aesthetic solution-1.png" alt="Scatterplot of sfa_2020 against ct_total_2020 with every point drawn as an asterisk" class="img-fluid figure-img" width="672"></p>
+</figure>
+</div>
+</div>
+</div>
+<ol start="2" type="1">
+<li>The function <code>geom_smooth</code> adds a line of best fit, make sure to set <code>method = lm</code> to fit a linear trend:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(<span class="at">data =</span> csp_nolon_2020, <span class="fu">aes</span>(<span class="at">x =</span> ct_total_2020, <span class="at">y =</span> sfa_2020)) <span class="sc">+</span></span>
+<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
+<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_smooth</span>(<span class="at">method =</span> <span class="st">"lm"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output-display">
+<div>
+<figure class="figure">
+<p><img src="exercise_solutions_files/figure-html/scatter with line of best fit-1.png" alt="Scatterplot of ct_total_2020 against sfa_2020 with a linear line of best fit" class="img-fluid figure-img" width="672"></p>
+</figure>
+</div>
+</div>
+</div>
+<p><strong>Hint:</strong> To reduce repetitive coding, set <code>aes</code> inside the <code>ggplot</code> function: these aesthetics are then applied to every layer of the plot.</p>
+<ol start="3" type="1">
+<li>A line of best fit for each group simply requires adding the grouping variable (here <code>region</code>) to the <code>aes</code> function as a colour:</li>
+</ol>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb17"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(<span class="at">data =</span> csp_nolon_2020, </span>
+<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a>       <span class="fu">aes</span>(<span class="at">x =</span> ct_total_2020, <span class="at">y =</span> sfa_2020, <span class="at">colour =</span> region)) <span class="sc">+</span></span>
+<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>() <span class="sc">+</span></span>
+<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_smooth</span>(<span class="at">method =</span> <span class="st">"lm"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output-display">
+<div>
+<figure class="figure">
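+<p><img src="exercise_solutions_files/figure-html/scatter with line of best fit by region-1.png" alt="Scatterplot of ct_total_2020 against sfa_2020, coloured by region, with a linear line of best fit for each region" class="img-fluid figure-img" width="672"></p>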
+</figure>
+</div>
+</div>
+</div>
+</section>
+<section id="exercise-7" class="level2">
+<h2 class="anchored" data-anchor-id="exercise-7">Exercise 7</h2>
+<p>Use an appropriate data visualisation to show how the total spend in each local authority has changed over the years between 2015 and 2020. Choose a visualisation that shows these trends over time and allows us to compare them between regions.</p>
+</section>
+<section id="solution" class="level2">
+<h2 class="anchored" data-anchor-id="solution">Solution</h2>
+<p>The most appropriate plot to show a change in variable over time is a line graph (with year on the x-axis and spend on the y-axis). To compare these between regions, we could set the colour of these lines, but as there are so many local authorities, this would overload the graph and make it hard to compare. As an alternative, we can facet this graph by region to show the line graphs on the same scale on the same output.</p>
+<p>Be sure to set appropriate axis labels, font sizes, etc.</p>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Remove the Greater London Authority duplicate</span></span>
+<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a>csp_long2 <span class="sc">%&gt;%</span> </span>
+<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(authority <span class="sc">!=</span> <span class="st">"Greater London Authority"</span>) <span class="sc">%&gt;%</span> </span>
+<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ggplot</span>() <span class="sc">+</span> </span>
+<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a>  <span class="co"># Need to add a group to know what each line represents</span></span>
+<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">x =</span> year, <span class="at">y =</span> total_spend, <span class="at">group =</span> ons_code,</span>
+<span id="cb18-7"><a href="#cb18-7" aria-hidden="true" tabindex="-1"></a>                <span class="co"># OPTIONAL: colour by region to make it prettier!</span></span>
+<span id="cb18-8"><a href="#cb18-8" aria-hidden="true" tabindex="-1"></a>                <span class="at">colour =</span> region)) <span class="sc">+</span></span>
+<span id="cb18-9"><a href="#cb18-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">facet_wrap</span>( <span class="sc">~</span> region) <span class="sc">+</span> </span>
+<span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(<span class="at">x =</span> <span class="st">"Year"</span>, <span class="at">y =</span> <span class="st">"Total core spending power (millions)"</span>) <span class="sc">+</span></span>
+<span id="cb18-11"><a href="#cb18-11" aria-hidden="true" tabindex="-1"></a>  <span class="co"># Add theme_light to make the background a nicer colour</span></span>
+<span id="cb18-12"><a href="#cb18-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme_light</span>() <span class="sc">+</span> </span>
+<span id="cb18-13"><a href="#cb18-13" aria-hidden="true" tabindex="-1"></a>  <span class="co"># Rotate the x-axis labels to avoid overlap</span></span>
+<span id="cb18-14"><a href="#cb18-14" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(<span class="at">axis.text.x.bottom =</span> <span class="fu">element_text</span>(<span class="at">angle =</span> <span class="dv">45</span>),</span>
+<span id="cb18-15"><a href="#cb18-15" aria-hidden="true" tabindex="-1"></a>        <span class="co"># Remove the legend (not needed, we have labels on the facets)</span></span>
+<span id="cb18-16"><a href="#cb18-16" aria-hidden="true" tabindex="-1"></a>        <span class="at">legend.position =</span> <span class="st">"none"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output-display">
+<div>
+<figure class="figure">
+<p><img src="exercise_solutions_files/figure-html/region total spend line graph-1.png" alt="Line graphs of total spend per year for each local authority, faceted and coloured by region" class="img-fluid figure-img" width="672"></p>
+</figure>
+</div>
+</div>
+</div>
+</section>
+<section id="exercise-8" class="level2">
+<h2 class="anchored" data-anchor-id="exercise-8">Exercise 8</h2>
+<p>Create an RMarkdown file that produces an HTML report describing the trends in core spending power in English local authorities between 2015 and 2020. Your report should include:</p>
+<ul>
+<li>A summary table of the total spending per year per region</li>
+<li>A suitable visualisation showing how the total annual spending has changed over this period, compared between regions</li>
+<li>A short interpretation of the table and visualisation</li>
+</ul>
+<p><strong>Note:</strong> You are not expected to be an expert in this data! Interpret these outputs as you would any other numeric variable measured over time.</p>
+</section>
+<section id="solutions-6" class="level2">
+<h2 class="anchored" data-anchor-id="solutions-6">Solutions</h2>
+<p>There are many different correct solutions to this exercise. All RMarkdown files should begin with a YAML header similar to the one below:</p>
+<div class="sourceCode" id="cb19"><pre class="sourceCode default code-with-copy"><code class="sourceCode default"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a>---</span>
+<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a>title: "Core spending power in English local authorities, 2015 - 2020"</span>
+<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a>author: Sophie Lee</span>
+<span id="cb19-4"><a href="#cb19-4" aria-hidden="true" tabindex="-1"></a>output: html_document</span>
+<span id="cb19-5"><a href="#cb19-5" aria-hidden="true" tabindex="-1"></a>---  </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<p>Next, you may have a code chunk that sets up the global chunk options, loads any packages you need, and loads the data that you will be using for the report. For example:</p>
+<div class="sourceCode" id="cb20"><pre class="sourceCode default code-with-copy"><code class="sourceCode default"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a>```{r setup, include = FALSE}</span>
+<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a># Set global chunk options to not show R code or messages</span>
+<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a>knitr::opts_chunk$set(echo = FALSE, message = FALSE)</span>
+<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a># Load the tidyverse package</span>
+<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a>library(tidyverse)</span>
+<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a># Load the long dataset</span>
+<span id="cb20-9"><a href="#cb20-9" aria-hidden="true" tabindex="-1"></a>csp_long2 &lt;- read_csv("data/CSP_long_201520.csv")</span>
+<span id="cb20-10"><a href="#cb20-10" aria-hidden="true" tabindex="-1"></a>```</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<p>You may have begun with an introduction using RMarkdown syntax:</p>
+<div class="sourceCode" id="cb21"><pre class="sourceCode default code-with-copy"><code class="sourceCode default"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a># Introduction</span>
+<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a>The following report will investigate the trends in core spending power </span>
+<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a>across England between 2015 and 2020. All values are given in millions </span>
+<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a>of pounds. </span>
+<span id="cb21-5"><a href="#cb21-5" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb21-6"><a href="#cb21-6" aria-hidden="true" tabindex="-1"></a>The core spending power was made up of the following provisions:</span>
+<span id="cb21-7"><a href="#cb21-7" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb21-8"><a href="#cb21-8" aria-hidden="true" tabindex="-1"></a>- Settlement funding assessment (SFA)</span>
+<span id="cb21-9"><a href="#cb21-9" aria-hidden="true" tabindex="-1"></a>- Compensation for under-indexing the business rates multipliers</span>
+<span id="cb21-10"><a href="#cb21-10" aria-hidden="true" tabindex="-1"></a>- Council tax </span>
+<span id="cb21-11"><a href="#cb21-11" aria-hidden="true" tabindex="-1"></a>- New homes bonus</span>
+<span id="cb21-12"><a href="#cb21-12" aria-hidden="true" tabindex="-1"></a>- New homes bonus returned funding</span>
+<span id="cb21-13"><a href="#cb21-13" aria-hidden="true" tabindex="-1"></a>- Rural Services Delivery Grant (RSDG)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<p>Followed by a summary table, created using <code>summarise</code> and displayed using <code>kable</code>:</p>
+<div class="sourceCode" id="cb22"><pre class="sourceCode default code-with-copy"><code class="sourceCode default"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a># Total core spending power by region</span>
+<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a>Below is a summary table containing the total core spending power per year </span>
+<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a>per region, given in millions of £:</span>
+<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a>```{r csp total summary table}</span>
+<span id="cb22-6"><a href="#cb22-6" aria-hidden="true" tabindex="-1"></a>csp_long2 %&gt;% </span>
+<span id="cb22-7"><a href="#cb22-7" aria-hidden="true" tabindex="-1"></a>  group_by(region, year) %&gt;% </span>
+<span id="cb22-8"><a href="#cb22-8" aria-hidden="true" tabindex="-1"></a>  summarise(min_spend = min(total_spend),</span>
+<span id="cb22-9"><a href="#cb22-9" aria-hidden="true" tabindex="-1"></a>            max_spend = max(total_spend),</span>
+<span id="cb22-10"><a href="#cb22-10" aria-hidden="true" tabindex="-1"></a>            median_spend = median(total_spend),</span>
+<span id="cb22-11"><a href="#cb22-11" aria-hidden="true" tabindex="-1"></a>            iqr_spend = IQR(total_spend)) %&gt;% </span>
+<span id="cb22-12"><a href="#cb22-12" aria-hidden="true" tabindex="-1"></a>  ungroup() %&gt;% </span>
+<span id="cb22-13"><a href="#cb22-13" aria-hidden="true" tabindex="-1"></a>  knitr::kable(.,</span>
+<span id="cb22-14"><a href="#cb22-14" aria-hidden="true" tabindex="-1"></a>               col.names = c("Region", "Year", "Minimum", </span>
+<span id="cb22-15"><a href="#cb22-15" aria-hidden="true" tabindex="-1"></a>                             "Maximum", "Median", "IQR"))</span>
+<span id="cb22-16"><a href="#cb22-16" aria-hidden="true" tabindex="-1"></a>```</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<p>Then an additional code chunk producing a faceted line chart, similar to the one in Exercise 7:</p>
+<div class="sourceCode" id="cb23"><pre class="sourceCode default code-with-copy"><code class="sourceCode default"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a>```{r}</span>
+<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a>csp_long2 %&gt;% </span>
+<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a>  filter(authority != "Greater London Authority") %&gt;% </span>
+<span id="cb23-4"><a href="#cb23-4" aria-hidden="true" tabindex="-1"></a>  ggplot() + </span>
+<span id="cb23-5"><a href="#cb23-5" aria-hidden="true" tabindex="-1"></a>  geom_line(aes(x = year, y = total_spend, group = ons_code,</span>
+<span id="cb23-6"><a href="#cb23-6" aria-hidden="true" tabindex="-1"></a>                colour = region)) +</span>
+<span id="cb23-7"><a href="#cb23-7" aria-hidden="true" tabindex="-1"></a>  facet_wrap( ~ region) + </span>
+<span id="cb23-8"><a href="#cb23-8" aria-hidden="true" tabindex="-1"></a>  labs(x = "Year", y = "Total core spending power (millions)") +</span>
+<span id="cb23-9"><a href="#cb23-9" aria-hidden="true" tabindex="-1"></a>  theme_light() + </span>
+<span id="cb23-10"><a href="#cb23-10" aria-hidden="true" tabindex="-1"></a>  theme(axis.text.x.bottom = element_text(angle = 45),</span>
+<span id="cb23-11"><a href="#cb23-11" aria-hidden="true" tabindex="-1"></a>        legend.position = "none")</span>
+<span id="cb23-12"><a href="#cb23-12" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb23-13"><a href="#cb23-13" aria-hidden="true" tabindex="-1"></a>```</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+
+
+</section>
+
+</main> <!-- /main -->
+<script id="quarto-html-after-body" type="application/javascript">
+window.document.addEventListener("DOMContentLoaded", function (event) {
+  const toggleBodyColorMode = (bsSheetEl) => { // Sync the <body> light/dark class with the data-mode attribute of the given element.
+    const mode = bsSheetEl.getAttribute("data-mode");
+    const bodyEl = window.document.querySelector("body");
+    if (mode === "dark") {
+      bodyEl.classList.add("quarto-dark");
+      bodyEl.classList.remove("quarto-light");
+    } else { // any value other than "dark", including a missing attribute (null), is treated as light
+      bodyEl.classList.add("quarto-light");
+      bodyEl.classList.remove("quarto-dark");
+    }
+  }
+  const toggleBodyColorPrimary = () => { // Apply the body colour-mode class from link#quarto-bootstrap; does nothing when that element is absent.
+    const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
+    if (bsSheetEl) {
+      toggleBodyColorMode(bsSheetEl);
+    }
+  }
+  toggleBodyColorPrimary();  
+  const icon = "";
+  const anchorJS = new window.AnchorJS();
+  anchorJS.options = {
+    placement: 'right',
+    icon: icon
+  };
+  anchorJS.add('.anchored');
+  const isCodeAnnotation = (el) => { // True when any class name on el starts with "code-annotation-".
+    for (const clz of el.classList) {
+      if (clz.startsWith('code-annotation-')) {                     
+        return true;
+      }
+    }
+    return false;
+  }
+  const clipboard = new window.ClipboardJS('.code-copy-button', {
+    text: function(trigger) {
+      const codeEl = trigger.previousElementSibling.cloneNode(true);
+      for (const childEl of codeEl.children) {
+        if (isCodeAnnotation(childEl)) {
+          childEl.remove();
+        }
+      }
+      return codeEl.innerText;
+    }
+  });
+  clipboard.on('success', function(e) { // After a successful copy: show temporary "Copied!" feedback on the button, then restore it.
+    // button target
+    const button = e.trigger;
+    // don't keep focus
+    button.blur();
+    // flash "checked"
+    button.classList.add('code-copy-button-checked');
+    var currentTitle = button.getAttribute("title"); // remembered so the timeout below can restore it
+    button.setAttribute("title", "Copied!");
+    let tooltip;
+    if (window.bootstrap) { // a tooltip is only created when window.bootstrap is present
+      button.setAttribute("data-bs-toggle", "tooltip");
+      button.setAttribute("data-bs-placement", "left");
+      button.setAttribute("data-bs-title", "Copied!");
+      tooltip = new bootstrap.Tooltip(button, 
+        { trigger: "manual", 
+          customClass: "code-copy-button-tooltip",
+          offset: [0, -8]});
+      tooltip.show();    
+    }
+    setTimeout(function() { // revert all feedback after 1000 ms
+      if (tooltip) {
+        tooltip.hide();
+        button.removeAttribute("data-bs-title");
+        button.removeAttribute("data-bs-toggle");
+        button.removeAttribute("data-bs-placement");
+      }
+      button.setAttribute("title", currentTitle);
+      button.classList.remove('code-copy-button-checked');
+    }, 1000);
+    // clear code selection
+    e.clearSelection();
+  });
+    var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
+    var mailtoRegex = new RegExp(/^mailto:/);
+      var filterRegex = new RegExp('/' + window.location.host + '/');
+    var isInternal = (href) => {
+        return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
+    }
+    // Inspect non-navigation links and adorn them if external
+ 	var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool)');
+    for (var i=0; i<links.length; i++) {
+      const link = links[i];
+      if (!isInternal(link.href)) {
+        // undo the damage that might have been done by quarto-nav.js in the case of
+        // links that we want to consider external
+        if (link.dataset.originalHref !== undefined) {
+          link.href = link.dataset.originalHref;
+        }
+      }
+    }
+  function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) { // Attach a tippy tooltip to el; each of the three callbacks is optional and only set on the config when truthy.
+    const config = {
+      allowHTML: true,
+      maxWidth: 500,
+      delay: 100,
+      arrow: false,
+      appendTo: function(el) { // the tooltip is appended to the parent of the element passed in
+          return el.parentElement;
+      },
+      interactive: true,
+      interactiveBorder: 10,
+      theme: 'quarto',
+      placement: 'bottom-start',
+    };
+    if (contentFn) {
+      config.content = contentFn;
+    }
+    if (onTriggerFn) {
+      config.onTrigger = onTriggerFn;
+    }
+    if (onUntriggerFn) {
+      config.onUntrigger = onUntriggerFn;
+    }
+    window.tippy(el, config); 
+  }
+  const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]'); // footnote reference links
+  for (var i=0; i<noterefs.length; i++) {
+    const ref = noterefs[i];
+    tippyHover(ref, function() { // content callback: returns the HTML of the element the reference points at
+      // use id or data attribute instead here
+      let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
+      try { href = new URL(href).hash; } catch {} // a value that is not a parseable absolute URL (e.g. "#fn1") is kept unchanged
+      const id = href.replace(/^#\/?/, ""); // drop the leading "#" or "#/"
+      const note = window.document.getElementById(id);
+      if (note) {
+        return note.innerHTML;
+      } else { // no matching element in this document: empty tooltip content
+        return "";
+      }
+    });
+  }
+  const xrefs = window.document.querySelectorAll('a.quarto-xref');
+  const processXRef = (id, note) => { // Build tooltip HTML for a cross-reference target; note is modified in place, so callers pass a clone or a parsed copy.
+    // Strip column container classes
+    const stripColumnClz = (el) => { // recursively removes "page-full" and "page-columns" from el and all descendants
+      el.classList.remove("page-full", "page-columns");
+      if (el.children) {
+        for (const child of el.children) {
+          stripColumnClz(child);
+        }
+      }
+    }
+    stripColumnClz(note)
+    if (id === null || id.startsWith('sec-')) { // null id is passed for whole-page (chapter) references
+      // Special case sections, only their first couple elements
+      const container = document.createElement("div");
+      if (note.children && note.children.length > 2) {
+        container.appendChild(note.children[0].cloneNode(true)); // always keep the first child
+        for (let i = 1; i < note.children.length; i++) { // then add the first later child that is not an empty <p>, and stop
+          const child = note.children[i];
+          if (child.tagName === "P" && child.innerText === "") {
+            continue;
+          } else {
+            container.appendChild(child.cloneNode(true));
+            break;
+          }
+        }
+        if (window.Quarto?.typesetMath) {
+          window.Quarto.typesetMath(container);
+        }
+        return container.innerHTML
+      } else { // two or fewer children: use the whole target
+        if (window.Quarto?.typesetMath) {
+          window.Quarto.typesetMath(note);
+        }
+        return note.innerHTML;
+      }
+    } else {
+      // Remove any anchor links if they are present
+      const anchorLink = note.querySelector('a.anchorjs-link'); // only the first match is removed
+      if (anchorLink) {
+        anchorLink.remove();
+      }
+      if (window.Quarto?.typesetMath) {
+        window.Quarto.typesetMath(note);
+      }
+      // TODO in 1.5, we should make sure this works without a callout special case
+      if (note.classList.contains("callout")) { // callouts return outerHTML so their own wrapper element is included
+        return note.outerHTML;
+      } else {
+        return note.innerHTML;
+      }
+    }
+  }
+  for (var i=0; i<xrefs.length; i++) { // give every a.quarto-xref link a tooltip whose content is resolved when it is triggered
+    const xref = xrefs[i];
+    tippyHover(xref, undefined, function(instance) { // onTrigger callback; no static content function is supplied
+      instance.disable(); // re-enabled and shown in each branch below once content resolution has finished
+      let url = xref.getAttribute('href');
+      let hash = undefined; 
+      if (url.startsWith('#')) {
+        hash = url;
+      } else {
+        try { hash = new URL(url).hash; } catch {} // hash stays undefined when url is not a parseable absolute URL
+      }
+      if (hash) {
+        const id = hash.replace(/^#\/?/, "");
+        const note = window.document.getElementById(id);
+        if (note !== null) { // target is in the current document
+          try {
+            const html = processXRef(id, note.cloneNode(true)); // clone because processXRef modifies its argument
+            instance.setContent(html);
+          } finally {
+            instance.enable();
+            instance.show();
+          }
+        } else {
+          // See if we can fetch this
+          fetch(url.split('#')[0]) // request the part of the href before "#"
+          .then(res => res.text())
+          .then(html => {
+            const parser = new DOMParser();
+            const htmlDoc = parser.parseFromString(html, "text/html");
+            const note = htmlDoc.getElementById(id);
+            if (note !== null) {
+              const html = processXRef(id, note);
+              instance.setContent(html);
+            } 
+          }).finally(() => {
+            instance.enable();
+            instance.show();
+          });
+        }
+      } else {
+        // See if we can fetch a full url (with no hash to target)
+        // This is a special case and we should probably do some content thinning / targeting
+        fetch(url)
+        .then(res => res.text())
+        .then(html => {
+          const parser = new DOMParser();
+          const htmlDoc = parser.parseFromString(html, "text/html");
+          const note = htmlDoc.querySelector('main.content');
+          if (note !== null) {
+            // This should only happen for chapter cross references
+            // (since there is no id in the URL)
+            // remove the first header
+            if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
+              note.children[0].remove();
+            }
+            const html = processXRef(null, note); // null id selects the section-style preview in processXRef
+            instance.setContent(html);
+          } 
+        }).finally(() => {
+          instance.enable();
+          instance.show();
+        });
+      }
+    }, function(instance) { // onUntrigger callback: intentionally empty
+    });
+  }
+      let selectedAnnoteEl;
+      const selectorForAnnotation = ( cell, annotation) => { // Builds the selector span[data-code-cell="<cell>"][data-code-annotation="<annotation>"].
+        let cellAttr = 'data-code-cell="' + cell + '"';
+        let lineAttr = 'data-code-annotation="' +  annotation + '"';
+        const selector = 'span[' + cellAttr + '][' + lineAttr + ']'; // values are concatenated as-is, without escaping
+        return selector;
+      }
+      const selectCodeLines = (annoteEl) => { // Position highlight divs over the code lines targeted by annoteEl and record it as the current selection.
+        const doc = window.document;
+        const targetCell = annoteEl.getAttribute("data-target-cell");
+        const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
+        const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
+        const lines = annoteSpan.getAttribute("data-code-lines").split(","); // NOTE(review): throws if no span matches or the attribute is missing - confirm the markup always provides both
+        const lineIds = lines.map((line) => { // element ids have the form "<targetCell>-<line>"
+          return targetCell + "-" + line;
+        })
+        let top = null;
+        let height = null;
+        let parent = null;
+        if (lineIds.length > 0) { // split() always returns at least one entry, so this branch is always taken
+            //compute the position of the single el (top and bottom and make a div)
+            const el = window.document.getElementById(lineIds[0]); // NOTE(review): not null-checked before use below
+            top = el.offsetTop;
+            height = el.offsetHeight;
+            parent = el.parentElement.parentElement;
+          if (lineIds.length > 1) { // several lines: the height spans from the first line's top to the last line's bottom
+            const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
+            const bottom = lastEl.offsetTop + lastEl.offsetHeight;
+            height = bottom - top;
+          }
+          if (top !== null && height !== null && parent !== null) {
+            // cook up a div (if necessary) and position it 
+            let div = window.document.getElementById("code-annotation-line-highlight");
+            if (div === null) {
+              div = window.document.createElement("div");
+              div.setAttribute("id", "code-annotation-line-highlight");
+              div.style.position = 'absolute';
+              parent.appendChild(div);
+            }
+            div.style.top = top - 2 + "px"; // highlight extends 2px above and below the measured lines
+            div.style.height = height + 4 + "px";
+            div.style.left = 0;
+            let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
+            if (gutterDiv === null) {
+              gutterDiv = window.document.createElement("div");
+              gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
+              gutterDiv.style.position = 'absolute';
+              const codeCell = window.document.getElementById(targetCell);
+              const gutter = codeCell.querySelector('.code-annotation-gutter');
+              gutter.appendChild(gutterDiv);
+            }
+            gutterDiv.style.top = top - 2 + "px";
+            gutterDiv.style.height = height + 4 + "px";
+          }
+          selectedAnnoteEl = annoteEl; // read by the resize listener and the click handlers
+        }
+      };
+      const unselectCodeLines = () => { // Remove both highlight divs, if present, and clear the current selection.
+        const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
+        elementsIds.forEach((elId) => {
+          const div = window.document.getElementById(elId);
+          if (div) {
+            div.remove();
+          }
+        });
+        selectedAnnoteEl = undefined;
+      };
+        // Handle positioning of the toggle
+    window.addEventListener( // on resize, re-run selectCodeLines for the selected annotation so the highlight is repositioned
+      "resize",
+      throttle(() => { // throttle is a function declaration further down in this callback, so it is hoisted and usable here
+        elRect = undefined; // NOTE(review): elRect is not declared in the visible script; if undeclared elsewhere this creates an implicit global - confirm
+        if (selectedAnnoteEl) {
+          selectCodeLines(selectedAnnoteEl);
+        }
+      }, 10)
+    );
+    function throttle(fn, ms) { // Wraps fn: the first call runs immediately; later calls collapse into one trailing call made ms after the most recent call.
+    let throttle = false; // shadows the function name inside this scope; true once the first call has gone through
+    let timer;
+      return (...args) => {
+        if(!throttle) { // first call gets through
+            fn.apply(this, args);
+            throttle = true;
+        } else { // all the others get throttled
+            if(timer) clearTimeout(timer); // cancel #2
+            timer = setTimeout(() => {
+              fn.apply(this, args);
+              timer = throttle = false; // reset so the next call runs immediately again
+            }, ms);
+        }
+      };
+    }
+      // Attach click handler to the DT
+      const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
+      for (const annoteDlNode of annoteDls) {
+        annoteDlNode.addEventListener('click', (event) => { // clicking toggles the highlight: select a new annotation, or unselect the current one
+          const clickedEl = event.target; // NOTE(review): this is the innermost clicked node, which is the <dt> only if it has no child elements - confirm
+          if (clickedEl !== selectedAnnoteEl) {
+            unselectCodeLines();
+            const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
+            if (activeEl) { // clear the active marker from the previously active <dt>
+              activeEl.classList.remove('code-annotation-active');
+            }
+            selectCodeLines(clickedEl);
+            clickedEl.classList.add('code-annotation-active');
+          } else {
+            // Unselect the line
+            unselectCodeLines();
+            clickedEl.classList.remove('code-annotation-active');
+          }
+        });
+      }
+  const findCites = (el) => { // Walk up from el to the nearest ancestor with a non-empty data-cites attribute; undefined if there is none.
+    const parentEl = el.parentElement;
+    if (parentEl) {
+      const cites = parentEl.dataset.cites;
+      if (cites) {
+        return {
+          el, // the child of the ancestor that carries data-cites, not that ancestor itself
+          cites: cites.split(' ') // data-cites is read as a space-separated list
+        };
+      } else {
+        return findCites(el.parentElement) // keep climbing
+      }
+    } else { // reached the top of the tree without finding data-cites
+      return undefined;
+    }
+  };
+  var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]'); // bibliography reference links
+  for (var i=0; i<bibliorefs.length; i++) {
+    const ref = bibliorefs[i];
+    const citeInfo = findCites(ref);
+    if (citeInfo) { // links with no data-cites ancestor get no tooltip
+      tippyHover(citeInfo.el, function() { // content callback: one entry per cited key, copied from the element with id "ref-<key>"
+        var popup = window.document.createElement('div');
+        citeInfo.cites.forEach(function(cite) {
+          var citeDiv = window.document.createElement('div');
+          citeDiv.classList.add('hanging-indent');
+          citeDiv.classList.add('csl-entry');
+          var biblioDiv = window.document.getElementById('ref-' + cite);
+          if (biblioDiv) { // when the entry is missing, the empty div is still appended
+            citeDiv.innerHTML = biblioDiv.innerHTML;
+          }
+          popup.appendChild(citeDiv);
+        });
+        return popup.innerHTML;
+      });
+    }
+  }
+});
+</script>
+<nav class="page-navigation">
+  <div class="nav-page nav-page-previous">
+      <a href="./data_description.html" class="pagination-link" aria-label="Data description">
+        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text">Data description</span>
+      </a>          
+  </div>
+  <div class="nav-page nav-page-next">
+  </div>
+</nav>
+</div> <!-- /content -->
+
+
+
+
+</body></html>
\ No newline at end of file
diff --git a/_book/exercise_solutions_files/figure-html/aesthetic solution-1.png b/_book/exercise_solutions_files/figure-html/aesthetic solution-1.png
new file mode 100644
index 0000000..53389d2
Binary files /dev/null and b/_book/exercise_solutions_files/figure-html/aesthetic solution-1.png differ
diff --git a/_book/exercise_solutions_files/figure-html/region total spend line graph-1.png b/_book/exercise_solutions_files/figure-html/region total spend line graph-1.png
new file mode 100644
index 0000000..25d7885
Binary files /dev/null and b/_book/exercise_solutions_files/figure-html/region total spend line graph-1.png differ
diff --git a/_book/exercise_solutions_files/figure-html/scatter with line of best fit by region-1.png b/_book/exercise_solutions_files/figure-html/scatter with line of best fit by region-1.png
new file mode 100644
index 0000000..6ef173c
Binary files /dev/null and b/_book/exercise_solutions_files/figure-html/scatter with line of best fit by region-1.png differ
diff --git a/_book/exercise_solutions_files/figure-html/scatter with line of best fit-1.png b/_book/exercise_solutions_files/figure-html/scatter with line of best fit-1.png
new file mode 100644
index 0000000..27e9b56
Binary files /dev/null and b/_book/exercise_solutions_files/figure-html/scatter with line of best fit-1.png differ
diff --git a/_book/exercise_solutions_files/figure-html/sfa histogram-1.png b/_book/exercise_solutions_files/figure-html/sfa histogram-1.png
new file mode 100644
index 0000000..e3a4902
Binary files /dev/null and b/_book/exercise_solutions_files/figure-html/sfa histogram-1.png differ
diff --git a/_book/index.html b/_book/index.html
index 8cf954e..aae1bb3 100644
--- a/_book/index.html
+++ b/_book/index.html
@@ -7,7 +7,7 @@
 <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
 
 <meta name="author" content="Sophie Lee">
-<meta name="dcterms.date" content="2024-06-15">
+<meta name="dcterms.date" content="2024-07-15">
 
 <title>Introduction to R with Tidyverse</title>
 <style>
@@ -103,14 +103,17 @@
   <li class="nav-item compact">
     <a class="nav-link" href="https://x.com/SophieStats10"> <i class="bi bi-twitter" role="img" aria-label="Twitter">
 </i> 
+<span class="menu-text"></span></a>
+  </li>  
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://buymeacoffee.com/sophie_a_lee"> <i class="bi bi-cup-hot" role="img" aria-label="Buy me a coffee">
+</i> 
 <span class="menu-text"></span></a>
   </li>  
 </ul>
           </div> <!-- /navcollapse -->
-          <div class="quarto-navbar-tools tools-wide">
-    <a href="https://github.com/sophie-a-lee/Introduction_R_Tidyverse_course" title="Source Code" class="quarto-navigation-tool px-1" aria-label="Source Code"><i class="bi bi-github"></i></a>
+          <div class="quarto-navbar-tools">
     <a href="./Introduction-to-R-with-Tidyverse.pdf" title="Download PDF" class="quarto-navigation-tool px-1" aria-label="Download PDF"><i class="bi bi-file-pdf"></i></a>
-    <a href="https://twitter.com/intent/tweet?url=|url|" title="Twitter" class="quarto-navigation-tool px-1" aria-label="Twitter"><i class="bi bi-twitter"></i></a>
 </div>
       </div> <!-- /container-fluid -->
     </nav>
@@ -119,7 +122,7 @@
       <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
         <i class="bi bi-layout-text-sidebar-reverse"></i>
       </button>
-        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./index.html"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Welcome!</span></a></li></ol></nav>
+        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./index.html">Welcome!</a></li></ol></nav>
         <a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">      
         </a>
     </div>
@@ -134,39 +137,62 @@
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./index.html" class="sidebar-item-text sidebar-link active">
- <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Welcome!</span></span></a>
+ <span class="menu-text">Welcome!</span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session1_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Session 1: Introduction to R and RStudio</span></span></a>
+ <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction to R and RStudio</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session2_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Session 2: Introduction to tidyverse and data wrangling</span></span></a>
+ <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Introduction to tidyverse and data wrangling</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session3_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Session 3: Data preparation and manipulation</span></span></a>
+ <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Data preparation and manipulation</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session4_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Session 4: Data visualisation with ggplot2</span></span></a>
+ <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Data visualisation with ggplot2</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session5_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+ <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item sidebar-item-section">
+      <div class="sidebar-item-container"> 
+            <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
+ <span class="menu-text">Appendices</span></a>
+          <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
+            <i class="bi bi-chevron-right ms-2"></i>
+          </a> 
+      </div>
+      <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">  
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./data_description.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Data description</span></a>
+  </div>
+</li>
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./exercise_solutions.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Exercise solutions</span></a>
   </div>
 </li>
+      </ul>
+  </li>
     </ul>
     </div>
 </nav>
@@ -177,10 +203,10 @@
     <h2 id="toc-title">Table of contents</h2>
    
   <ul>
-  <li><a href="#welcome" id="toc-welcome" class="nav-link active" data-scroll-target="#welcome"><span class="header-section-number">1</span> Welcome!</a>
+  <li><a href="#welcome" id="toc-welcome" class="nav-link active" data-scroll-target="#welcome">Welcome!</a>
   <ul class="collapse">
-  <li><a href="#data-used-for-the-course" id="toc-data-used-for-the-course" class="nav-link" data-scroll-target="#data-used-for-the-course"><span class="header-section-number">1.1</span> Data used for the course</a></li>
-  <li><a href="#license" id="toc-license" class="nav-link" data-scroll-target="#license"><span class="header-section-number">1.2</span> License</a></li>
+  <li><a href="#data-used-for-the-course" id="toc-data-used-for-the-course" class="nav-link" data-scroll-target="#data-used-for-the-course">Data used for the course</a></li>
+  <li><a href="#license" id="toc-license" class="nav-link" data-scroll-target="#license">License</a></li>
   </ul></li>
   </ul>
 </nav>
@@ -207,7 +233,7 @@ <h1 class="title">Introduction to R with Tidyverse</h1>
     <div>
     <div class="quarto-title-meta-heading">Published</div>
     <div class="quarto-title-meta-contents">
-      <p class="date">June 15, 2024</p>
+      <p class="date">July 15, 2024</p>
     </div>
   </div>
   
@@ -219,8 +245,8 @@ <h1 class="title">Introduction to R with Tidyverse</h1>
 </header>
 
 
-<section id="welcome" class="level1" data-number="1">
-<h1 data-number="1"><span class="header-section-number">1</span> Welcome!</h1>
+<section id="welcome" class="level1 unnumbered">
+<h1 class="unnumbered">Welcome!</h1>
 <p>Welcome to the course materials for the <strong>Introduction to R with Tidyverse</strong> course.</p>
 <p>This course is designed to equip you with the essential skills to leverage the power of R and Tidyverse for their work. The course begins with a gentle introduction to the user-friendly RStudio interface and the basics of the R coding language, or syntax. This makes it ideal for anyone with little or no prior coding experience, or those looking for a refresher of the basics.</p>
 <p>Through this course, you will learn how to manipulate, transform, and clean data efficiently, and how to create compelling visualisations to communicate your findings effectively. Throughout the course, we will discuss best practices for reproducible coding.</p>
@@ -237,14 +263,14 @@ <h1 data-number="1"><span class="header-section-number">1</span> Welcome!</h1>
 <p>Throughout these notes, you will also see boxes like this containing ‘style tips’. These ensure that your code follows the <a href="https://style.tidyverse.org/">Tidyverse style guide</a>, making it as consistent and readable as possible.</p>
 </div>
 </div>
-<section id="data-used-for-the-course" class="level2" data-number="1.1">
-<h2 data-number="1.1" class="anchored" data-anchor-id="data-used-for-the-course"><span class="header-section-number">1.1</span> Data used for the course</h2>
+<section id="data-used-for-the-course" class="level2">
+<h2 class="anchored" data-anchor-id="data-used-for-the-course">Data used for the course</h2>
 <p>The examples and exercises in these materials are based on real world data. This data relates to the Core Spending Power (CSP) of English local authorities between 2015 and 2020.</p>
 <p>Data for this course can be downloaded from the <code>data</code> folder of this course’s <a href="https://github.com/sophie-a-lee/Introduction_R_Tidyverse_course">repository</a>.</p>
-<p>For more information about this data, including variable descriptions and sources, see <a href="data_description.qmd">the appendix</a>.</p>
+<p>For more information about this data, including variable descriptions and sources, see <a href="./data_description.html">the appendix</a>.</p>
 </section>
-<section id="license" class="level2" data-number="1.2">
-<h2 data-number="1.2" class="anchored" data-anchor-id="license"><span class="header-section-number">1.2</span> License</h2>
+<section id="license" class="level2">
+<h2 class="anchored" data-anchor-id="license">License</h2>
 <p>I believe that science should not be behind a paywall, that is why these materials are available for free online, licensed under a <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA licence</a>.</p>
 
 
@@ -665,8 +691,8 @@ <h2 data-number="1.2" class="anchored" data-anchor-id="license"><span class="hea
   <div class="nav-page nav-page-previous">
   </div>
   <div class="nav-page nav-page-next">
-      <a href="./session1_notes.html" class="pagination-link" aria-label="Session 1: Introduction to R and RStudio">
-        <span class="nav-page-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Session 1: Introduction to R and RStudio</span></span> <i class="bi bi-arrow-right-short"></i>
+      <a href="./session1_notes.html" class="pagination-link" aria-label="Introduction to R and RStudio">
+        <span class="nav-page-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction to R and RStudio</span></span> <i class="bi bi-arrow-right-short"></i>
       </a>
   </div>
 </nav>
diff --git a/_book/search.json b/_book/search.json
index 7675ead..89d2319 100644
--- a/_book/search.json
+++ b/_book/search.json
@@ -4,159 +4,379 @@
     "href": "index.html",
     "title": "Introduction to R with Tidyverse",
     "section": "",
-    "text": "1 Welcome!\nWelcome to the course materials for the Introduction to R with Tidyverse course.\nThis course is designed to equip you with the essential skills to leverage the power of R and Tidyverse for their work. The course begins with a gentle introduction to the user-friendly RStudio interface and the basics of the R coding language, or syntax. This makes it ideal for anyone with little or no prior coding experience, or those looking for a refresher of the basics.\nThrough this course, you will learn how to manipulate, transform, and clean data efficiently, and how to create compelling visualisations to communicate your findings effectively. Throughout the course, we will discuss best practices for reproducible coding.",
+    "text": "Welcome!\nWelcome to the course materials for the Introduction to R with Tidyverse course.\nThis course is designed to equip you with the essential skills to leverage the power of R and Tidyverse for your work. The course begins with a gentle introduction to the user-friendly RStudio interface and the basics of the R coding language, or syntax. This makes it ideal for anyone with little or no prior coding experience, or those looking for a refresher of the basics.\nThrough this course, you will learn how to manipulate, transform, and clean data efficiently, and how to create compelling visualisations to communicate your findings effectively. Throughout the course, we will discuss best practices for reproducible coding.",
     "crumbs": [
-      "<span class='chapter-number'>1</span>  <span class='chapter-title'>Welcome!</span>"
+      "Welcome!"
     ]
   },
   {
     "objectID": "index.html#data-used-for-the-course",
     "href": "index.html#data-used-for-the-course",
     "title": "Introduction to R with Tidyverse",
-    "section": "1.1 Data used for the course",
-    "text": "1.1 Data used for the course\nThe examples and exercises in these materials are based on real world data. This data relates to the Core Spending Power (CSP) of English local authorities between 2015 and 2020.\nData for this course can be downloaded from the data folder of this course’s repository.\nFor more information about this data, including variable descriptions and sources, see the appendix.",
+    "section": "Data used for the course",
+    "text": "Data used for the course\nThe examples and exercises in these materials are based on real world data. This data relates to the Core Spending Power (CSP) of English local authorities between 2015 and 2020.\nData for this course can be downloaded from the data folder of this course’s repository.\nFor more information about this data, including variable descriptions and sources, see the appendix.",
     "crumbs": [
-      "<span class='chapter-number'>1</span>  <span class='chapter-title'>Welcome!</span>"
+      "Welcome!"
     ]
   },
   {
     "objectID": "index.html#license",
     "href": "index.html#license",
     "title": "Introduction to R with Tidyverse",
-    "section": "1.2 License",
-    "text": "1.2 License\nI believe that science should not be behind a paywall, that is why these materials are available for free online, licensed under a CC BY-SA licence.",
+    "section": "License",
+    "text": "License\nI believe that science should not be behind a paywall, that is why these materials are available for free online, licensed under a CC BY-SA licence.",
     "crumbs": [
-      "<span class='chapter-number'>1</span>  <span class='chapter-title'>Welcome!</span>"
+      "Welcome!"
     ]
   },
   {
     "objectID": "session1_notes.html",
     "href": "session1_notes.html",
-    "title": "2  Session 1: Introduction to R and RStudio",
+    "title": "1  Introduction to R and RStudio",
     "section": "",
-    "text": "2.1 The RStudio interface\nThere are a number of software packages based on the R programming language aimed at making writing and running analyses easier for users. They all run R in the background but look different and contain different features.\nRStudio has been chosen for this course as it allows users to create script files, allowing code to be re-run, edited, and shared easily. RStudio also provides tools to help easily identify errors in R code, integrates help documentation into the main console and uses colour-coding to help read code at a glance.\nBefore installing RStudio, we must ensure that R is downloaded onto the machine. R is available to download for free for Windows, Mac, or Linux via the CRAN website.\nRstudio is also free to download from the Posit website.",
+    "text": "1.1 The RStudio interface\nThere are a number of software packages based on the R programming language aimed at making writing and running analyses easier for users. They all run R in the background but look different and contain different features.\nRStudio has been chosen for this course as it allows users to create script files, allowing code to be re-run, edited, and shared easily. RStudio also provides tools to help easily identify errors in R code, integrates help documentation into the main console and uses colour-coding to help read code at a glance.\nBefore installing RStudio, we must ensure that R is downloaded onto the machine. R is available to download for free for Windows, Mac, or Linux via the CRAN website.\nRstudio is also free to download from the Posit website.",
     "crumbs": [
-      "<span class='chapter-number'>2</span>  <span class='chapter-title'>Session 1: Introduction to R and RStudio</span>"
+      "<span class='chapter-number'>1</span>  <span class='chapter-title'>Introduction to R and RStudio</span>"
     ]
   },
   {
     "objectID": "session1_notes.html#the-rstudio-interface",
     "href": "session1_notes.html#the-rstudio-interface",
-    "title": "2  Session 1: Introduction to R and RStudio",
+    "title": "1  Introduction to R and RStudio",
     "section": "",
-    "text": "2.1.0.1 The RStudio console window\nThe screenshot below shows the RStudio interface which comprises of four windows:\n\n\n\nRStudio interface\n\n\nWindow A: R script files\nAll analysis and actions in R are carried out using the R syntax language. R script files allow you to write and edit code before running it in the console window.\n\n\n\n\n\n\nStyle tip\n\n\n\nLimit script files to 80 characters per line to ensure it is readable.\nRStudio has an option to add a margin that makes this easier to adhere to. Under the Tools drop-down menu, select Global options. Select Code from the list on the right, then under the Display tab, tick the Show margin box.\n\n\nIf this window is not visible, create a new script file using File -&gt; New File -&gt; R Script from the drop-down menus or clicking the  icon above the console and selecting R Script. This will open a new, blank script file. More than one script file can be open at the same time.\nCode entered into the script file does not run automatically. To run commands from the script, highlight the code and click the  icon above the top right corner of the script window (this can be carried out by pressing Ctrl + Enter in Windows or Command + Enter on a Mac computer). More than one command can be run at the same time by highlighting all of them.\nThe main advantage of using the script file rather than entering the code directly into the console is that it can be saved, edited and shared. To save a script file, use File -&gt; Save As… from the drop down menu, or click the  icon at the top of the window. It is important to save the script files at regular intervals to avoid losing work. Once the script file has been saved, we can also use the keyboard shortcuts Ctrl + s on Windows and Command + s on Mac to save the latest script file.\n\n\n\n\n\n\nStyle tip\n\n\n\nScript file names should be meaningful, lower case, and end in .R. Avoid using special characters in file names, including spaces. 
Use _ instead of spaces.\nWhere files should be run in a specific order, prefix the file name with numbers.\n\n\nPast script files can be opened using File -&gt; Open File… from the drop-down menu or by clicking the  icon and selecting a .R file. The keyboard shortcut to open an existing script file is Ctrl + o on Windows, and Command + o on Macs.\nWindow B: The R console\nThe R console window is where all commands run from the script file, results (other than plots), and messages, such as errors, are displayed. Commands can be written directly into the R console after the &gt; symbol and executed using Enter on the keyboard. It is not recommended to write code directly into the console as it is cannot be saved or replicated.\nEvery time a new R session is opened, details about version and citations of R will be given by default. To clear text from the console window, use the keyboard shortcut control + l (this is the same for both Windows and Mac users). Be aware that this clears all text from the console, including any results. Before running this command, check that any results can be replicated within the script file.\nWindow C: Environment and history\nThis window lists all data and objects currently loaded into R. More details on the types of objects and how to use the Environment window are given in later sections.\nWindow D: Files, plots, packages and help\nThis window has many potential uses: graphics are displayed and can be saved from here, and R help files will appear here. This window is only available in the RStudio interface and not in the basic R package.\n\n\nExercise 1\n\nOpen a new script file if you have not already done so.\nSave this script file into an appropriate location.",
+    "text": "1.1.0.1 The RStudio console window\nThe screenshot below shows the RStudio interface which comprises of four windows:\n\n\n\nRStudio interface\n\n\nWindow A: R script files\nAll analysis and actions in R are carried out using the R syntax language. R script files allow you to write and edit code before running it in the console window.\n\n\n\n\n\n\nStyle tip\n\n\n\nLimit script files to 80 characters per line to ensure it is readable.\nRStudio has an option to add a margin that makes this easier to adhere to. Under the Tools drop-down menu, select Global options. Select Code from the list on the right, then under the Display tab, tick the Show margin box.\n\n\nIf this window is not visible, create a new script file using File -&gt; New File -&gt; R Script from the drop-down menus or clicking the  icon above the console and selecting R Script. This will open a new, blank script file. More than one script file can be open at the same time.\nCode entered into the script file does not run automatically. To run commands from the script, highlight the code and click the  icon above the top right corner of the script window (this can be carried out by pressing Ctrl + Enter in Windows or Command + Enter on a Mac computer). More than one command can be run at the same time by highlighting all of them.\nThe main advantage of using the script file rather than entering the code directly into the console is that it can be saved, edited and shared. To save a script file, use File -&gt; Save As… from the drop down menu, or click the  icon at the top of the window. It is important to save the script files at regular intervals to avoid losing work. Once the script file has been saved, we can also use the keyboard shortcuts Ctrl + s on Windows and Command + s on Mac to save the latest script file.\n\n\n\n\n\n\nStyle tip\n\n\n\nScript file names should be meaningful, lower case, and end in .R. Avoid using special characters in file names, including spaces. 
Use _ instead of spaces.\nWhere files should be run in a specific order, prefix the file name with numbers.\n\n\nPast script files can be opened using File -&gt; Open File… from the drop-down menu or by clicking the  icon and selecting a .R file. The keyboard shortcut to open an existing script file is Ctrl + o on Windows, and Command + o on Macs.\nWindow B: The R console\nThe R console window is where all commands run from the script file, results (other than plots), and messages, such as errors, are displayed. Commands can be written directly into the R console after the &gt; symbol and executed using Enter on the keyboard. It is not recommended to write code directly into the console as it cannot be saved or replicated.\nEvery time a new R session is opened, details about version and citations of R will be given by default. To clear text from the console window, use the keyboard shortcut control + l (this is the same for both Windows and Mac users). Be aware that this clears all text from the console, including any results. Before running this command, check that any results can be replicated within the script file.\nWindow C: Environment and history\nThis window lists all data and objects currently loaded into R. More details on the types of objects and how to use the Environment window are given in later sections.\nWindow D: Files, plots, packages and help\nThis window has many potential uses: graphics are displayed and can be saved from here, and R help files will appear here. This window is only available in the RStudio interface and not in the basic R package.\n\n\nExercise 1\n\nOpen a new script file if you have not already done so.\nSave this script file into an appropriate location.",
     "crumbs": [
-      "<span class='chapter-number'>2</span>  <span class='chapter-title'>Session 1: Introduction to R and RStudio</span>"
+      "<span class='chapter-number'>1</span>  <span class='chapter-title'>Introduction to R and RStudio</span>"
     ]
   },
   {
     "objectID": "session1_notes.html#r-syntax",
     "href": "session1_notes.html#r-syntax",
-    "title": "2  Session 1: Introduction to R and RStudio",
-    "section": "2.2 R syntax",
-    "text": "2.2 R syntax\nAll analyses within R are carried out using syntax, the R programming language. It is important to note that R is case-sensitive, so always ensure that you use the correct combination of upper and lower case letters when running functions or calling objects.\nAny text written in the R console or script file can be treated the same as text from other documents or programmes: text can be highlighted, copied and pasted to make coding more efficient.\nWhen creating script files, it is important to ensure they are clear and easy to read. Comments can be added to script files using the # symbol. R will ignore any text following the # on the same line.\n\n\n\n\n\n\nStyle tip\n\n\n\nCombining # and - creates sections within a script file, making them easier to navigate and organise.\nFor example:\n\n# Load data ----------\n\n# Tidy data ----------\n\n\n\n\n\n\n\n\n\nHelpful hint\n\n\n\nTo comment out chunks of code, highlight the rows and use the keyboard shortcut ctrl + shift + c on Windows, and Command + shift + c on Mac\n\n\nThe choice of brackets in R coding is particularly important as they all have different functions:\n\nRound brackets ( ) are the most commonly used as they define arguments of functions. Any text followed by round brackets is assumed to be a function and R will attempt to run it. If the name of a function is not followed by round brackets, R will return the algorithm used to create the function within the console.\nSquare brackets [ ] are used to set criteria or conditions within a function or object.\nCurly brackets { } are used within loops, when creating a new function, and within for and if functions.\n\nAll standard notation for mathematical calculations (+, -, *, /, ^, etc.) are compatible with R. 
At its simplest level, R is just a very powerful calculator!\n\n\n\n\n\n\nStyle tip\n\n\n\nAlthough R will work whether a space is added before/after a mathematical operator, the style guide recommends to add them surrounding most mathematical operations (+, -, *, /), but not around ^.\nFor example:\n\n# Stylish code\n1959 - 683\n(351 + 457)^2 - (213 + 169)^2\n\n# Un-stylish code\n1959-683\n(351+457)^2 - (213 + 169) ^ 2\n\n\n\n\nExercise 2\n\nAdd your name and the date to the top of your script file (hint: comment this out so R does not try to run it)\nUse R to calculate the following calculations. Add the result to the same line of the script file in a way that ensures there are no errors in the code.\n\n\n\\(64^2\\)\n\\(3432 \\div 8\\)\n\\(96 \\times 72\\)\n\nWhen you have finished this exercise, select the entire script file (using ctrl + a on windows or Command + a on Mac) and run it to ensure there are no errors in the code.",
+    "title": "1  Introduction to R and RStudio",
+    "section": "1.2 R syntax",
+    "text": "1.2 R syntax\nAll analyses within R are carried out using syntax, the R programming language. It is important to note that R is case-sensitive, so always ensure that you use the correct combination of upper and lower case letters when running functions or calling objects.\nAny text written in the R console or script file can be treated the same as text from other documents or programmes: text can be highlighted, copied and pasted to make coding more efficient.\nWhen creating script files, it is important to ensure they are clear and easy to read. Comments can be added to script files using the # symbol. R will ignore any text following the # on the same line.\n\n\n\n\n\n\nStyle tip\n\n\n\nCombining # and - creates sections within a script file, making them easier to navigate and organise.\nFor example:\n\n# Load data ----------\n\n# Tidy data ----------\n\n\n\n\n\n\n\n\n\nHelpful hint\n\n\n\nTo comment out chunks of code, highlight the rows and use the keyboard shortcut ctrl + shift + c on Windows, and Command + shift + c on Mac\n\n\nThe choice of brackets in R coding is particularly important as they all have different functions:\n\nRound brackets ( ) are the most commonly used as they define arguments of functions. Any text followed by round brackets is assumed to be a function and R will attempt to run it. If the name of a function is not followed by round brackets, R will return the algorithm used to create the function within the console.\nSquare brackets [ ] are used to set criteria or conditions within a function or object.\nCurly brackets { } are used within loops, when creating a new function, and within for and if functions.\n\nAll standard notation for mathematical calculations (+, -, *, /, ^, etc.) are compatible with R. 
At its simplest level, R is just a very powerful calculator!\n\n\n\n\n\n\nStyle tip\n\n\n\nAlthough R will work whether a space is added before/after a mathematical operator, the style guide recommends to add them surrounding most mathematical operations (+, -, *, /), but not around ^.\nFor example:\n\n# Stylish code\n1959 - 683\n(351 + 457)^2 - (213 + 169)^2\n\n# Un-stylish code\n1959-683\n(351+457)^2 - (213 + 169) ^ 2\n\n\n\n\nExercise 2\n\nAdd your name and the date to the top of your script file (hint: comment this out so R does not try to run it)\nUse R to calculate the following calculations. Add the result to the same line of the script file in a way that ensures there are no errors in the code.\n\n\n\\(64^2\\)\n\\(3432 \\div 8\\)\n\\(96 \\times 72\\)\n\nWhen you have finished this exercise, select the entire script file (using ctrl + a on windows or Command + a on Mac) and run it to ensure there are no errors in the code.",
     "crumbs": [
-      "<span class='chapter-number'>2</span>  <span class='chapter-title'>Session 1: Introduction to R and RStudio</span>"
+      "<span class='chapter-number'>1</span>  <span class='chapter-title'>Introduction to R and RStudio</span>"
     ]
   },
   {
     "objectID": "session1_notes.html#r-objects-and-functions",
     "href": "session1_notes.html#r-objects-and-functions",
-    "title": "2  Session 1: Introduction to R and RStudio",
-    "section": "2.3 R objects and functions",
-    "text": "2.3 R objects and functions\n\n2.3.1 Objects\nOne of the main advantages to using R over other software packages such as SPSS is that more than one dataset can be accessed at the same time. A collection of data stored in any format within the R session is known as an object. Objects can include single numbers, single variables, entire datasets, lists of datasets, or even tables and graphs.\n\n\n\n\n\n\nStyle tip\n\n\n\nObject names should only contain lower case letters, numbers and _ (instead of a space to separate words). The should be meaningful and concise.\n\n\nObjects are defined in R using the &lt;- or = symbols. For example,\n\nobject_1 &lt;- 81\n\nCreates an object in the environment named object_1, which takes the value 81. This will appear in the environment window of the console (window C from the interface shown earlier).\n\n\n\n\n\n\nStyle tip\n\n\n\nAlthough both work, use &lt;- for assignment, not =.\n\n\nTo retrieve an object, type its name into the script or console and run it. This object can then be included in functions or operations in place of the value assigned to it:\n\nobject_1\n## [1] 81\n\nobject_1 * 2\n## [1] 162\n\nR has some mathematical objects stored by default such as pi that can be used in calculations.\n\npi\n## [1] 3.141593\n\nThe [1] that appears at the beginning of each output line indicates that this is the first element in the object. If there were two lines then the second line would start with the number of that element in square brackets.\nFor example, if we had an object with 6 elements and when called the first line contained the first 5 elements, each line would begin with [1] and [6] respectively.\n\n\n2.3.2 Functions\nFunctions are built-in commands that allow R users to run analyses. All functions require the definition of arguments within round brackets (). Each function requires different information and has different arguments that can be used to customise the analysis. 
A detailed list of these arguments and a description of the function can be found in the function’s associated help file.\n\n\n2.3.3 Help files\nEach function that exists within R has an associated help file. RStudio does not require an internet connection to access these help files if the function is available in the current session of R.\nTo retrieve help files, enter ? followed by the function name into the console window, e.g ?mean. The help file will appear in window D of the interface shown in the introduction.\nHelp files contain the following information:\n\nDescription: what the function is used for\nUsage: how the function is used\nArguments: required and optional arguments entered into round brackets necessary for the function to work\nDetails: relevant details about the function in question\nReferences\nSee also: links to other relevant functions\nExamples: example code with applications of the function\n\n\n\n2.3.4 Error and warning messages\nWhere a function or object has not been correctly specified, or their is some mistake in the syntax that has been sent to the console, R will return an error message. These messages are generally informative and include the location of the error.\nThe most common errors include misspelling functions or objects:\n\nsqrt(ojbect_1)\n## Error in eval(expr, envir, enclos): object 'ojbect_1' not found\n\nSqrt(object_1)\n## Error in Sqrt(object_1): could not find function \"Sqrt\"\n\nOr where an object has not yet been specified:\n\nplot(x, y)\n## Error in eval(expr, envir, enclos): object 'x' not found\n\nWhen R returns an error message, this means that the operation has been completely halted. R may also return warning messages which look similar to errors but does not necessarily mean the operation has been stopped.\nWarnings are included to indicate that R suspects something in the operation may be wrong and should be checked. 
There are occasions where warnings can be ignored but this is only after the operation has been checked.\nWhen working within the R console, if an incomplete command is run, a + symbol will appear in the console, rather than the usual &gt;. This indicates that R expects you to keep writing the previous code. To overcome this issue, either finish the command on the next line of the console, or press the esc button on your keyboard to start from scratch.\nOne of the benefits of using RStudio rather than the basic R package is that it will suggest object or function names after typing the first few letters. This avoids spelling mistakes and accidental errors when running code. To accept the suggestion, either click the correct choice with your mouse or use the tab button on your keyboard.\n\n\n2.3.5 Cleaning the environment\nTo remove objects from the RStudio environment, we can use the rm function. This can be combined with the ls() function, which lists all objects in the environment, to remove all objects currently loaded:\n\nrm(list = ls())\n\n\n\n\n\n\n\nWarning\n\n\n\nThere are no undo and redo buttons for R syntax. The rm function will permanently delete objects from the environment. The only way to reverse this is to re-run the code that created the objects originally from the script file.\n\n\n\n\n2.3.6 R packages\nR packages are a collection of functions and datasets developed by R users that expand existing R capabilities or add completely new ones. Packages allow users to apply the most up-to-date methods shortly after they are developed, unlike other statistical software packages that require an entirely new version.\n\n2.3.6.1 Installing packages from CRAN\nThe quickest way to install a package in R is by using the install.packages function. This sends RStudio to the online repository of tested and verified R packages (known as CRAN) and downloads the package files onto the machine you are currently working from in temporary files. 
Ensure that the package you wish to install is spelled correctly and surrounded by ''.\n\n\n\n\n\n\nWarning\n\n\n\nThe install.packages function requires an internet connection, and can take a long time if the package has a lot of dependent packages that also need downloading.\nThis process should only be carried out the first time a package is used on a machine, or when a substantial update has taken place, to download the latest version of the package.\n\n\n\n\n2.3.6.2 Loading packages to an R session\nEvery time a new session of RStudio is opened, packages must be reloaded. To load a package into R (and gain access to the associated functions and data), use the library function.\nLoading a package does not require an internet connection, but will only work if the package has already been installed and saved onto the computer you are working from. If you are unsure, use the function installed.packages to return a list of all packages that are loaded onto the machine you are working from.\n\n\n\n\n\n\nStyle tip\n\n\n\nAdd your library function at the beginning of your script file. This reminds you to re-load packages when opening a new R session, and reduces the chance of error messages from functions requiring these packages.",
+    "title": "1  Introduction to R and RStudio",
+    "section": "1.3 R objects and functions",
+    "text": "1.3 R objects and functions\n\n1.3.1 Objects\nOne of the main advantages to using R over other software packages such as SPSS is that more than one dataset can be accessed at the same time. A collection of data stored in any format within the R session is known as an object. Objects can include single numbers, single variables, entire datasets, lists of datasets, or even tables and graphs.\n\n\n\n\n\n\nStyle tip\n\n\n\nObject names should only contain lower case letters, numbers and _ (instead of a space to separate words). They should be meaningful and concise.\n\n\nObjects are defined in R using the &lt;- or = symbols. For example,\n\nobject_1 &lt;- 81\n\nCreates an object in the environment named object_1, which takes the value 81. This will appear in the environment window of the console (window C from the interface shown earlier).\n\n\n\n\n\n\nStyle tip\n\n\n\nAlthough both work, use &lt;- for assignment, not =.\n\n\nTo retrieve an object, type its name into the script or console and run it. This object can then be included in functions or operations in place of the value assigned to it:\n\nobject_1\n## [1] 81\n\nobject_1 * 2\n## [1] 162\n\nR has some mathematical objects stored by default such as pi that can be used in calculations.\n\npi\n## [1] 3.141593\n\nThe [1] that appears at the beginning of each output line indicates that this is the first element in the object. If there were two lines then the second line would start with the number of that element in square brackets.\nFor example, if we had an object with 6 elements and when called the first line contained the first 5 elements, each line would begin with [1] and [6] respectively.\n\n\n1.3.2 Functions\nFunctions are built-in commands that allow R users to run analyses. All functions require the definition of arguments within round brackets (). Each function requires different information and has different arguments that can be used to customise the analysis. 
A detailed list of these arguments and a description of the function can be found in the function’s associated help file.\n\n\n1.3.3 Help files\nEach function that exists within R has an associated help file. RStudio does not require an internet connection to access these help files if the function is available in the current session of R.\nTo retrieve help files, enter ? followed by the function name into the console window, e.g. ?mean. The help file will appear in window D of the interface shown in the introduction.\nHelp files contain the following information:\n\nDescription: what the function is used for\nUsage: how the function is used\nArguments: required and optional arguments entered into round brackets necessary for the function to work\nDetails: relevant details about the function in question\nReferences\nSee also: links to other relevant functions\nExamples: example code with applications of the function\n\n\n\n1.3.4 Error and warning messages\nWhere a function or object has not been correctly specified, or there is some mistake in the syntax that has been sent to the console, R will return an error message. These messages are generally informative and include the location of the error.\nThe most common errors include misspelling functions or objects:\n\nsqrt(ojbect_1)\n## Error in eval(expr, envir, enclos): object 'ojbect_1' not found\n\nSqrt(object_1)\n## Error in Sqrt(object_1): could not find function \"Sqrt\"\n\nOr where an object has not yet been specified:\n\nplot(x, y)\n## Error in eval(expr, envir, enclos): object 'x' not found\n\nWhen R returns an error message, this means that the operation has been completely halted. R may also return warning messages which look similar to errors but do not necessarily mean the operation has been stopped.\nWarnings are included to indicate that R suspects something in the operation may be wrong and should be checked. 
There are occasions where warnings can be ignored but this is only after the operation has been checked.\nWhen working within the R console, if an incomplete command is run, a + symbol will appear in the console, rather than the usual &gt;. This indicates that R expects you to keep writing the previous code. To overcome this issue, either finish the command on the next line of the console, or press the esc button on your keyboard to start from scratch.\nOne of the benefits of using RStudio rather than the basic R package is that it will suggest object or function names after typing the first few letters. This avoids spelling mistakes and accidental errors when running code. To accept the suggestion, either click the correct choice with your mouse or use the tab button on your keyboard.\n\n\n1.3.5 Cleaning the environment\nTo remove objects from the RStudio environment, we can use the rm function. This can be combined with the ls() function, which lists all objects in the environment, to remove all objects currently loaded:\n\nrm(list = ls())\n\n\n\n\n\n\n\nWarning\n\n\n\nThere are no undo and redo buttons for R syntax. The rm function will permanently delete objects from the environment. The only way to reverse this is to re-run the code that created the objects originally from the script file.\n\n\n\n\n1.3.6 R packages\nR packages are a collection of functions and datasets developed by R users that expand existing R capabilities or add completely new ones. Packages allow users to apply the most up-to-date methods shortly after they are developed, unlike other statistical software packages that require an entirely new version.\n\n1.3.6.1 Installing packages from CRAN\nThe quickest way to install a package in R is by using the install.packages function. This sends RStudio to the online repository of tested and verified R packages (known as CRAN) and downloads the package files onto the machine you are currently working from in temporary files. 
Ensure that the package you wish to install is spelled correctly and surrounded by ''.\n\n\n\n\n\n\nWarning\n\n\n\nThe install.packages function requires an internet connection, and can take a long time if the package has a lot of dependent packages that also need downloading.\nThis process should only be carried out the first time a package is used on a machine, or when a substantial update has taken place, to download the latest version of the package.\n\n\n\n\n1.3.6.2 Loading packages to an R session\nEvery time a new session of RStudio is opened, packages must be reloaded. To load a package into R (and gain access to the associated functions and data), use the library function.\nLoading a package does not require an internet connection, but will only work if the package has already been installed and saved onto the computer you are working from. If you are unsure, use the function installed.packages to return a list of all packages that are loaded onto the machine you are working from.\n\n\n\n\n\n\nStyle tip\n\n\n\nAdd your library function at the beginning of your script file. This reminds you to re-load packages when opening a new R session, and reduces the chance of error messages from functions requiring these packages.",
     "crumbs": [
-      "<span class='chapter-number'>2</span>  <span class='chapter-title'>Session 1: Introduction to R and RStudio</span>"
+      "<span class='chapter-number'>1</span>  <span class='chapter-title'>Introduction to R and RStudio</span>"
     ]
   },
   {
     "objectID": "session2_notes.html",
     "href": "session2_notes.html",
-    "title": "3  Session 2: Introduction to tidyverse and data wrangling",
+    "title": "2  Introduction to tidyverse and data wrangling",
     "section": "",
-    "text": "3.1 Opening and exploring data",
+    "text": "2.1 Opening and exploring data",
     "crumbs": [
-      "<span class='chapter-number'>3</span>  <span class='chapter-title'>Session 2: Introduction to tidyverse and data wrangling</span>"
+      "<span class='chapter-number'>2</span>  <span class='chapter-title'>Introduction to tidyverse and data wrangling</span>"
     ]
   },
   {
     "objectID": "session2_notes.html#opening-and-exploring-data",
     "href": "session2_notes.html#opening-and-exploring-data",
-    "title": "3  Session 2: Introduction to tidyverse and data wrangling",
+    "title": "2  Introduction to tidyverse and data wrangling",
     "section": "",
-    "text": "3.1.1 Styles of R coding\nUp to this point, we have not thought about the style of R coding we will be using. There are different approaches to R coding that we can use, they can be thought of as different dialects of the R programming language.\nThe choice of R ‘dialect’ depends on personal preference. Some prefer to use the ‘base R’ approach that does not rely on any packages that may need updating, making it a more stable approach. However, base R can be difficult to read for those not comfortable with coding.\n\n\n\n\n\n\n\n\n\nThe alternative approach that we will be adopting in this course is the ‘tidyverse’ approach. Tidyverse is a set of packages that have been designed to make R coding more readable and efficient. They have been designed with reproducibility in mind, which means there is a wealth of online (mostly free), well-written resources available to help use these packages.\nIf you have not done so already, install the tidyverse packages to your machine using the following code:\n\ninstall.packages('tidyverse')\n\n\n\n\n\n\n\nWarning\n\n\n\nThis can take a long time if you have never downloaded the tidyverse packages before as there are many dependencies that are required. Do not stress if you get a lot of text in the console! This is normal, but watch out for any error messages.\n\n\nOnce the tidyverse package is installed, we must load it into the current working session. At the beginning of your script file add the following syntax:\n\nlibrary(tidyverse)\n\n\n\n\n\n\n\nStyle tip\n\n\n\nBegin every script file with the library command, loading packages in before any data. This avoids any potential errors arising where functions are called before the necessary package has been loaded into the current session.\n\n\n\n\n3.1.2 The working directory\nThe working directory is a file path on your computer that R sets as the default location when opening, saving, or exporting documents, files, and graphics. 
This file path can be specified manually but setting the working directory saves time and makes code more efficient.\nThe working directory can be set manually by using the Session -&gt; Set Working Directory -&gt; Change Directory… option from the drop-down menu, or the setwd function. Both options require the directory to be specified each time R is restarted, are sensitive to changes in folders within the file path, and cannot be used when script files are shared between colleagues.\nAn alternative approach that overcomes these issues is to create an R project.\n\n3.1.2.1 R projects\nR projects are files (saved with the .Rproj extension) that keep associated files (including scripts, data, and outputs) grouped together. An R project automatically sets the working directory relative to its current location, which makes collaborative work easier, and avoids issues when a file path is changed.\nProjects are created by using the File -&gt; New project option from the drop-down menu, or using the  icon from the top-right corner of the RStudio interface. Existing projects can be opened under the File -&gt; Open project… drop-down menu or using the project icon.\nWhen creating a new project, we must choose whether we want to create a new directory or use an existing one. Usually, we will have already set up a folder containing data or other documents related to the analysis we plan to carry out. If this is the case, we are using an existing directory and selecting the analysis folder as the project directory.\n\n\n\n\n\n\nStyle tip\n\n\n\nHave a clear order to your analysis folder. Consider creating separate folders within a project for input and output data, documentation, and outputs such as graphs or tables.",
+    "text": "2.1.1 Styles of R coding\nUp to this point, we have not thought about the style of R coding we will be using. There are different approaches to R coding that we can use, they can be thought of as different dialects of the R programming language.\nThe choice of R ‘dialect’ depends on personal preference. Some prefer to use the ‘base R’ approach that does not rely on any packages that may need updating, making it a more stable approach. However, base R can be difficult to read for those not comfortable with coding.\n\n\n\n\n\n\n\n\n\nThe alternative approach that we will be adopting in this course is the ‘tidyverse’ approach. Tidyverse is a set of packages that have been designed to make R coding more readable and efficient. They have been designed with reproducibility in mind, which means there is a wealth of online (mostly free), well-written resources available to help use these packages.\nIf you have not done so already, install the tidyverse packages to your machine using the following code:\n\ninstall.packages('tidyverse')\n\n\n\n\n\n\n\nWarning\n\n\n\nThis can take a long time if you have never downloaded the tidyverse packages before as there are many dependencies that are required. Do not stress if you get a lot of text in the console! This is normal, but watch out for any error messages.\n\n\nOnce the tidyverse package is installed, we must load it into the current working session. At the beginning of your script file add the following syntax:\n\nlibrary(tidyverse)\n\n\n\n\n\n\n\nStyle tip\n\n\n\nBegin every script file with the library command, loading packages in before any data. This avoids any potential errors arising where functions are called before the necessary package has been loaded into the current session.\n\n\n\n\n2.1.2 The working directory\nThe working directory is a file path on your computer that R sets as the default location when opening, saving, or exporting documents, files, and graphics. 
This file path can be specified manually but setting the working directory saves time and makes code more efficient.\nThe working directory can be set manually by using the Session -&gt; Set Working Directory -&gt; Change Directory… option from the drop-down menu, or the setwd function. Both options require the directory to be specified each time R is restarted, are sensitive to changes in folders within the file path, and cannot be used when script files are shared between colleagues.\nAn alternative approach that overcomes these issues is to create an R project.\n\n2.1.2.1 R projects\nR projects are files (saved with the .Rproj extension) that keep associated files (including scripts, data, and outputs) grouped together. An R project automatically sets the working directory relative to its current location, which makes collaborative work easier, and avoids issues when a file path is changed.\nProjects are created by using the File -&gt; New project option from the drop-down menu, or using the  icon from the top-right corner of the RStudio interface. Existing projects can be opened under the File -&gt; Open project… drop-down menu or using the project icon.\nWhen creating a new project, we must choose whether we want to create a new directory or use an existing one. Usually, we will have already set up a folder containing data or other documents related to the analysis we plan to carry out. If this is the case, we are using an existing directory and selecting the analysis folder as the project directory.\n\n\n\n\n\n\nStyle tip\n\n\n\nHave a clear order to your analysis folder. Consider creating separate folders within a project for input and output data, documentation, and outputs such as graphs or tables.",
     "crumbs": [
-      "<span class='chapter-number'>3</span>  <span class='chapter-title'>Session 2: Introduction to tidyverse and data wrangling</span>"
+      "<span class='chapter-number'>2</span>  <span class='chapter-title'>Introduction to tidyverse and data wrangling</span>"
     ]
   },
   {
     "objectID": "session2_notes.html#data-input",
     "href": "session2_notes.html#data-input",
-    "title": "3  Session 2: Introduction to tidyverse and data wrangling",
-    "section": "3.2 4.3 Data input",
-    "text": "3.2 4.3 Data input\nTo ensure our code is collaborative and reproducible, we should strive to store data in formats that can be used across multiple platforms. One of the best ways to do this is to store data as a comma-delimited file (.csv). CSV files can be opened by a range of different softwares (including R, SPSS, STATA and excel), and base R can be used to open these files without requiring additional packages.\nBefore loading files in R, it is essential to check that they are correctly formatted. Data files should only contain one sheet with no pictures or graphics, each row should correspond to a case or observation and each column should correspond to a variable.\nTo avoid any errors arising from spelling mistakes, we can use the list.files function to return a list of files and folders from the current working directory. The file names can be copied from the console and pasted into the script file. As the data are saved in a folder within the working directory, we must add the argument path = to specify the folder we want to list files from.\n\nlist.files(path = \"data\")\n## [1] \"CSP_2015.csv\"         \"CSP_2016.csv\"         \"CSP_2017.csv\"        \n## [4] \"CSP_2018.csv\"         \"CSP_2019.csv\"         \"CSP_2020.csv\"        \n## [7] \"CSP_long_201520.csv\"  \"data_description.pdf\"\n\nThis list should contain 6 CSV files with the core spending power in local authorities in England between 2015 and 2020. We will first load and explore the 2020 data using the read_csv function and attaching the data to an object. Remember to add the data folder to the file name.\n\ncsp_2020 &lt;- read_csv(\"data/CSP_2020.csv\")\n\nImported datasets will appear in the Environment window of the console once they are saved as objects. This Environment also displays the number of variables and observations in each object. 
To preview the contents of an object, click on its name in the Environment window or use the function View(data).\nOther useful functions that help explore a dataset include:\n\n# Return variable names from a dataset object\nnames(csp_2020)\n## [1] \"ons_code\"         \"authority\"        \"region\"           \"sfa_2020\"        \n## [5] \"under_index_2020\" \"ct_total_2020\"    \"nhb_2020\"         \"nhb_return_2020\" \n## [9] \"rsdg_2020\"\n\n\n\n\n\n\n\nStyle tip\n\n\n\nVariable names should follow the same style rules as object names: only contain lower case letters, numbers, and use _ to separate words. They should be meaningful and concise.\n\n\n\n# Display information about the structure of an object\nstr(csp_2020)\n## spc_tbl_ [396 × 9] (S3: spec_tbl_df/tbl_df/tbl/data.frame)\n##  $ ons_code        : chr [1:396] \"E07000223\" \"E07000026\" \"E07000032\" \"E07000224\" ...\n##  $ authority       : chr [1:396] \"Adur\" \"Allerdale\" \"Amber Valley\" \"Arun\" ...\n##  $ region          : chr [1:396] \"SE\" \"NW\" \"EM\" \"SE\" ...\n##  $ sfa_2020        : num [1:396] 1.77 3.85 3.23 3.67 4.08 ...\n##  $ under_index_2020: num [1:396] 0.0708 0.1465 0.1292 0.147 0.1557 ...\n##  $ ct_total_2020   : num [1:396] 6.53 5.4 6.85 11.61 6.42 ...\n##  $ nhb_2020        : num [1:396] 0.0881 0.6061 1.5786 2.2949 1.1547 ...\n##  $ nhb_return_2020 : num [1:396] 0 0 0 0 0 0 0 0 0 0 ...\n##  $ rsdg_2020       : num [1:396] 0 0.326 0 0 0 ...\n##  - attr(*, \"spec\")=\n##   .. cols(\n##   ..   ons_code = col_character(),\n##   ..   authority = col_character(),\n##   ..   region = col_character(),\n##   ..   sfa_2020 = col_double(),\n##   ..   under_index_2020 = col_double(),\n##   ..   ct_total_2020 = col_double(),\n##   ..   nhb_2020 = col_double(),\n##   ..   nhb_return_2020 = col_double(),\n##   ..   rsdg_2020 = col_double()\n##   .. )\n##  - attr(*, \"problems\")=&lt;externalptr&gt;\n\nOutput from the str function differs depending on the type of object it is applied to. 
For example, this object is a tibble (tbl, Tidyverse’s name for a dataset). The information given about tibbles includes the object dimensions (396 x 9, or 396 rows and 9 columns), variable names, and variable types.\nIt is important to check that R has correctly recognised variable type when data are loaded, before generating any visualisations or analysis. If variables are incorrectly specified, this could either lead to errors or invalid analyses. We will see how to change variables types later in this session.\n\n# Return the first 6 rows of the tibble\nhead(csp_2020)\n## # A tibble: 6 × 9\n##   ons_code  authority    region sfa_2020 under_index_2020 ct_total_2020 nhb_2020\n##   &lt;chr&gt;     &lt;chr&gt;        &lt;chr&gt;     &lt;dbl&gt;            &lt;dbl&gt;         &lt;dbl&gt;    &lt;dbl&gt;\n## 1 E07000223 Adur         SE         1.77           0.0708          6.53   0.0881\n## 2 E07000026 Allerdale    NW         3.85           0.147           5.40   0.606 \n## 3 E07000032 Amber Valley EM         3.23           0.129           6.85   1.58  \n## 4 E07000224 Arun         SE         3.67           0.147          11.6    2.29  \n## 5 E07000170 Ashfield     EM         4.08           0.156           6.42   1.15  \n## 6 E07000105 Ashford      SE         2.88           0.115           7.92   3.05  \n## # ℹ 2 more variables: nhb_return_2020 &lt;dbl&gt;, rsdg_2020 &lt;dbl&gt;\n\n\n# Return the final 6 rows of the tibble.\ntail(csp_2020)\n## # A tibble: 6 × 9\n##   ons_code  authority   region sfa_2020 under_index_2020 ct_total_2020 nhb_2020\n##   &lt;chr&gt;     &lt;chr&gt;       &lt;chr&gt;     &lt;dbl&gt;            &lt;dbl&gt;         &lt;dbl&gt;    &lt;dbl&gt;\n## 1 E07000229 Worthing    SE         2.69            0.108          9.52    0.961\n## 2 E07000238 Wychavon    WM         2.65            0.106          6.29    4.73 \n## 3 E07000007 Wycombe     SE         0               0              0       0    \n## 4 E07000128 Wyre        NW         3.41           
 0.137          7.64    1.28 \n## 5 E07000239 Wyre Forest WM         2.84            0.114          7.45    0.262\n## 6 E06000014 York        YH        27.1             1.06          93.8     2.68 \n## # ℹ 2 more variables: nhb_return_2020 &lt;dbl&gt;, rsdg_2020 &lt;dbl&gt;\n\n\n3.2.1 Selecting variables\nOften, our analysis will not require every variable in a downloaded dataset, and we may wish to create a smaller analysis tibble. We may also wish to select individual variables from the tibble to apply functions to them without including the entire dataset.\nTo select one or more variable and return them as a new tibble, we can use the select function from tidyverse’s dplyr package.\nFor example, if we wanted to return the new homes bonus (nhb) for each local authority (the seventh column of the dataset), we can either select this based on the variable name or its location in the object:\n\n# Return the nhb_2020 variable from the csp_2020 object\nselect(csp_2020, nhb_2020)\n## # A tibble: 396 × 1\n##    nhb_2020\n##       &lt;dbl&gt;\n##  1   0.0881\n##  2   0.606 \n##  3   1.58  \n##  4   2.29  \n##  5   1.15  \n##  6   3.05  \n##  7   0     \n##  8   0     \n##  9   1.05  \n## 10   1.85  \n## # ℹ 386 more rows\n\n# Return the 7th variable of the csp_2020 object\nselect(csp_2020, 7)\n## # A tibble: 396 × 1\n##    nhb_2020\n##       &lt;dbl&gt;\n##  1   0.0881\n##  2   0.606 \n##  3   1.58  \n##  4   2.29  \n##  5   1.15  \n##  6   3.05  \n##  7   0     \n##  8   0     \n##  9   1.05  \n## 10   1.85  \n## # ℹ 386 more rows\n\nWe can select multiple variables and return them as a tibble by separating the variable names or numbers with commas:\n\n# Return three variables from the csp_2020 object\nselect(csp_2020, ons_code, authority, region)\n## # A tibble: 396 × 3\n##    ons_code  authority            region\n##    &lt;chr&gt;     &lt;chr&gt;                &lt;chr&gt; \n##  1 E07000223 Adur                 SE    \n##  2 E07000026 Allerdale            NW    \n##  
3 E07000032 Amber Valley         EM    \n##  4 E07000224 Arun                 SE    \n##  5 E07000170 Ashfield             EM    \n##  6 E07000105 Ashford              SE    \n##  7 E31000001 Avon Fire            SW    \n##  8 E07000004 Aylesbury Vale       SE    \n##  9 E07000200 Babergh              EE    \n## 10 E09000002 Barking And Dagenham L     \n## # ℹ 386 more rows\n\nWhen selecting consecutive variables, a shortcut can be used that gives the first and last variable in the list, separated by a colon, :. The previous example can be carried out using the following code:\n\n# Return variables from ons_code upt to and including region\nselect(csp_2020, ons_code:region)\n## # A tibble: 396 × 3\n##    ons_code  authority            region\n##    &lt;chr&gt;     &lt;chr&gt;                &lt;chr&gt; \n##  1 E07000223 Adur                 SE    \n##  2 E07000026 Allerdale            NW    \n##  3 E07000032 Amber Valley         EM    \n##  4 E07000224 Arun                 SE    \n##  5 E07000170 Ashfield             EM    \n##  6 E07000105 Ashford              SE    \n##  7 E31000001 Avon Fire            SW    \n##  8 E07000004 Aylesbury Vale       SE    \n##  9 E07000200 Babergh              EE    \n## 10 E09000002 Barking And Dagenham L     \n## # ℹ 386 more rows\n\nThe select function can also be combined with a number of ‘selection helper’ functions that help us select variables based on naming conventions:\n\nstarts_with(\"xyz\") returns all variables with names beginning xyz\nends_with(\"xyz\") returns all variables with names ending xyz\ncontains(\"xyz\") returns all variables that have xyz within their name\n\nOr based on whether they match a condition:\n\nwhere(is.numeric) returns all variables that are classed as numeric\n\nFor a full list of these selection helpers, access the helpfile using ?tidyr_tidy_select.\nThe select function can also be used to remove variables from a tibble by adding a - before the variable name or number. 
For example:\n\n# Remove the ons_code variable \nselect(csp_2020, -ons_code)\n## # A tibble: 396 × 8\n##    authority            region sfa_2020 under_index_2020 ct_total_2020 nhb_2020\n##    &lt;chr&gt;                &lt;chr&gt;     &lt;dbl&gt;            &lt;dbl&gt;         &lt;dbl&gt;    &lt;dbl&gt;\n##  1 Adur                 SE         1.77           0.0708          6.53   0.0881\n##  2 Allerdale            NW         3.85           0.147           5.40   0.606 \n##  3 Amber Valley         EM         3.23           0.129           6.85   1.58  \n##  4 Arun                 SE         3.67           0.147          11.6    2.29  \n##  5 Ashfield             EM         4.08           0.156           6.42   1.15  \n##  6 Ashford              SE         2.88           0.115           7.92   3.05  \n##  7 Avon Fire            SW        16.0            0.437          27.8    0     \n##  8 Aylesbury Vale       SE         0              0               0      0     \n##  9 Babergh              EE         2.14           0.0857          5.77   1.05  \n## 10 Barking And Dagenham L         75.7            2.31           65.8    1.85  \n## # ℹ 386 more rows\n## # ℹ 2 more variables: nhb_return_2020 &lt;dbl&gt;, rsdg_2020 &lt;dbl&gt;\n\nThe select function returns the variable(s) in the form of a tibble (or dataset). However, some functions, such as basic summary functions, require data to be entered as a vector (a list of values). Tibbles with a single variable can be converted into a vector using the as.vector function, or we can use the base R approach to selecting a single variable. To return a single variable as a vector in base R, we can use the $ symbol between the data name and the variable to return:\n\ncsp_2020$nhb_2020\n\nIt is important to save any changes made to the existing dataset. 
This can be done using the write_csv function:\n\nwrite_csv(csp_2020, file = \"data/csp_2020_new.csv\")\n\n\n\n\n\n\n\nWarning\n\n\n\nWhen saving updated tibbles as files, use a different file name to the original raw data. Using the same name will overwrite the original file. We always want a copy of the raw data in case of any errors or issues.\n\n\n\n\n3.2.2 Filtering data\nThe filter function, from tidyverse’s dplyr package allows us to return subgroups of the data based on conditional statements. These conditional statements can include mathematical operators, e.g. &lt;= (less than or equal to), == (is equal to), and != (is not equal to), or can be based on conditional functions, e.g. is.na(variable) (is missing), between(a, b) (number lies between a and b).\nA more comprehensive list of conditional statements can be found in the help file using ?filter.\nFor example, to return the core spending power for local authorities in the North West region of England, we use the following:\n\n# Return rows where region is equal to NW from the csp_2020 object\nfilter(csp_2020, region == \"NW\")\n## # A tibble: 46 × 9\n##    ons_code  authority   region sfa_2020 under_index_2020 ct_total_2020 nhb_2020\n##    &lt;chr&gt;     &lt;chr&gt;       &lt;chr&gt;     &lt;dbl&gt;            &lt;dbl&gt;         &lt;dbl&gt;    &lt;dbl&gt;\n##  1 E07000026 Allerdale   NW         3.85            0.147          5.40   0.606 \n##  2 E07000027 Barrow-in-… NW         4.40            0.125          4.74   0.0111\n##  3 E06000008 Blackburn … NW        58.1             1.79          55.9    0.999 \n##  4 E06000009 Blackpool   NW        63.3             1.94          60.1    0.266 \n##  5 E08000001 Bolton      NW        84.2             2.73         115.     
0.506 \n##  6 E07000117 Burnley     NW         5.90            0.171          7.16   0.694 \n##  7 E08000002 Bury        NW        42.3             1.44          89.0    0.458 \n##  8 E07000028 Carlisle    NW         3.34            0.134          7.49   1.49  \n##  9 E06000049 Cheshire E… NW        42.5             1.70         230.    11.2   \n## 10 E31000006 Cheshire F… NW        13.5             0.380         30.1    0     \n## # ℹ 36 more rows\n## # ℹ 2 more variables: nhb_return_2020 &lt;dbl&gt;, rsdg_2020 &lt;dbl&gt;\n\nMultiple conditional statements can be added to the same function by separating them with a comma ,. For example, to return a subgroup of local authorities in the North West region that had a settlement funding assessment (SFA) of over £40 million, we use the following:\n\nfilter(csp_2020, region == \"NW\", sfa_2020 &gt; 40)\n## # A tibble: 23 × 9\n##    ons_code  authority   region sfa_2020 under_index_2020 ct_total_2020 nhb_2020\n##    &lt;chr&gt;     &lt;chr&gt;       &lt;chr&gt;     &lt;dbl&gt;            &lt;dbl&gt;         &lt;dbl&gt;    &lt;dbl&gt;\n##  1 E06000008 Blackburn … NW         58.1             1.79          55.9    0.999\n##  2 E06000009 Blackpool   NW         63.3             1.94          60.1    0.266\n##  3 E08000001 Bolton      NW         84.2             2.73         115.     0.506\n##  4 E08000002 Bury        NW         42.3             1.44          89.0    0.458\n##  5 E06000049 Cheshire E… NW         42.5             1.70         230.    11.2  \n##  6 E06000050 Cheshire W… NW         56.3             2.12         196.    10.2  \n##  7 E10000006 Cumbria     NW        107.              3.56         248.     
0.824\n##  8 E47000001 Greater Ma… NW         50.6             1.28          50.5    0    \n##  9 E06000006 Halton      NW         45.6             1.45          52.2    2.21 \n## 10 E08000011 Knowsley    NW         84.1             2.50          56.8    2.10 \n## # ℹ 13 more rows\n## # ℹ 2 more variables: nhb_return_2020 &lt;dbl&gt;, rsdg_2020 &lt;dbl&gt;\n\n\n\n3.2.3 Pipes\nWhen creating an analysis-ready dataset, we often want to combine functions such as select and filter. Previously, these would need to be carried out separately and a new object would need to be created or overwritten at each step, clogging up the environment.\nIn tidyverse, we combine multiple functions into a single process by using the ‘pipe’ symbol %&gt;%, which is read as ‘and then’ within the code.\n\n\n\n\n\n\nHelpful hint\n\n\n\nTo save time when piping, use the keyboard shortcut ctrl + shift + m for Windows, and Command + shift + m for Mac to create a pipe.\n\n\nFor example, we can return a list of local authority names from the North West region:\n\n# Using the csp_2020 object\ncsp_2020 %&gt;% \n  # Return just rows where region is equal to NW, and then\n  filter(region == \"NW\") %&gt;% \n  # Select just the authority variable\n  select(authority)\n## # A tibble: 46 × 1\n##    authority            \n##    &lt;chr&gt;                \n##  1 Allerdale            \n##  2 Barrow-in-Furness    \n##  3 Blackburn with Darwen\n##  4 Blackpool            \n##  5 Bolton               \n##  6 Burnley              \n##  7 Bury                 \n##  8 Carlisle             \n##  9 Cheshire East        \n## 10 Cheshire Fire        \n## # ℹ 36 more rows\n\n\n\n\n\n\n\nStyle tips\n\n\n\nWhen combining multiple functions within a process using pipes, it is good practice to start the code with the data and pipe that into the functions, rather than including it in the function itself.\n\n\nThe pipe can also be combined with the filter function to count the number of observations that lie within a 
subgroup:\n\n# Take the csp_2020 object\ncsp_2020 %&gt;% \n  # Return just rows where region is equal to NW, and then\n  filter(region == \"NW\") %&gt;% \n  # Count the number of rows\n  count()\n## # A tibble: 1 × 1\n##       n\n##   &lt;int&gt;\n## 1    46\n\n\n\n3.2.4 Creating new variables\nThe function mutate from tidyverse’s dplyr package allows us to add new variables to a dataset, either by manually specifying them or by creating them from existing variables. We can add multiple variables within the same function, separating each with a comma ,.\nFor example, we can create a new variables with the squared settlement funding assessment (sfa_2020), and another that recodes the council tax variable (ct_total_2020) into a categorical variable with three levels (low: below £5 million, medium: between £5 million and £15 million, and high: above £15 million):\n\n# Create a new object, csp_2020_new, starting with the object csp_2020\ncsp_2020_new &lt;- csp_2020 %&gt;% \n  # Add a new variable, sfa_2020_sq, by squaring the current sfa_2020 variable\n  mutate(sfa_2020_sq = sfa_2020 ^ 2,\n         # Create ct_2020-cat by cutting the ct_total_2020 object\n         ct_2020_cat = cut(ct_total_2020, \n                           # Create categories by cutting at 0, 5 and 15\n                           breaks = c(0, 5, 15, Inf),\n                           # Add labels to these new groups\n                           labels = c(\"Low\", \"Medium\", \"High\"),\n                           # Include the lowest break point in each group\n                           include_lowest = TRUE))\n\n\n\n\n\n\n\nHelpful hint\n\n\n\nThe c function takes a list of values separated by commas and returns them as a vector. 
This is useful when a function argument requires multiple values (and we don’t want R to move onto the next argument, which is what a comma inside functions usually means).\n\n\nThe mutate function is also useful for reclassifying variables when R did not correctly choose the variable type. In this example, the region variable is a grouping variable, but str(csp_2020) shows it is recognised by R as a character. Grouping variables in R are known as factors. To convert the region variable to a factor, we use the factor function inside mutate:\n\ncsp_2020_new &lt;- csp_2020 %&gt;% \n  # Add a new variable, sfa_2020_sq, by squaring the current sfa_2020 variable\n  mutate(sfa_2020_sq = sfa_2020 ^ 2,\n         # Create ct_2020-cat by cutting the ct_total_2020 object\n         ct_2020_cat = cut(ct_total_2020, \n                           # Create categories by cutting at 0, 5 and 15\n                           breaks = c(0, 5, 15, Inf),\n                           # Add labels to these new groups\n                           labels = c(\"Low\", \"Medium\", \"High\"),\n                           # Include the lowest break point in each group\n                           include_lowest = TRUE),\n         region_fct = factor(region, \n                             # To order the variable, use the levels argument\n                             levels = c(\"L\", \"NW\", \"NE\", \"YH\", \"WM\", \n                                        \"EM\", \"EE\", \"SW\", \"SE\")))\n\n# Check variables are correctly classified\nstr(csp_2020_new)\n## tibble [396 × 12] (S3: tbl_df/tbl/data.frame)\n##  $ ons_code        : chr [1:396] \"E07000223\" \"E07000026\" \"E07000032\" \"E07000224\" ...\n##  $ authority       : chr [1:396] \"Adur\" \"Allerdale\" \"Amber Valley\" \"Arun\" ...\n##  $ region          : chr [1:396] \"SE\" \"NW\" \"EM\" \"SE\" ...\n##  $ sfa_2020        : num [1:396] 1.77 3.85 3.23 3.67 4.08 ...\n##  $ under_index_2020: num [1:396] 0.0708 0.1465 0.1292 0.147 0.1557 ...\n##  $ 
ct_total_2020   : num [1:396] 6.53 5.4 6.85 11.61 6.42 ...\n##  $ nhb_2020        : num [1:396] 0.0881 0.6061 1.5786 2.2949 1.1547 ...\n##  $ nhb_return_2020 : num [1:396] 0 0 0 0 0 0 0 0 0 0 ...\n##  $ rsdg_2020       : num [1:396] 0 0.326 0 0 0 ...\n##  $ sfa_2020_sq     : num [1:396] 3.12 14.86 10.41 13.46 16.66 ...\n##  $ ct_2020_cat     : Factor w/ 3 levels \"Low\",\"Medium\",..: 2 2 2 2 2 2 3 NA 2 3 ...\n##  $ region_fct      : Factor w/ 9 levels \"L\",\"NW\",\"NE\",..: 9 2 6 9 6 9 8 9 7 1 ...\n\nAlthough there is no real ordering to the regions in England, attaching this order allows us to control how the are displayed in outputs. By default, character variables are displayed in alphabetical order. By adding the order to this variable, we will produce output where the reference region (London) will be displayed first, followed by regions from north to south.\n\n\nExercise 3\n\nHow many local authorities were included in the London region?\nGive three different ways that it would be possible to select all spend variables (sfa_2020, nhb_2020, etc.) from the CSP_2020 dataset.\nCreate a new tibble, em_2020, that just includes local authorities from the East Midlands (EM) region.\n\n\nHow many local authorities in the East Midlands had an SFA of between £5 and 10 million?\nCreate a new variable with the total overall spend in 2020 for local authorities in the East Midlands.",
+    "title": "2  Introduction to tidyverse and data wrangling",
+    "section": "2.2 4.3 Data input",
+    "text": "2.2 4.3 Data input\nTo ensure our code is collaborative and reproducible, we should strive to store data in formats that can be used across multiple platforms. One of the best ways to do this is to store data as a comma-delimited file (.csv). CSV files can be opened by a range of different softwares (including R, SPSS, STATA and excel), and base R can be used to open these files without requiring additional packages.\nBefore loading files in R, it is essential to check that they are correctly formatted. Data files should only contain one sheet with no pictures or graphics, each row should correspond to a case or observation and each column should correspond to a variable.\nTo avoid any errors arising from spelling mistakes, we can use the list.files function to return a list of files and folders from the current working directory. The file names can be copied from the console and pasted into the script file. As the data are saved in a folder within the working directory, we must add the argument path = to specify the folder we want to list files from.\n\nlist.files(path = \"data\")\n## [1] \"CSP_2015.csv\"         \"CSP_2016.csv\"         \"CSP_2017.csv\"        \n## [4] \"CSP_2018.csv\"         \"CSP_2019.csv\"         \"CSP_2020.csv\"        \n## [7] \"CSP_long_201520.csv\"  \"data_description.pdf\"\n\nThis list should contain 6 CSV files with the core spending power in local authorities in England between 2015 and 2020. We will first load and explore the 2020 data using the read_csv function and attaching the data to an object. Remember to add the data folder to the file name.\n\ncsp_2020 &lt;- read_csv(\"data/CSP_2020.csv\")\n\nImported datasets will appear in the Environment window of the console once they are saved as objects. This Environment also displays the number of variables and observations in each object. 
To preview the contents of an object, click on its name in the Environment window or use the function View(data).\nOther useful functions that help explore a dataset include:\n\n# Return variable names from a dataset object\nnames(csp_2020)\n## [1] \"ons_code\"         \"authority\"        \"region\"           \"sfa_2020\"        \n## [5] \"under_index_2020\" \"ct_total_2020\"    \"nhb_2020\"         \"nhb_return_2020\" \n## [9] \"rsdg_2020\"\n\n\n\n\n\n\n\nStyle tip\n\n\n\nVariable names should follow the same style rules as object names: only contain lower case letters, numbers, and use _ to separate words. They should be meaningful and concise.\n\n\n\n# Display information about the structure of an object\nstr(csp_2020)\n## spc_tbl_ [396 × 9] (S3: spec_tbl_df/tbl_df/tbl/data.frame)\n##  $ ons_code        : chr [1:396] \"E07000223\" \"E07000026\" \"E07000032\" \"E07000224\" ...\n##  $ authority       : chr [1:396] \"Adur\" \"Allerdale\" \"Amber Valley\" \"Arun\" ...\n##  $ region          : chr [1:396] \"SE\" \"NW\" \"EM\" \"SE\" ...\n##  $ sfa_2020        : num [1:396] 1.77 3.85 3.23 3.67 4.08 ...\n##  $ under_index_2020: num [1:396] 0.0708 0.1465 0.1292 0.147 0.1557 ...\n##  $ ct_total_2020   : num [1:396] 6.53 5.4 6.85 11.61 6.42 ...\n##  $ nhb_2020        : num [1:396] 0.0881 0.6061 1.5786 2.2949 1.1547 ...\n##  $ nhb_return_2020 : num [1:396] 0 0 0 0 0 0 0 0 0 0 ...\n##  $ rsdg_2020       : num [1:396] 0 0.326 0 0 0 ...\n##  - attr(*, \"spec\")=\n##   .. cols(\n##   ..   ons_code = col_character(),\n##   ..   authority = col_character(),\n##   ..   region = col_character(),\n##   ..   sfa_2020 = col_double(),\n##   ..   under_index_2020 = col_double(),\n##   ..   ct_total_2020 = col_double(),\n##   ..   nhb_2020 = col_double(),\n##   ..   nhb_return_2020 = col_double(),\n##   ..   rsdg_2020 = col_double()\n##   .. )\n##  - attr(*, \"problems\")=&lt;externalptr&gt;\n\nOutput from the str function differs depending on the type of object it is applied to. 
For example, this object is a tibble (tbl, Tidyverse’s name for a dataset). The information given about tibbles includes the object dimensions (396 x 9, or 396 rows and 9 columns), variable names, and variable types.\nIt is important to check that R has correctly recognised variable type when data are loaded, before generating any visualisations or analysis. If variables are incorrectly specified, this could either lead to errors or invalid analyses. We will see how to change variables types later in this session.\n\n# Return the first 6 rows of the tibble\nhead(csp_2020)\n## # A tibble: 6 × 9\n##   ons_code  authority    region sfa_2020 under_index_2020 ct_total_2020 nhb_2020\n##   &lt;chr&gt;     &lt;chr&gt;        &lt;chr&gt;     &lt;dbl&gt;            &lt;dbl&gt;         &lt;dbl&gt;    &lt;dbl&gt;\n## 1 E07000223 Adur         SE         1.77           0.0708          6.53   0.0881\n## 2 E07000026 Allerdale    NW         3.85           0.147           5.40   0.606 \n## 3 E07000032 Amber Valley EM         3.23           0.129           6.85   1.58  \n## 4 E07000224 Arun         SE         3.67           0.147          11.6    2.29  \n## 5 E07000170 Ashfield     EM         4.08           0.156           6.42   1.15  \n## 6 E07000105 Ashford      SE         2.88           0.115           7.92   3.05  \n## # ℹ 2 more variables: nhb_return_2020 &lt;dbl&gt;, rsdg_2020 &lt;dbl&gt;\n\n\n# Return the final 6 rows of the tibble.\ntail(csp_2020)\n## # A tibble: 6 × 9\n##   ons_code  authority   region sfa_2020 under_index_2020 ct_total_2020 nhb_2020\n##   &lt;chr&gt;     &lt;chr&gt;       &lt;chr&gt;     &lt;dbl&gt;            &lt;dbl&gt;         &lt;dbl&gt;    &lt;dbl&gt;\n## 1 E07000229 Worthing    SE         2.69            0.108          9.52    0.961\n## 2 E07000238 Wychavon    WM         2.65            0.106          6.29    4.73 \n## 3 E07000007 Wycombe     SE         0               0              0       0    \n## 4 E07000128 Wyre        NW         3.41           
 0.137          7.64    1.28 \n## 5 E07000239 Wyre Forest WM         2.84            0.114          7.45    0.262\n## 6 E06000014 York        YH        27.1             1.06          93.8     2.68 \n## # ℹ 2 more variables: nhb_return_2020 &lt;dbl&gt;, rsdg_2020 &lt;dbl&gt;\n\n\n2.2.1 Selecting variables\nOften, our analysis will not require every variable in a downloaded dataset, and we may wish to create a smaller analysis tibble. We may also wish to select individual variables from the tibble to apply functions to them without including the entire dataset.\nTo select one or more variable and return them as a new tibble, we can use the select function from tidyverse’s dplyr package.\nFor example, if we wanted to return the new homes bonus (nhb) for each local authority (the seventh column of the dataset), we can either select this based on the variable name or its location in the object:\n\n# Return the nhb_2020 variable from the csp_2020 object\nselect(csp_2020, nhb_2020)\n## # A tibble: 396 × 1\n##    nhb_2020\n##       &lt;dbl&gt;\n##  1   0.0881\n##  2   0.606 \n##  3   1.58  \n##  4   2.29  \n##  5   1.15  \n##  6   3.05  \n##  7   0     \n##  8   0     \n##  9   1.05  \n## 10   1.85  \n## # ℹ 386 more rows\n\n# Return the 7th variable of the csp_2020 object\nselect(csp_2020, 7)\n## # A tibble: 396 × 1\n##    nhb_2020\n##       &lt;dbl&gt;\n##  1   0.0881\n##  2   0.606 \n##  3   1.58  \n##  4   2.29  \n##  5   1.15  \n##  6   3.05  \n##  7   0     \n##  8   0     \n##  9   1.05  \n## 10   1.85  \n## # ℹ 386 more rows\n\nWe can select multiple variables and return them as a tibble by separating the variable names or numbers with commas:\n\n# Return three variables from the csp_2020 object\nselect(csp_2020, ons_code, authority, region)\n## # A tibble: 396 × 3\n##    ons_code  authority            region\n##    &lt;chr&gt;     &lt;chr&gt;                &lt;chr&gt; \n##  1 E07000223 Adur                 SE    \n##  2 E07000026 Allerdale            NW    \n##  
3 E07000032 Amber Valley         EM    \n##  4 E07000224 Arun                 SE    \n##  5 E07000170 Ashfield             EM    \n##  6 E07000105 Ashford              SE    \n##  7 E31000001 Avon Fire            SW    \n##  8 E07000004 Aylesbury Vale       SE    \n##  9 E07000200 Babergh              EE    \n## 10 E09000002 Barking And Dagenham L     \n## # ℹ 386 more rows\n\nWhen selecting consecutive variables, a shortcut can be used that gives the first and last variable in the list, separated by a colon, :. The previous example can be carried out using the following code:\n\n# Return variables from ons_code upt to and including region\nselect(csp_2020, ons_code:region)\n## # A tibble: 396 × 3\n##    ons_code  authority            region\n##    &lt;chr&gt;     &lt;chr&gt;                &lt;chr&gt; \n##  1 E07000223 Adur                 SE    \n##  2 E07000026 Allerdale            NW    \n##  3 E07000032 Amber Valley         EM    \n##  4 E07000224 Arun                 SE    \n##  5 E07000170 Ashfield             EM    \n##  6 E07000105 Ashford              SE    \n##  7 E31000001 Avon Fire            SW    \n##  8 E07000004 Aylesbury Vale       SE    \n##  9 E07000200 Babergh              EE    \n## 10 E09000002 Barking And Dagenham L     \n## # ℹ 386 more rows\n\nThe select function can also be combined with a number of ‘selection helper’ functions that help us select variables based on naming conventions:\n\nstarts_with(\"xyz\") returns all variables with names beginning xyz\nends_with(\"xyz\") returns all variables with names ending xyz\ncontains(\"xyz\") returns all variables that have xyz within their name\n\nOr based on whether they match a condition:\n\nwhere(is.numeric) returns all variables that are classed as numeric\n\nFor a full list of these selection helpers, access the helpfile using ?tidyr_tidy_select.\nThe select function can also be used to remove variables from a tibble by adding a - before the variable name or number. 
For example:\n\n# Remove the ons_code variable \nselect(csp_2020, -ons_code)\n## # A tibble: 396 × 8\n##    authority            region sfa_2020 under_index_2020 ct_total_2020 nhb_2020\n##    &lt;chr&gt;                &lt;chr&gt;     &lt;dbl&gt;            &lt;dbl&gt;         &lt;dbl&gt;    &lt;dbl&gt;\n##  1 Adur                 SE         1.77           0.0708          6.53   0.0881\n##  2 Allerdale            NW         3.85           0.147           5.40   0.606 \n##  3 Amber Valley         EM         3.23           0.129           6.85   1.58  \n##  4 Arun                 SE         3.67           0.147          11.6    2.29  \n##  5 Ashfield             EM         4.08           0.156           6.42   1.15  \n##  6 Ashford              SE         2.88           0.115           7.92   3.05  \n##  7 Avon Fire            SW        16.0            0.437          27.8    0     \n##  8 Aylesbury Vale       SE         0              0               0      0     \n##  9 Babergh              EE         2.14           0.0857          5.77   1.05  \n## 10 Barking And Dagenham L         75.7            2.31           65.8    1.85  \n## # ℹ 386 more rows\n## # ℹ 2 more variables: nhb_return_2020 &lt;dbl&gt;, rsdg_2020 &lt;dbl&gt;\n\nThe select function returns the variable(s) in the form of a tibble (or dataset). However, some functions, such as basic summary functions, require data to be entered as a vector (a list of values). Tibbles with a single variable can be converted into a vector using the as.vector function, or we can use the base R approach to selecting a single variable. To return a single variable as a vector in base R, we can use the $ symbol between the data name and the variable to return:\n\ncsp_2020$nhb_2020\n\nIt is important to save any changes made to the existing dataset. 
This can be done using the write_csv function:\n\nwrite_csv(csp_2020, file = \"data/csp_2020_new.csv\")\n\n\n\n\n\n\n\nWarning\n\n\n\nWhen saving updated tibbles as files, use a different file name to the original raw data. Using the same name will overwrite the original file. We always want a copy of the raw data in case of any errors or issues.\n\n\n\n\n2.2.2 Filtering data\nThe filter function, from tidyverse’s dplyr package allows us to return subgroups of the data based on conditional statements. These conditional statements can include mathematical operators, e.g. &lt;= (less than or equal to), == (is equal to), and != (is not equal to), or can be based on conditional functions, e.g. is.na(variable) (is missing), between(a, b) (number lies between a and b).\nA more comprehensive list of conditional statements can be found in the help file using ?filter.\nFor example, to return the core spending power for local authorities in the North West region of England, we use the following:\n\n# Return rows where region is equal to NW from the csp_2020 object\nfilter(csp_2020, region == \"NW\")\n## # A tibble: 46 × 9\n##    ons_code  authority   region sfa_2020 under_index_2020 ct_total_2020 nhb_2020\n##    &lt;chr&gt;     &lt;chr&gt;       &lt;chr&gt;     &lt;dbl&gt;            &lt;dbl&gt;         &lt;dbl&gt;    &lt;dbl&gt;\n##  1 E07000026 Allerdale   NW         3.85            0.147          5.40   0.606 \n##  2 E07000027 Barrow-in-… NW         4.40            0.125          4.74   0.0111\n##  3 E06000008 Blackburn … NW        58.1             1.79          55.9    0.999 \n##  4 E06000009 Blackpool   NW        63.3             1.94          60.1    0.266 \n##  5 E08000001 Bolton      NW        84.2             2.73         115.     
0.506 \n##  6 E07000117 Burnley     NW         5.90            0.171          7.16   0.694 \n##  7 E08000002 Bury        NW        42.3             1.44          89.0    0.458 \n##  8 E07000028 Carlisle    NW         3.34            0.134          7.49   1.49  \n##  9 E06000049 Cheshire E… NW        42.5             1.70         230.    11.2   \n## 10 E31000006 Cheshire F… NW        13.5             0.380         30.1    0     \n## # ℹ 36 more rows\n## # ℹ 2 more variables: nhb_return_2020 &lt;dbl&gt;, rsdg_2020 &lt;dbl&gt;\n\nMultiple conditional statements can be added to the same function by separating them with a comma ,. For example, to return a subgroup of local authorities in the North West region that had a settlement funding assessment (SFA) of over £40 million, we use the following:\n\nfilter(csp_2020, region == \"NW\", sfa_2020 &gt; 40)\n## # A tibble: 23 × 9\n##    ons_code  authority   region sfa_2020 under_index_2020 ct_total_2020 nhb_2020\n##    &lt;chr&gt;     &lt;chr&gt;       &lt;chr&gt;     &lt;dbl&gt;            &lt;dbl&gt;         &lt;dbl&gt;    &lt;dbl&gt;\n##  1 E06000008 Blackburn … NW         58.1             1.79          55.9    0.999\n##  2 E06000009 Blackpool   NW         63.3             1.94          60.1    0.266\n##  3 E08000001 Bolton      NW         84.2             2.73         115.     0.506\n##  4 E08000002 Bury        NW         42.3             1.44          89.0    0.458\n##  5 E06000049 Cheshire E… NW         42.5             1.70         230.    11.2  \n##  6 E06000050 Cheshire W… NW         56.3             2.12         196.    10.2  \n##  7 E10000006 Cumbria     NW        107.              3.56         248.     
0.824\n##  8 E47000001 Greater Ma… NW         50.6             1.28          50.5    0    \n##  9 E06000006 Halton      NW         45.6             1.45          52.2    2.21 \n## 10 E08000011 Knowsley    NW         84.1             2.50          56.8    2.10 \n## # ℹ 13 more rows\n## # ℹ 2 more variables: nhb_return_2020 &lt;dbl&gt;, rsdg_2020 &lt;dbl&gt;\n\n\n\n2.2.3 Pipes\nWhen creating an analysis-ready dataset, we often want to combine functions such as select and filter. Previously, these would need to be carried out separately and a new object would need to be created or overwritten at each step, clogging up the environment.\nIn tidyverse, we combine multiple functions into a single process by using the ‘pipe’ symbol %&gt;%, which is read as ‘and then’ within the code.\n\n\n\n\n\n\nHelpful hint\n\n\n\nTo save time when piping, use the keyboard shortcut ctrl + shift + m for Windows, and Command + shift + m for Mac to create a pipe.\n\n\nFor example, we can return a list of local authority names from the North West region:\n\n# Using the csp_2020 object\ncsp_2020 %&gt;% \n  # Return just rows where region is equal to NW, and then\n  filter(region == \"NW\") %&gt;% \n  # Select just the authority variable\n  select(authority)\n## # A tibble: 46 × 1\n##    authority            \n##    &lt;chr&gt;                \n##  1 Allerdale            \n##  2 Barrow-in-Furness    \n##  3 Blackburn with Darwen\n##  4 Blackpool            \n##  5 Bolton               \n##  6 Burnley              \n##  7 Bury                 \n##  8 Carlisle             \n##  9 Cheshire East        \n## 10 Cheshire Fire        \n## # ℹ 36 more rows\n\n\n\n\n\n\n\nStyle tips\n\n\n\nWhen combining multiple functions within a process using pipes, it is good practice to start the code with the data and pipe that into the functions, rather than including it in the function itself.\n\n\nThe pipe can also be combined with the filter function to count the number of observations that lie within a 
subgroup:\n\n# Take the csp_2020 object\ncsp_2020 %&gt;% \n  # Return just rows where region is equal to NW, and then\n  filter(region == \"NW\") %&gt;% \n  # Count the number of rows\n  count()\n## # A tibble: 1 × 1\n##       n\n##   &lt;int&gt;\n## 1    46\n\n\n\n2.2.4 Creating new variables\nThe function mutate from tidyverse’s dplyr package allows us to add new variables to a dataset, either by manually specifying them or by creating them from existing variables. We can add multiple variables within the same function, separating each with a comma ,.\nFor example, we can create a new variables with the squared settlement funding assessment (sfa_2020), and another that recodes the council tax variable (ct_total_2020) into a categorical variable with three levels (low: below £5 million, medium: between £5 million and £15 million, and high: above £15 million):\n\n# Create a new object, csp_2020_new, starting with the object csp_2020\ncsp_2020_new &lt;- csp_2020 %&gt;% \n  # Add a new variable, sfa_2020_sq, by squaring the current sfa_2020 variable\n  mutate(sfa_2020_sq = sfa_2020 ^ 2,\n         # Create ct_2020-cat by cutting the ct_total_2020 object\n         ct_2020_cat = cut(ct_total_2020, \n                           # Create categories by cutting at 0, 5 and 15\n                           breaks = c(0, 5, 15, Inf),\n                           # Add labels to these new groups\n                           labels = c(\"Low\", \"Medium\", \"High\"),\n                           # Include the lowest break point in each group\n                           include_lowest = TRUE))\n\n\n\n\n\n\n\nHelpful hint\n\n\n\nThe c function takes a list of values separated by commas and returns them as a vector. 
This is useful when a function argument requires multiple values (and we don’t want R to move onto the next argument, which is what a comma inside functions usually means).\n\n\nThe mutate function is also useful for reclassifying variables when R did not correctly choose the variable type. In this example, the region variable is a grouping variable, but str(csp_2020) shows it is recognised by R as a character. Grouping variables in R are known as factors. To convert the region variable to a factor, we use the factor function inside mutate:\n\ncsp_2020_new &lt;- csp_2020 %&gt;% \n  # Add a new variable, sfa_2020_sq, by squaring the current sfa_2020 variable\n  mutate(sfa_2020_sq = sfa_2020 ^ 2,\n         # Create ct_2020-cat by cutting the ct_total_2020 object\n         ct_2020_cat = cut(ct_total_2020, \n                           # Create categories by cutting at 0, 5 and 15\n                           breaks = c(0, 5, 15, Inf),\n                           # Add labels to these new groups\n                           labels = c(\"Low\", \"Medium\", \"High\"),\n                           # Include the lowest break point in each group\n                           include_lowest = TRUE),\n         region_fct = factor(region, \n                             # To order the variable, use the levels argument\n                             levels = c(\"L\", \"NW\", \"NE\", \"YH\", \"WM\", \n                                        \"EM\", \"EE\", \"SW\", \"SE\")))\n\n# Check variables are correctly classified\nstr(csp_2020_new)\n## tibble [396 × 12] (S3: tbl_df/tbl/data.frame)\n##  $ ons_code        : chr [1:396] \"E07000223\" \"E07000026\" \"E07000032\" \"E07000224\" ...\n##  $ authority       : chr [1:396] \"Adur\" \"Allerdale\" \"Amber Valley\" \"Arun\" ...\n##  $ region          : chr [1:396] \"SE\" \"NW\" \"EM\" \"SE\" ...\n##  $ sfa_2020        : num [1:396] 1.77 3.85 3.23 3.67 4.08 ...\n##  $ under_index_2020: num [1:396] 0.0708 0.1465 0.1292 0.147 0.1557 ...\n##  $ 
ct_total_2020   : num [1:396] 6.53 5.4 6.85 11.61 6.42 ...\n##  $ nhb_2020        : num [1:396] 0.0881 0.6061 1.5786 2.2949 1.1547 ...\n##  $ nhb_return_2020 : num [1:396] 0 0 0 0 0 0 0 0 0 0 ...\n##  $ rsdg_2020       : num [1:396] 0 0.326 0 0 0 ...\n##  $ sfa_2020_sq     : num [1:396] 3.12 14.86 10.41 13.46 16.66 ...\n##  $ ct_2020_cat     : Factor w/ 3 levels \"Low\",\"Medium\",..: 2 2 2 2 2 2 3 NA 2 3 ...\n##  $ region_fct      : Factor w/ 9 levels \"L\",\"NW\",\"NE\",..: 9 2 6 9 6 9 8 9 7 1 ...\n\nAlthough there is no real ordering to the regions in England, attaching this order allows us to control how they are displayed in outputs. By default, character variables are displayed in alphabetical order. By adding the order to this variable, we will produce output where the reference region (London) will be displayed first, followed by regions from north to south.\n\n\nExercise 3\n\nHow many local authorities were included in the London region?\nGive three different ways that it would be possible to select all spend variables (sfa_2020, nhb_2020, etc.) from the CSP_2020 dataset.\nCreate a new tibble, em_2020, that just includes local authorities from the East Midlands (EM) region.\n\n\nHow many local authorities in the East Midlands had an SFA of between £5 and 10 million?\nCreate a new variable with the total overall spend in 2020 for local authorities in the East Midlands.",
     "crumbs": [
-      "<span class='chapter-number'>3</span>  <span class='chapter-title'>Session 2: Introduction to tidyverse and data wrangling</span>"
+      "<span class='chapter-number'>2</span>  <span class='chapter-title'>Introduction to tidyverse and data wrangling</span>"
     ]
   },
   {
     "objectID": "session3_notes.html",
     "href": "session3_notes.html",
-    "title": "4  Session 3: Data preparation and manipulation",
+    "title": "3  Data preparation and manipulation",
     "section": "",
-    "text": "4.1 Data wrangling and summarising",
+    "text": "3.1 Data wrangling and summarising",
     "crumbs": [
-      "<span class='chapter-number'>4</span>  <span class='chapter-title'>Session 3: Data preparation and manipulation</span>"
+      "<span class='chapter-number'>3</span>  <span class='chapter-title'>Data preparation and manipulation</span>"
     ]
   },
   {
     "objectID": "session3_notes.html#data-wrangling-and-summarising",
     "href": "session3_notes.html#data-wrangling-and-summarising",
-    "title": "4  Session 3: Data preparation and manipulation",
+    "title": "3  Data preparation and manipulation",
     "section": "",
-    "text": "4.1.1 Combining two datasets\nWe may need to combine data from different files within R to perform an analysis. For example, in our case we have the core spending power for each year between 2015 and 2020. If our analysis required comparing this spending over the time period, we would need to combine these files together.\nBefore the data can be combined, it must be loaded into R. We will begin combining data from 2015 and 2016, then extend this to the entire period.\n\n# Return a list of files to copy from the working directory\nlist.files(path = \"data\")\n\n[1] \"CSP_2015.csv\"         \"CSP_2016.csv\"         \"CSP_2017.csv\"        \n[4] \"CSP_2018.csv\"         \"CSP_2019.csv\"         \"CSP_2020.csv\"        \n[7] \"CSP_long_201520.csv\"  \"data_description.pdf\"\n\n# Load the 2015 data and attach as an object\nCSP_2015 &lt;- read_csv(\"data/CSP_2015.csv\")\n\n# Load the 2016 data and attach as an object\nCSP_2016 &lt;- read_csv(\"data/CSP_2016.csv\")\n\nNext, we will combine these datasets by joining them using key variable(s) which are shared between them. In this case, each local authority has a unique identifier code (ons_code) and naming variable (authority), they also should have the same region listed across both datasets.\nIn Tidyverse, there is a family of ‘joining’ functions that combine two datasets at a time. The choice of function depends on which observations we wish to keep where the joining variables do not match between data. 
In this example, we expect all local authority values to be the same across years, so will use the full_join function.\nFor more information about different joining options, check the helpfile via ?full_join.\n\n# Create a new object by joining the two datasets\ncsp_201516 &lt;- full_join(CSP_2015, CSP_2016, \n                        # List the key joining variables (in speech marks)\n                        by = c(\"ons_code\", \"authority\", \"region\"))\n\n\n\n4.1.2 Joining multiple datasets\nR’s joining functions can only be applied to two datasets at a time. To combine all 6 core spending power datasets from 2015 to 2020 in this way would require a lot of repetitive coding (which we want to avoid where necessary).\nAn alternative approach would be to automate this process by using functional programming, implemented using tidyverse’s purrr package.\nThe first step of this process requires loading all csv files into R by repeatedly applying read_csv. This requires a list of file names from the working directory. The function list.files introduced earlier contains an optional argument, pattern which can be used to return files and folders that match a naming pattern. In this case, all csv files begin “CSP_20”, so to return this list of names from the data folder, we use the function:\n\ncsp_201520 &lt;- list.files(path = \"data\", pattern = \"CSP_20\")\n\nNext, we apply read_csv to each element of the list of file names. The function map allows us to do this and return a list of tibbles. As the data lies in a folder in the working directory, we must add this file path to the file names:\n\n# Return a list of files in the data folder containing CSP_20\ncsp_201520 &lt;- list.files(path = \"data\", pattern = \"CSP_20\") %&gt;% \n  # Add \"data/\" to each of these file names\n  paste0(\"data/\", .) 
%&gt;% \n  # Apply read_csv to every element of the list (of file names)\n  map(read_csv)\n\nFinally, we require a function that apply full_join iteratively to the list of tibbles and reduce it to a single tibble containing core spending powers for all years. The function that does this is reduce:\n\n# Return a list of files in the data folder containing CSP_20\ncsp_201520 &lt;- list.files(path = \"data\", pattern = \"CSP_20\") %&gt;% \n  # Add \"data/\" to each of these file names\n  paste0(\"data/\", .) %&gt;% \n  # Apply read_csv to every element of the list (of file names)\n  map(read_csv) %&gt;% \n  # Reduce the list of tibbles to a single object by iteratively joining\n  reduce(full_join, by = c(\"ons_code\", \"authority\", \"region\"))\n\n\n\n4.1.3 Transforming data\nThe dataset containing core spending power in England between 2015 and 2020 is currently in what is known as wide format. This means there is a variable per measure per year, making the object very wide.\nSome analyses and visualisations, particularly those used for temporal data, require a time variable in the dataset (for example, year). This requires the data to be in a different format, known as long format. Long format is where each row contains an observation per year (making the data much longer and narrower).\nTo convert data between wide and long formats, we can use the tidyverse functions pivot_longer and pivot_wider.\nThe first argument required by pivot_longer is the data we wish to transform. This is followed by the columns we wish to pivot (in this case, all variable other than the local authority codes, names, and regions). 
The next steps will depend on the format of data we wish to transform, format of the data we would like to generate, the values we need to include in the long dataset, and where this information will be extracted from.\nFor worked examples and a detailed explanation of different approaches that can be used to pivot data, access the vignette for these function by entering vignette(\"pivot\") into the R console.\nIn the core spending power example, the new dataset will contain a row per local authority per year. A new year variable will be created using the suffix of the original variable names, and the prefix of the original names (e.g. sfa) will be retained for the new variable names.\nUsing a combination of the helpfile (?pivot_longer) and vignette, the arguments required to convert this data are names_to, to specify the old variable names will be used in the new data, and names_pattern to define how the old variable names will be separated.\n\n# Create an object csp_long by pivotting csp_201520\ncsp_long &lt;- pivot_longer(csp_201520, \n                         # Pivot columns sfa_2015 up to and including rsdg_2020\n                         cols = sfa_2015:rsdg_2020,\n                         # Separate the old variable names in two, \n                         # keep the prefix as it was, and put the suffix \n                         # into a new variable, year\n                         names_to = c(\".value\", \"year\"),\n                         # The name prefix and suffix were separated by an _,\n                         # the prefix can take different lengths, the suffix \n                         # is always the final 4 characters\n                         names_pattern = \"(.*)_(....)\")\n\n# Check the new, long dataset's structure\nstr(csp_long)\n\ntibble [2,376 × 10] (S3: tbl_df/tbl/data.frame)\n $ ons_code   : chr [1:2376] \"E07000223\" \"E07000223\" \"E07000223\" \"E07000223\" ...\n $ authority  : chr [1:2376] \"Adur\" \"Adur\" \"Adur\" \"Adur\" ...\n 
$ region     : chr [1:2376] \"SE\" \"SE\" \"SE\" \"SE\" ...\n $ year       : chr [1:2376] \"2015\" \"2016\" \"2017\" \"2018\" ...\n $ sfa        : num [1:2376] 3.02 2.39 1.92 1.7 1.74 ...\n $ under_index: num [1:2376] 0.0234 0.0234 0.0248 0.039 0.0567 ...\n $ ct_total   : num [1:2376] 5.47 5.68 5.85 6.08 6.35 ...\n $ nhb        : num [1:2376] 0.652 0.767 0.553 0.202 0.126 ...\n $ nhb_return : num [1:2376] 0.00523 0.00374 0.00397 0 0 ...\n $ rsdg       : num [1:2376] 0 0 0 0 0 ...\n\n\nNotice that the new year variable is recognised as a character, not a numeric variable as we would like. This is because these values were taken from variable names, which R treats as characters. To fix this, we can use the mutate function to convert the new variable into a numeric variable.\nWe may also wish to calculate the total core spending power for each local authority per year to compare this over time:\n\n# Create a new object based on the long data\ncsp_long2 &lt;- mutate(csp_long, \n                    # Convert year to a numeric variable\n                    year = as.numeric(year),\n                    # Create a new total spend variable\n                    total_spend = sfa + under_index + ct_total + nhb + \n                      nhb_return + rsdg)\n\nAfter manipulating and transforming the data into the format we need for analysis and visualisation, we can save this object to reload later. Tibbles and data frame objects can be saved as CSV files using the write_csv function. Remember to save the data with a different name than the raw data to avoid overwriting these files.\n\nwrite_csv(csp_long2, file = \"data/CSP_long_201520.csv\")\n\n\n\n4.1.4 Summary tables\nSummary tables can be created using the summarise function. 
This returns tables in a tibble format, meaning they can easily be customised and exported as CSV files (using the write_csv function).\nThe summarise function is set up similarly to the mutate function: summaries are listed and given variable names, separated by a comma. The difference between these functions is that summarise collapses the tibble into a single summary row, and the new variables must be created using a summary function.\nCommon examples of summary functions include:\n\nmean\nmedian\nrange (gives the minimum and maximum values)\nmin\nmax\nIQR (interquartile range, gives the range of the middle 50% of the sample)\nsd (standard deviation, a measure of the spread when data are normally distributed)\nsum\nn (counts the number of rows the summary is calculated from)\n\nFor a full list of compatible summary functions, view the helpfile ?summarise.\nIf we wanted to summarise the total core spending power between 2015 and 2020 across all local authorities, we can apply summarise to the long format data from the previous section:\n\nsummarise(csp_long2, \n          # Return sum of the total_spend variable\n          total_spend_all = sum(total_spend),\n          # Return the mean total spend\n          mean_total_spend = mean(total_spend),\n          # Return the median total spend\n          median_total_spend = median(total_spend),\n          # Return the 10th percentile (the value that 10% of the sample lies below)\n          quantile10_total_spend = quantile(total_spend, 0.1),\n          # Count the number of rows that have been summarised\n          total_obs = n())\n\n# A tibble: 1 × 5\n  total_spend_all mean_total_spend median_total_spend quantile10_total_spend\n            &lt;dbl&gt;            &lt;dbl&gt;              &lt;dbl&gt;                  &lt;dbl&gt;\n1         263484.             111.               17.6                   8.34\n# ℹ 1 more variable: total_obs &lt;int&gt;\n\n\nThe summarise function can be used to produce grouped summaries. 
This is done by first grouping the data with the group_by function. For example, if we wished to produce a summary table with a row per local authority, summarising the total spending between 2015 and 2020, we would use the following:\n\ncsp_long2 %&gt;% \n  # Group by the local authority's unique identifiers\n  group_by(ons_code, authority) %&gt;% \n  # Total spend 2015 - 2020\n  summarise(total_spend_all = sum(total_spend),\n            # Mean spend 2015 - 2020  \n            mean_total_spend = mean(total_spend),\n            # Median spend 2015 - 2020\n            median_total_spend = median(total_spend),\n            # 10th percentile of total spend\n            quantile10_total_spend = quantile(total_spend, 0.1),\n            # Number of rows summarised over\n            total_obs = n()) %&gt;%\n  # Remove grouping structure\n  ungroup()\n\n# A tibble: 396 × 7\n   ons_code  authority       total_spend_all mean_total_spend median_total_spend\n   &lt;chr&gt;     &lt;chr&gt;                     &lt;dbl&gt;            &lt;dbl&gt;              &lt;dbl&gt;\n 1 -         Greater London…          12416.           2069.              2022. \n 2 E06000001 Hartlepool                 485.             80.8               80.3\n 3 E06000002 Middlesbrough              711.            119.               118. \n 4 E06000003 Redcar And Cle…            660.            110.               109. \n 5 E06000004 Stockton-on-Te…            832.            139.               138. \n 6 E06000005 Darlington                 474.             79.0               78.5\n 7 E06000006 Halton                     598.             99.7               99.0\n 8 E06000007 Warrington                 806.            134.               133. \n 9 E06000008 Blackburn with…            692.            115.               115. \n10 E06000009 Blackpool                  750.            125.               124. 
\n# ℹ 386 more rows\n# ℹ 2 more variables: quantile10_total_spend &lt;dbl&gt;, total_obs &lt;int&gt;\n\n\n\n\n\n\n\n\nWarning\n\n\n\nWhenever using group_by, make sure to ungroup the data before proceeding. The grouping structure can be large and slow analysis down, or may interact with other functions to produce unexpected analyses.\n\n\n\n\nExercise 4\n\nCreate a data frame with the minimum, maximum and median total spend per year for each region.\nProduce a frequency table containing the number and percentage of local authorities in each region.\nConvert the data object csp_long2 back into wide format, with one row per local authority and one variable per total spend per year (HINT: start by selecting only the variables you need from the long data frame). Use the help file ?pivot_wider and vignette(\"pivot\") for more hints.\nUsing your new wide data frame, calculate the difference in total spending for each local authority between 2015 and 2020. How many local authorities have had an overall reduction in spending since 2015?",
+    "text": "3.1.1 Combining two datasets\nWe may need to combine data from different files within R to perform an analysis. For example, in our case we have the core spending power for each year between 2015 and 2020. If our analysis required comparing this spending over the time period, we would need to combine these files together.\nBefore the data can be combined, it must be loaded into R. We will begin combining data from 2015 and 2016, then extend this to the entire period.\n\n# Return a list of files to copy from the working directory\nlist.files(path = \"data\")\n\n[1] \"CSP_2015.csv\"         \"CSP_2016.csv\"         \"CSP_2017.csv\"        \n[4] \"CSP_2018.csv\"         \"CSP_2019.csv\"         \"CSP_2020.csv\"        \n[7] \"CSP_long_201520.csv\"  \"data_description.pdf\"\n\n# Load the 2015 data and attach as an object\nCSP_2015 &lt;- read_csv(\"data/CSP_2015.csv\")\n\n# Load the 2016 data and attach as an object\nCSP_2016 &lt;- read_csv(\"data/CSP_2016.csv\")\n\nNext, we will combine these datasets by joining them using key variable(s) which are shared between them. In this case, each local authority has a unique identifier code (ons_code) and naming variable (authority), they also should have the same region listed across both datasets.\nIn Tidyverse, there is a family of ‘joining’ functions that combine two datasets at a time. The choice of function depends on which observations we wish to keep where the joining variables do not match between data. 
In this example, we expect all local authority values to be the same across years, so will use the full_join function.\nFor more information about different joining options, check the helpfile via ?full_join.\n\n# Create a new object by joining the two datasets\ncsp_201516 &lt;- full_join(CSP_2015, CSP_2016, \n                        # List the key joining variables (in speech marks)\n                        by = c(\"ons_code\", \"authority\", \"region\"))\n\n\n\n3.1.2 Joining multiple datasets\nR’s joining functions can only be applied to two datasets at a time. To combine all 6 core spending power datasets from 2015 to 2020 in this way would require a lot of repetitive coding (which we want to avoid where necessary).\nAn alternative approach would be to automate this process by using functional programming, implemented using tidyverse’s purrr package.\nThe first step of this process requires loading all csv files into R by repeatedly applying read_csv. This requires a list of file names from the working directory. The function list.files introduced earlier contains an optional argument, pattern which can be used to return files and folders that match a naming pattern. In this case, all csv files begin “CSP_20”, so to return this list of names from the data folder, we use the function:\n\ncsp_201520 &lt;- list.files(path = \"data\", pattern = \"CSP_20\")\n\nNext, we apply read_csv to each element of the list of file names. The function map allows us to do this and return a list of tibbles. As the data lies in a folder in the working directory, we must add this file path to the file names:\n\n# Return a list of files in the data folder containing CSP_20\ncsp_201520 &lt;- list.files(path = \"data\", pattern = \"CSP_20\") %&gt;% \n  # Add \"data/\" to each of these file names\n  paste0(\"data/\", .) 
%&gt;% \n  # Apply read_csv to every element of the list (of file names)\n  map(read_csv)\n\nFinally, we require a function that applies full_join iteratively to the list of tibbles and reduces it to a single tibble containing core spending powers for all years. The function that does this is reduce:\n\n# Return a list of files in the data folder containing CSP_20\ncsp_201520 &lt;- list.files(path = \"data\", pattern = \"CSP_20\") %&gt;% \n  # Add \"data/\" to each of these file names\n  paste0(\"data/\", .) %&gt;% \n  # Apply read_csv to every element of the list (of file names)\n  map(read_csv) %&gt;% \n  # Reduce the list of tibbles to a single object by iteratively joining\n  reduce(full_join, by = c(\"ons_code\", \"authority\", \"region\"))\n\n\n\n3.1.3 Transforming data\nThe dataset containing core spending power in England between 2015 and 2020 is currently in what is known as wide format. This means there is a variable per measure per year, making the object very wide.\nSome analyses and visualisations, particularly those used for temporal data, require a time variable in the dataset (for example, year). This requires the data to be in a different format, known as long format. Long format is where each row contains an observation per year (making the data much longer and narrower).\nTo convert data between wide and long formats, we can use the tidyverse functions pivot_longer and pivot_wider.\nThe first argument required by pivot_longer is the data we wish to transform. This is followed by the columns we wish to pivot (in this case, all variables other than the local authority codes, names, and regions). 
The next steps will depend on the format of data we wish to transform, format of the data we would like to generate, the values we need to include in the long dataset, and where this information will be extracted from.\nFor worked examples and a detailed explanation of different approaches that can be used to pivot data, access the vignette for these functions by entering vignette(\"pivot\") into the R console.\nIn the core spending power example, the new dataset will contain a row per local authority per year. A new year variable will be created using the suffix of the original variable names, and the prefix of the original names (e.g. sfa) will be retained for the new variable names.\nUsing a combination of the helpfile (?pivot_longer) and vignette, the arguments required to convert this data are names_to, to specify how the old variable names will be used in the new data, and names_pattern to define how the old variable names will be separated.\n\n# Create an object csp_long by pivoting csp_201520\ncsp_long &lt;- pivot_longer(csp_201520, \n                         # Pivot columns sfa_2015 up to and including rsdg_2020\n                         cols = sfa_2015:rsdg_2020,\n                         # Separate the old variable names in two, \n                         # keep the prefix as it was, and put the suffix \n                         # into a new variable, year\n                         names_to = c(\".value\", \"year\"),\n                         # The name prefix and suffix were separated by an _,\n                         # the prefix can take different lengths, the suffix \n                         # is always the final 4 characters\n                         names_pattern = \"(.*)_(....)\")\n\n# Check the new, long dataset's structure\nstr(csp_long)\n\ntibble [2,376 × 10] (S3: tbl_df/tbl/data.frame)\n $ ons_code   : chr [1:2376] \"E07000223\" \"E07000223\" \"E07000223\" \"E07000223\" ...\n $ authority  : chr [1:2376] \"Adur\" \"Adur\" \"Adur\" \"Adur\" ...\n 
$ region     : chr [1:2376] \"SE\" \"SE\" \"SE\" \"SE\" ...\n $ year       : chr [1:2376] \"2015\" \"2016\" \"2017\" \"2018\" ...\n $ sfa        : num [1:2376] 3.02 2.39 1.92 1.7 1.74 ...\n $ under_index: num [1:2376] 0.0234 0.0234 0.0248 0.039 0.0567 ...\n $ ct_total   : num [1:2376] 5.47 5.68 5.85 6.08 6.35 ...\n $ nhb        : num [1:2376] 0.652 0.767 0.553 0.202 0.126 ...\n $ nhb_return : num [1:2376] 0.00523 0.00374 0.00397 0 0 ...\n $ rsdg       : num [1:2376] 0 0 0 0 0 ...\n\n\nNotice that the new year variable is recognised as a character, not a numeric variable as we would like. This is because these values were taken from variable names, which R treats as characters. To fix this, we can use the mutate function to convert the new variable into a numeric variable.\nWe may also wish to calculate the total core spending power for each local authority per year to compare this over time:\n\n# Create a new object based on the long data\ncsp_long2 &lt;- mutate(csp_long, \n                    # Convert year to a numeric variable\n                    year = as.numeric(year),\n                    # Create a new total spend variable\n                    total_spend = sfa + under_index + ct_total + nhb + \n                      nhb_return + rsdg)\n\nAfter manipulating and transforming the data into the format we need for analysis and visualisation, we can save this object to reload later. Tibbles and data frame objects can be saved as CSV files using the write_csv function. Remember to save the data with a different name than the raw data to avoid overwriting these files.\n\nwrite_csv(csp_long2, file = \"data/CSP_long_201520.csv\")\n\n\n\n3.1.4 Summary tables\nSummary tables can be created using the summarise function. 
This returns tables in a tibble format, meaning they can easily be customised and exported as CSV files (using the write_csv function).\nThe summarise function is set up similarly to the mutate function: summaries are listed and given variable names, separated by a comma. The difference between these functions is that summarise collapses the tibble into a single summary row, and the new variables must be created using a summary function.\nCommon examples of summary functions include:\n\nmean\nmedian\nrange (gives the minimum and maximum values)\nmin\nmax\nIQR (interquartile range, gives the range of the middle 50% of the sample)\nsd (standard deviation, a measure of the spread when data are normally distributed)\nsum\nn (counts the number of rows the summary is calculated from)\n\nFor a full list of compatible summary functions, view the helpfile ?summarise.\nIf we wanted to summarise the total core spending power between 2015 and 2020 across all local authorities, we can apply summarise to the long format data from the previous section:\n\nsummarise(csp_long2, \n          # Return sum of the total_spend variable\n          total_spend_all = sum(total_spend),\n          # Return the mean total spend\n          mean_total_spend = mean(total_spend),\n          # Return the median total spend\n          median_total_spend = median(total_spend),\n          # Return the 10th percentile (the value that 10% of the sample lies below)\n          quantile10_total_spend = quantile(total_spend, 0.1),\n          # Count the number of rows that have been summarised\n          total_obs = n())\n\n# A tibble: 1 × 5\n  total_spend_all mean_total_spend median_total_spend quantile10_total_spend\n            &lt;dbl&gt;            &lt;dbl&gt;              &lt;dbl&gt;                  &lt;dbl&gt;\n1         263484.             111.               17.6                   8.34\n# ℹ 1 more variable: total_obs &lt;int&gt;\n\n\nThe summarise function can be used to produce grouped summaries. 
This is done by first grouping the data with the group_by function. For example, if we wished to produce a summary table with a row per local authority, summarising the total spending between 2015 and 2020, we would use the following:\n\ncsp_long2 %&gt;% \n  # Group by the local authority's unique identifiers\n  group_by(ons_code, authority) %&gt;% \n  # Total spend 2015 - 2020\n  summarise(total_spend_all = sum(total_spend),\n            # Mean spend 2015 - 2020  \n            mean_total_spend = mean(total_spend),\n            # Median spend 2015 - 2020\n            median_total_spend = median(total_spend),\n            # 10th percentile of total spend\n            quantile10_total_spend = quantile(total_spend, 0.1),\n            # Number of rows summarised over\n            total_obs = n()) %&gt;%\n  # Remove grouping structure\n  ungroup()\n\n# A tibble: 396 × 7\n   ons_code  authority       total_spend_all mean_total_spend median_total_spend\n   &lt;chr&gt;     &lt;chr&gt;                     &lt;dbl&gt;            &lt;dbl&gt;              &lt;dbl&gt;\n 1 -         Greater London…          12416.           2069.              2022. \n 2 E06000001 Hartlepool                 485.             80.8               80.3\n 3 E06000002 Middlesbrough              711.            119.               118. \n 4 E06000003 Redcar And Cle…            660.            110.               109. \n 5 E06000004 Stockton-on-Te…            832.            139.               138. \n 6 E06000005 Darlington                 474.             79.0               78.5\n 7 E06000006 Halton                     598.             99.7               99.0\n 8 E06000007 Warrington                 806.            134.               133. \n 9 E06000008 Blackburn with…            692.            115.               115. \n10 E06000009 Blackpool                  750.            125.               124. 
\n# ℹ 386 more rows\n# ℹ 2 more variables: quantile10_total_spend &lt;dbl&gt;, total_obs &lt;int&gt;\n\n\n\n\n\n\n\n\nWarning\n\n\n\nWhenever using group_by, make sure to ungroup the data before proceeding. The grouping structure can be large and slow analysis down, or may interact with other functions to produce unexpected analyses.\n\n\n\n\nExercise 4\n\nCreate a data frame with the minimum, maximum and median total spend per year for each region.\nProduce a frequency table containing the number and percentage of local authorities in each region.\nConvert the data object csp_long2 back into wide format, with one row per local authority and one variable per total spend per year (HINT: start by selecting only the variables you need from the long data frame). Use the help file ?pivot_wider and vignette(\"pivot\") for more hints.\nUsing your new wide data frame, calculate the difference in total spending for each local authority between 2015 and 2020. How many local authorities have had an overall reduction in spending since 2015?",
     "crumbs": [
-      "<span class='chapter-number'>4</span>  <span class='chapter-title'>Session 3: Data preparation and manipulation</span>"
+      "<span class='chapter-number'>3</span>  <span class='chapter-title'>Data preparation and manipulation</span>"
     ]
   },
   {
     "objectID": "session4_notes.html",
     "href": "session4_notes.html",
-    "title": "5  Session 4: Data visualisation with ggplot2",
+    "title": "4  Data visualisation with ggplot2",
     "section": "",
-    "text": "5.1 Data visualisation with ggplot2\nData visualisation is a powerful tool with multiple important uses. First, visualisations allow us to explore the data, identify potential outliers and errors, or check that the variables behave in the way we would expect them to if they had been recorded correctly. Visualisations can also be used as an analysis tool, allowing us to identify trends in the data or differences between groups. Finally, visualisations can help to convey messages to an audience in a clear, concise way that is often more powerful than presenting them using numbers or text. In some cases, data visualisations can show results so clearly that further analysis is arguably unnecessary.",
+    "text": "4.1 Data visualisation with ggplot2\nData visualisation is a powerful tool with multiple important uses. First, visualisations allow us to explore the data, identify potential outliers and errors, or check that the variables behave in the way we would expect them to if they had been recorded correctly. Visualisations can also be used as an analysis tool, allowing us to identify trends in the data or differences between groups. Finally, visualisations can help to convey messages to an audience in a clear, concise way that is often more powerful than presenting them using numbers or text. In some cases, data visualisations can show results so clearly that further analysis is arguably unnecessary.",
     "crumbs": [
-      "<span class='chapter-number'>5</span>  <span class='chapter-title'>Session 4: Data visualisation with ggplot2</span>"
+      "<span class='chapter-number'>4</span>  <span class='chapter-title'>Data visualisation with ggplot2</span>"
     ]
   },
   {
     "objectID": "session4_notes.html#data-visualisation-with-ggplot2",
     "href": "session4_notes.html#data-visualisation-with-ggplot2",
-    "title": "5  Session 4: Data visualisation with ggplot2",
+    "title": "4  Data visualisation with ggplot2",
     "section": "",
-    "text": "5.1.1 Choosing the most appropriate visualisation\nThe most appropriate choice of visualisation will depend on the type of variable(s) we wish to display, the number of variables and the message we are trying to disseminate. Common plots used to display combinations of different types of data are given in following table:\n\n\n\nTable 6.1: Common visualisations by number and type of variables, with ggplot2 geom\n\n\nR is very flexible when it comes to visualising data and contains a wide variety of options to customise graphs. This section will focus on the Tidyverse package ggplot2 and introduce some of the more commonly used graphical functions and parameters but is by no means comprehensive.\n\n\n5.1.2 The ggplot2 package\nThe ggplot2 package implements the ‘grammar of graphics’, a system that aims to describe all statistical graphics in terms of their components or layers. All graphics can be broken down into the same components: the data, a coordinate system (or plot area) and some visual markings of the data. More complex plots may have additional layers but all must contain these three.\nFor example, in the csp_2020 dataset, we may wish to explore the relationship between the settlement funding assessment (sfa_2020) and council tax total (ct_total_2020) spending for each local authority. To visualise the relationship between two continuous numeric variables, a scatterplot would be most appropriate.\nWithin the ggplot2 package, we first use the ggplot function to create a coordinate system (a blank plot space) that we can add layers and objects to. Within this function, we specify the data that we wish to display on the coordinate system:\n\nggplot(data = csp_2020)\n\nTo add information to this graph, we add a geom layer: a visual representation of the data. There are many different geom objects built into the ggplot2 package (begin typing ?geom into the console to see a list). 
The geom_point function is used to create scatterplots.\nEach geom object must contain a mapping argument, coupled with the aes function which defines how the variables in the dataset are visualised. In this case, we use the aes function to specify the variables on the x and y axes but it can also be used to set the colour, size or symbol based on variable values.\n\n\n\n\n\n\nWarning\n\n\n\nAlthough ggplot2 is a tidyverse package, it uses a different method of piping to the other packages. Use the + symbol to add an extra layer when working in ggplot.\n\n\n\n# Generate the chart area and specify the data\nggplot(data = csp_2020) + \n  # Add points, defined by sfa_2020 and ct_total_2020 \n  geom_point(mapping = aes(x = sfa_2020, y = ct_total_2020))\n\n\n\n\n\n\n\n\nThe resulting scatterplot shows a positive association between the SFA and council tax spending in English local authorities during 2020. We can identify an outlier in the top right corner of the graph. Before proceding, we want to ensure that this observation is an outlier and not an error to be removed from the data. We can use the filter function to return the name of the local authority that matches these values:\n\n# Using the data csp_2020\ncsp_2020 %&gt;% \n  # Return just rows where sfa_2020 is over 1000, and then\n  filter(sfa_2020 &gt; 1000) %&gt;% \n  # Return the authority names\n  select(authority)\n## # A tibble: 1 × 1\n##   authority               \n##   &lt;chr&gt;                   \n## 1 Greater London Authority\n\nThis outlier is the Greater London Authority which is a combination of local authorities that are already included in the dataset. Including this observation would introduce duplicates into the analysis, and so this observation should be removed to avoid invalid results. 
To remove the Greater London Authority observation, we can combine the filter and ggplot functions using pipes:\n\n# Take the csp_2020 data, and then\ncsp_2020 %&gt;% \n  # Return all rows where authority is not equal to Greater London Authority,\n  # and then\n  filter(authority != \"Greater London Authority\") %&gt;% \n  # Generate a plot\n  ggplot( ) + \n  # Add visual markings based on the data\n  geom_point(aes(x = sfa_2020, y = ct_total_2020))\n\n\n\n\n\n\n\n\nGraphs appear in the plot window in RStudio and can be opened in a new window using the  icon. Graphs in this window can also be copied and pasted into other documents using the  icon and selecting Copy to clipboard.\nNew graphs will replace existing ones in this window but all graphs created in the current session of R can be explored using the  icons.\nGraphs can be stored as objects using the &lt;- symbol. These objects can then be saved as picture or PDF files using the ggsave function:\n\n# Create a new object, beginning from csp_2020, and then\nsfa_ct_plot &lt;- csp_2020 %&gt;% \n  # Return all rows where authority name is not GLA, and then\n  filter(authority != \"Greater London Authority\") %&gt;%\n  # Create a ggplot area\n  ggplot( ) + \n  # Add visual markings from the data\n  geom_point(aes(x = sfa_2020, y = ct_total_2020))\n\n# Save the graph object as a png file\nggsave(sfa_ct_plot, filename = \"sfa_ct_plot.png\")\n\n\n\nExercise 5\n\nCreate a new data object containing the 2020 CSP data without the Greater London Authority observation. 
Name this data frame csp_nolon_2020.\nUsing the csp_nolon_2020 data, create a data visualisation to check the distribution (or shape) of the SFA variable.\nBased on the visualisation above, create a summary table for the SFA variable containing the minimum and maximum, and appropriate measures of the centre/average and spread.\n\n\n\n5.1.3 Customising visualisations\nAdditional variables can be included into a visualisation within the mapping argument of a geom function. For example, we could explore the relationship between SFA and council tax across regions by colouring points based on the region:\n\nggplot(data = csp_nolon_2020) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region))\n\n\n\n\n\n\n\n\nBy default, R uses alphabetical ordering for character variables. To change this order, the variable must be converted into a factor. A factor is how R recognises categorical variables. For example, to order the region legend so that the London region appears first, followed by other regions from north to south, we would use the mutate function, combined with the factor function to create a new, ordered variable. The argument levels allows us to specify the order of categories in a factor:\n\ncsp_nolon_2020_new &lt;- csp_nolon_2020 %&gt;% \n  mutate(region_fct = factor(region, \n                             levels = c(\"L\", \"NW\", \"NE\", \"YH\", \"WM\", \n                                        \"EM\", \"EE\", \"SW\", \"SE\")))\n\nggplot(data = csp_nolon_2020_new) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region_fct))\n\n\n\n\n\n\n\n\nArguments that can be adjusted within geoms include:\n\ncolour: change the colour (if point or line) or outline (if bar or histogram) of the markings\nsize: change the size of the markings (if point used)\nshape: change the shape of markings (for points)\nfill: Change the colour of bars in bar charts or histograms\nlinewidth: Change the line width\nlinetype: Choose the type of line (e.g. 
dotted)\nalpha: Change the transparency of a visualisation\n\n\n\n\n\n\n\nWarning\n\n\n\nAlthough it may be tempting to add many variables to the same visualisation, be sure that you are not overcomplicating the graph and losing important messages. It is better to have multiple, clear but simpler visualisations, than fewer confusing ones.\n\n\nAesthetic properties of the geom object may also be set manually, outside of the aes function, using the same argument but with a shared value rather than a variable. For example:\n\nggplot(csp_nolon_2020_new) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020), \n             # Adding the colour outside of the aes wrapper as it is not \n             # from the data\n             colour = \"blue\")\n\n\n\n\n\n\n\n\n\n\nExercise 6\n\nWhat is the problem with the following code? Fix the code to change the shape of all the points.\n\n\nggplot(csp_nolon_2020) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, shape = \"*\"))\n\n\nAdd a line of best fit to the scatterplot showing the relationship between SFA and council tax total (hint: use ?geom_smooth).\nAdd a line of best fit for each region (hint: make each line a different colour).\n\n\n\n5.1.4 Scale functions\n\n5.1.4.1 Customising axes\nScale functions allow us to customise aesthetics defined in geom objects such as colours and axes labels. They take the form scale_'aesthetic to customise'_'scale of variable’. For example, scale_x_continuous customises the x axis when the variable is continuous, and scale_x_discrete can be used where the variable is discrete or categorical. Arguments to customise the x or y axes include:\n\nname = to change the axis title\nlimits = c(...) sets the axis limits\nbreaks = c(...) defines tick marks\nlabels = c(...) 
attaches labels to break values\ntrans = transforms the scale that the axis is shown on.\n\n\nggplot(csp_nolon_2020_new) + \n  # Scatterplot with SFA on x, CT on y, and colour by region\n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region_fct)) +\n  # Add title to x axis\n  scale_x_continuous(name = \"Settlement funding assessment (£ millions)\", \n                     # Set x axis limits from 0 to 600\n                     limits = c(0, 600), \n                     # Set tick marks ever 200\n                     breaks = c(0, 200, 400, 600)) +\n  # Add title to y axis\n  scale_y_continuous(name = \"Council tax (£ millions)\", \n                     # Show the y axis on a square root scale\n                     trans = \"sqrt\")\n\n\n\n\n\n\n\n\nA common transformation that can be useful to explore the relationship between variables which have clusters of smaller values is the logarithm (or log) function. Applying a log function to a scale increases the difference between smaller values (stretching out these clusters), while reducing the difference between the smaller values and largest ones. Log functions can only be applied to positive, non-zero numbers. 
Where a sample may contain zeroes, the transformation log1p can be applied instead which adds 1 to each value before applying the log transformation (\\(log(n + 1)\\)):\n\nggplot(csp_nolon_2020_new) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region_fct)) +\n  scale_x_continuous(name = \"SFA\", limits = c(0, 600), \n                     breaks = c(0, 200, 400, 600),\n                     trans = \"log1p\") +\n  scale_y_continuous(name = \"Council tax\", \n                     trans = \"log1p\") \n\n\n\n\n\n\n\n\nWe can now clearly see the strong positive association between SFA and council tax spending in local authorities with lower values of this without losing any information.\n\n\n5.1.4.2 6.3.2 Customising colour scales\nThere are a wide range of options for customising the colour aesthetics of geoms. These include pre-defined colour palettes, such as scale_colour_viridis_c for continuous variables, or scale_colour_viridis_d for discrete or categorical variables. Viridis colour palettes are designed to be colourblind friendly and print well in grey scale. There are also many R packages containing colour palettes for different scenarios.\nColour palettes can be created manually for categorical variables using the scale_colour_manual function. 
Here, the argument values allows us to specify a colour per category.\n\n\n\n\n\n\nStyle tip\n\n\n\nR contains a list of 657 pre-programmed colours that can be used to create palettes (run colours() in the console for a full list).\nHexadecimal codes can also be included instead in the form #rrggbb (where rr (red), gg (green), and bb (blue) are numbers between 00 and 99 giving the level of intensity of each colour).\n\n\nWhere a colour palette will be used across multiple plots, defining this list of colours as a vector and then entering this into scale_colour_manual will reduce repetition:\n\nregion_palette &lt;- c(\"aquamarine2\", \"blue\", \"chartreuse2\", \"coral\", \"orchid\",\n                    \"firebrick\", \"gold3\", \"violetred\", \"grey50\")\n\nggplot(csp_nolon_2020_new) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region_fct)) +\n  scale_x_continuous(name = \"SFA\",  trans = \"log1p\") +\n  scale_y_continuous(name = \"Council tax\", trans = \"log1p\") +\n  scale_colour_manual(name = \"Region\", values = region_palette)\n\n\n\n\n\n\n\n\nPalettes can also be created using gradients with the scale_colour_gradient function, that specifies a two colour gradient from low to high, scale_colour_gradient2 that creates a diverging gradient using low, medium, and high colours, and scale_colour_gradientn that creates an n-colour gradient.\n\n\n\n5.1.5 Other labelling functions\nAlthough axis and legend labels can be updated within scale functions, the labs function exist as an alternative. This function also allows us to add titles and subtitles to visualisations:\n\nlabs(x = “x-axis name”, y = “y-axis name”,\n    colour = “Grouping variable name”, title = “Main title”,\n    subtitle = “Subtitle”, caption = “Footnote”)\n\nThe annotate function allows us to add text and other objects to a ggplot object. 
For example:\n\nannotate(“text”, x = 50, y = 200, label = “Text label here”)\n\nAdds “Text label here” to a plot at the coordinates (50, 200) on a graph, and\n\nannotate(“rect”, xmin = 0, xmax = 10, ymin = 20, ymax = 50, alpha = 0.2)\n\nadds a rectangle to the graph.\n\n\n5.1.6 Theme functions\nThe theme function modifies non-data components of the visualisation. For example, the legend position, label fonts, the graph background, and gridlines. There are many options that exist within the theme function (use ?theme to list them all).\n\n\n\n\n\n\nNote\n\n\n\nMany of the elements that can be customised within the theme function require an element wrapper. This wrapper is determined by the type of object we are customising (e.g. element_text when customising text, element_rect when customising a background, element_blank to remove something). Check ?theme for more information.\n\n\nOne of the most common theme options is legend.position which can be used to move the legend to the top or bottom of the graph space (legend.position = “top” or legend.position = “bottom”) or remove the legend completely (legend.position = “none”).\nggplot also contains a number of pre-defined ‘complete themes’ which change all non-data elements of the plot to a programmed default. For example theme_void removes all gridlines and axes, theme_light changes the graph background white and the gridlines and axes light grey:\n\nggplot(csp_nolon_2020_new) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region_fct)) +\n  scale_x_continuous(name = \"SFA\",  trans = \"log1p\") +\n  scale_y_continuous(name = \"Council tax\", trans = \"log1p\") +\n  scale_colour_manual(name = \"Region\", values = region_palette) + \n  theme_void( )\n\n\n\n\n\n\n\n\nOne benefit of using themes is that all visualisations will be consistent in terms of colour scheme, font size and gridlines. Although there are pre-built themes, we are able to create our own and save them as functions. 
These can then be used in place of R’s themes. For example:\n\n# Create a theme function\ntheme_intro_course &lt;- function( ) {\n  # Move the legend to the bottom \n  theme(legend.position =  \"bottom\",\n        # Make the axis labels font size 10\n        axis.text = element_text(size = 10),\n        # Make the axis titles font size 15\n        axis.title = element_text(size = 15),\n        # Make the graph title font size 20\n        title = element_text(size = 20),\n        # Make the plot area white with a grey outline\n        panel.background = element_rect(fill = \"white\", colour = \"grey50\"))\n}\n\nThe function theme_intro_course can be added to the end of any visualisation and will move the legend to the bottom of the graph, change the axis text to size 10, the axis titles to size 15, the plot title to size 20, and the graph background to white with a grey outline:\n\nggplot(csp_nolon_2020_new) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region_fct)) +\n  scale_x_continuous(name = \"SFA\",  trans = \"log1p\") +\n  scale_y_continuous(name = \"Council tax\", trans = \"log1p\") +\n  scale_colour_manual(name = \"Region\", values = region_palette) + \n  theme_intro_course( )\n\n\n\n\n\n\n\n\nCreating a custom theme is useful to ensure all visualisations are formatted consistently.\n\n\n5.1.7 Facet functions\nFaceting allows us to divide a plot into subplots based on some grouping variable within the data. This allows us to show multiple variables in the same visualisation without risking overloading the plot and losing the intended message.\nFor example, if we wish to show the relationship between SFA, council tax total and regions over the entire time period, we may wish to create a scatterplot per year. Faceting allows us to do this in one piece of code rather than repeating it per year. Faceting will also ensure that plots are on the same scale and therefore easier to compare. 
The function facet_wrap creates these facetted plots:\n\n# Take the long formatted dataset\ncsp_long2 %&gt;% \n  # Remove the Greater London Authority row\n  filter(authority != \"Greater London Authority\") %&gt;% \n  ggplot( ) +\n  # Plot the SFA against CT total and colour by region\n  geom_point(aes(x = sfa, y = ct_total, colour = region)) +\n  # Use the region colour palette\n  scale_colour_manual(name = \"Region\", values = region_palette) + \n  # Change the axis titles\n  labs(x = \"Settlement funding assessment (£ millions)\", \n       y = \"Council tax total (£ millions)\", colour = \"Region\") +\n  # Separate data into a plot per region\n  facet_wrap(~ year) +\n  # Use the intro course theme\n  theme_intro_course()\n\n\n\n\n\n\n\n\n\n\nExercise 7\nUse an appropriate data visualisation to show how the total spend in each local authority has changed over the years between 2015 and 2020. Choose a visualisation that shows these trends over time and allows us to compare them between regions.",
+    "text": "4.1.1 Choosing the most appropriate visualisation\nThe most appropriate choice of visualisation will depend on the type of variable(s) we wish to display, the number of variables and the message we are trying to disseminate. Common plots used to display combinations of different types of data are given in following table:\n\n\n\nTable 6.1: Common visualisations by number and type of variables, with ggplot2 geom\n\n\nR is very flexible when it comes to visualising data and contains a wide variety of options to customise graphs. This section will focus on the Tidyverse package ggplot2 and introduce some of the more commonly used graphical functions and parameters but is by no means comprehensive.\n\n\n4.1.2 The ggplot2 package\nThe ggplot2 package implements the ‘grammar of graphics’, a system that aims to describe all statistical graphics in terms of their components or layers. All graphics can be broken down into the same components: the data, a coordinate system (or plot area) and some visual markings of the data. More complex plots may have additional layers but all must contain these three.\nFor example, in the csp_2020 dataset, we may wish to explore the relationship between the settlement funding assessment (sfa_2020) and council tax total (ct_total_2020) spending for each local authority. To visualise the relationship between two continuous numeric variables, a scatterplot would be most appropriate.\nWithin the ggplot2 package, we first use the ggplot function to create a coordinate system (a blank plot space) that we can add layers and objects to. Within this function, we specify the data that we wish to display on the coordinate system:\n\nggplot(data = csp_2020)\n\nTo add information to this graph, we add a geom layer: a visual representation of the data. There are many different geom objects built into the ggplot2 package (begin typing ?geom into the console to see a list). 
The geom_point function is used to create scatterplots.\nEach geom object must contain a mapping argument, coupled with the aes function which defines how the variables in the dataset are visualised. In this case, we use the aes function to specify the variables on the x and y axes but it can also be used to set the colour, size or symbol based on variable values.\n\n\n\n\n\n\nWarning\n\n\n\nAlthough ggplot2 is a tidyverse package, it uses a different method of piping to the other packages. Use the + symbol to add an extra layer when working in ggplot.\n\n\n\n# Generate the chart area and specify the data\nggplot(data = csp_2020) + \n  # Add points, defined by sfa_2020 and ct_total_2020 \n  geom_point(mapping = aes(x = sfa_2020, y = ct_total_2020))\n\n\n\n\n\n\n\n\nThe resulting scatterplot shows a positive association between the SFA and council tax spending in English local authorities during 2020. We can identify an outlier in the top right corner of the graph. Before proceeding, we want to ensure that this observation is an outlier and not an error to be removed from the data. We can use the filter function to return the name of the local authority that matches these values:\n\n# Using the data csp_2020\ncsp_2020 %&gt;% \n  # Return just rows where sfa_2020 is over 1000, and then\n  filter(sfa_2020 &gt; 1000) %&gt;% \n  # Return the authority names\n  select(authority)\n## # A tibble: 1 × 1\n##   authority               \n##   &lt;chr&gt;                   \n## 1 Greater London Authority\n\nThis outlier is the Greater London Authority which is a combination of local authorities that are already included in the dataset. Including this observation would introduce duplicates into the analysis, and so this observation should be removed to avoid invalid results. 
To remove the Greater London Authority observation, we can combine the filter and ggplot functions using pipes:\n\n# Take the csp_2020 data, and then\ncsp_2020 %&gt;% \n  # Return all rows where authority is not equal to Greater London Authority,\n  # and then\n  filter(authority != \"Greater London Authority\") %&gt;% \n  # Generate a plot\n  ggplot( ) + \n  # Add visual markings based on the data\n  geom_point(aes(x = sfa_2020, y = ct_total_2020))\n\n\n\n\n\n\n\n\nGraphs appear in the plot window in RStudio and can be opened in a new window using the  icon. Graphs in this window can also be copied and pasted into other documents using the  icon and selecting Copy to clipboard.\nNew graphs will replace existing ones in this window but all graphs created in the current session of R can be explored using the  icons.\nGraphs can be stored as objects using the &lt;- symbol. These objects can then be saved as picture or PDF files using the ggsave function:\n\n# Create a new object, beginning from csp_2020, and then\nsfa_ct_plot &lt;- csp_2020 %&gt;% \n  # Return all rows where authority name is not GLA, and then\n  filter(authority != \"Greater London Authority\") %&gt;%\n  # Create a ggplot area\n  ggplot( ) + \n  # Add visual markings from the data\n  geom_point(aes(x = sfa_2020, y = ct_total_2020))\n\n# Save the graph object as a png file\nggsave(sfa_ct_plot, filename = \"sfa_ct_plot.png\")\n\n\n\nExercise 5\n\nCreate a new data object containing the 2020 CSP data without the Greater London Authority observation. 
Name this data frame csp_nolon_2020.\nUsing the csp_nolon_2020 data, create a data visualisation to check the distribution (or shape) of the SFA variable.\nBased on the visualisation above, create a summary table for the SFA variable containing the minimum and maximum, and appropriate measures of the centre/average and spread.\n\n\n\n4.1.3 Customising visualisations\nAdditional variables can be included into a visualisation within the mapping argument of a geom function. For example, we could explore the relationship between SFA and council tax across regions by colouring points based on the region:\n\nggplot(data = csp_nolon_2020) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region))\n\n\n\n\n\n\n\n\nBy default, R uses alphabetical ordering for character variables. To change this order, the variable must be converted into a factor. A factor is how R recognises categorical variables. For example, to order the region legend so that the London region appears first, followed by other regions from north to south, we would use the mutate function, combined with the factor function to create a new, ordered variable. The argument levels allows us to specify the order of categories in a factor:\n\ncsp_nolon_2020_new &lt;- csp_nolon_2020 %&gt;% \n  mutate(region_fct = factor(region, \n                             levels = c(\"L\", \"NW\", \"NE\", \"YH\", \"WM\", \n                                        \"EM\", \"EE\", \"SW\", \"SE\")))\n\nggplot(data = csp_nolon_2020_new) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region_fct))\n\n\n\n\n\n\n\n\nArguments that can be adjusted within geoms include:\n\ncolour: change the colour (if point or line) or outline (if bar or histogram) of the markings\nsize: change the size of the markings (if point used)\nshape: change the shape of markings (for points)\nfill: Change the colour of bars in bar charts or histograms\nlinewidth: Change the line width\nlinetype: Choose the type of line (e.g. 
dotted)\nalpha: Change the transparency of a visualisation\n\n\n\n\n\n\n\nWarning\n\n\n\nAlthough it may be tempting to add many variables to the same visualisation, be sure that you are not overcomplicating the graph and losing important messages. It is better to have multiple, clear but simpler visualisations, than fewer confusing ones.\n\n\nAesthetic properties of the geom object may also be set manually, outside of the aes function, using the same argument but with a shared value rather than a variable. For example:\n\nggplot(csp_nolon_2020_new) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020), \n             # Adding the colour outside of the aes wrapper as it is not \n             # from the data\n             colour = \"blue\")\n\n\n\n\n\n\n\n\n\n\nExercise 6\n\nWhat is the problem with the following code? Fix the code to change the shape of all the points.\n\n\nggplot(csp_nolon_2020) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, shape = \"*\"))\n\n\nAdd a line of best fit to the scatterplot showing the relationship between SFA and council tax total (hint: use ?geom_smooth).\nAdd a line of best fit for each region (hint: make each line a different colour).\n\n\n\n4.1.4 Scale functions\n\n4.1.4.1 Customising axes\nScale functions allow us to customise aesthetics defined in geom objects such as colours and axes labels. They take the form scale_'aesthetic to customise'_'scale of variable’. For example, scale_x_continuous customises the x axis when the variable is continuous, and scale_x_discrete can be used where the variable is discrete or categorical. Arguments to customise the x or y axes include:\n\nname = to change the axis title\nlimits = c(...) sets the axis limits\nbreaks = c(...) defines tick marks\nlabels = c(...) 
attaches labels to break values\ntrans = transforms the scale that the axis is shown on.\n\n\nggplot(csp_nolon_2020_new) + \n  # Scatterplot with SFA on x, CT on y, and colour by region\n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region_fct)) +\n  # Add title to x axis\n  scale_x_continuous(name = \"Settlement funding assessment (£ millions)\", \n                     # Set x axis limits from 0 to 600\n                     limits = c(0, 600), \n                     # Set tick marks every 200\n                     breaks = c(0, 200, 400, 600)) +\n  # Add title to y axis\n  scale_y_continuous(name = \"Council tax (£ millions)\", \n                     # Show the y axis on a square root scale\n                     trans = \"sqrt\")\n\n\n\n\n\n\n\n\nA common transformation that can be useful to explore the relationship between variables which have clusters of smaller values is the logarithm (or log) function. Applying a log function to a scale increases the difference between smaller values (stretching out these clusters), while reducing the difference between the smaller values and largest ones. Log functions can only be applied to positive, non-zero numbers. 
Where a sample may contain zeroes, the transformation log1p can be applied instead which adds 1 to each value before applying the log transformation (\\(log(n + 1)\\)):\n\nggplot(csp_nolon_2020_new) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region_fct)) +\n  scale_x_continuous(name = \"SFA\", limits = c(0, 600), \n                     breaks = c(0, 200, 400, 600),\n                     trans = \"log1p\") +\n  scale_y_continuous(name = \"Council tax\", \n                     trans = \"log1p\") \n\n\n\n\n\n\n\n\nWe can now clearly see the strong positive association between SFA and council tax spending in local authorities with lower values of this without losing any information.\n\n\n4.1.4.2 6.3.2 Customising colour scales\nThere are a wide range of options for customising the colour aesthetics of geoms. These include pre-defined colour palettes, such as scale_colour_viridis_c for continuous variables, or scale_colour_viridis_d for discrete or categorical variables. Viridis colour palettes are designed to be colourblind friendly and print well in grey scale. There are also many R packages containing colour palettes for different scenarios.\nColour palettes can be created manually for categorical variables using the scale_colour_manual function. 
Here, the argument values allows us to specify a colour per category.\n\n\n\n\n\n\nStyle tip\n\n\n\nR contains a list of 657 pre-programmed colours that can be used to create palettes (run colours() in the console for a full list).\nHexadecimal codes can also be included instead in the form #rrggbb (where rr (red), gg (green), and bb (blue) are hexadecimal values between 00 and FF giving the level of intensity of each colour).\n\n\nWhere a colour palette will be used across multiple plots, defining this list of colours as a vector and then entering this into scale_colour_manual will reduce repetition:\n\nregion_palette &lt;- c(\"aquamarine2\", \"blue\", \"chartreuse2\", \"coral\", \"orchid\",\n                    \"firebrick\", \"gold3\", \"violetred\", \"grey50\")\n\nggplot(csp_nolon_2020_new) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region_fct)) +\n  scale_x_continuous(name = \"SFA\",  trans = \"log1p\") +\n  scale_y_continuous(name = \"Council tax\", trans = \"log1p\") +\n  scale_colour_manual(name = \"Region\", values = region_palette)\n\n\n\n\n\n\n\n\nPalettes can also be created using gradients with the scale_colour_gradient function, that specifies a two colour gradient from low to high, scale_colour_gradient2 that creates a diverging gradient using low, medium, and high colours, and scale_colour_gradientn that creates an n-colour gradient.\n\n\n\n4.1.5 Other labelling functions\nAlthough axis and legend labels can be updated within scale functions, the labs function exists as an alternative. This function also allows us to add titles and subtitles to visualisations:\n\nlabs(x = “x-axis name”, y = “y-axis name”,\n    colour = “Grouping variable name”, title = “Main title”,\n    subtitle = “Subtitle”, caption = “Footnote”)\n\nThe annotate function allows us to add text and other objects to a ggplot object. 
For example:\n\nannotate(“text”, x = 50, y = 200, label = “Text label here”)\n\nAdds “Text label here” to a plot at the coordinates (50, 200) on a graph, and\n\nannotate(“rect”, xmin = 0, xmax = 10, ymin = 20, ymax = 50, alpha = 0.2)\n\nadds a rectangle to the graph.\n\n\n4.1.6 Theme functions\nThe theme function modifies non-data components of the visualisation. For example, the legend position, label fonts, the graph background, and gridlines. There are many options that exist within the theme function (use ?theme to list them all).\n\n\n\n\n\n\nNote\n\n\n\nMany of the elements that can be customised within the theme function require an element wrapper. This wrapper is determined by the type of object we are customising (e.g. element_text when customising text, element_rect when customising a background, element_blank to remove something). Check ?theme for more information.\n\n\nOne of the most common theme options is legend.position which can be used to move the legend to the top or bottom of the graph space (legend.position = “top” or legend.position = “bottom”) or remove the legend completely (legend.position = “none”).\nggplot also contains a number of pre-defined ‘complete themes’ which change all non-data elements of the plot to a programmed default. For example theme_void removes all gridlines and axes, theme_light changes the graph background white and the gridlines and axes light grey:\n\nggplot(csp_nolon_2020_new) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region_fct)) +\n  scale_x_continuous(name = \"SFA\",  trans = \"log1p\") +\n  scale_y_continuous(name = \"Council tax\", trans = \"log1p\") +\n  scale_colour_manual(name = \"Region\", values = region_palette) + \n  theme_void( )\n\n\n\n\n\n\n\n\nOne benefit of using themes is that all visualisations will be consistent in terms of colour scheme, font size and gridlines. Although there are pre-built themes, we are able to create our own and save them as functions. 
These can then be used in place of R’s themes. For example:\n\n# Create a theme function\ntheme_intro_course &lt;- function( ) {\n  # Move the legend to the bottom \n  theme(legend.position =  \"bottom\",\n        # Make the axis labels font size 10\n        axis.text = element_text(size = 10),\n        # Make the axis titles font size 15\n        axis.title = element_text(size = 15),\n        # Make the graph title font size 20\n        title = element_text(size = 20),\n        # Make the plot area white with a grey outline\n        panel.background = element_rect(fill = \"white\", colour = \"grey50\"))\n}\n\nThe function theme_intro_course can be added to the end of any visualisation and will move the legend to the bottom of the graph, change the axis text to size 10, the axis titles to size 15, the plot title to size 20, and the graph background to white with a grey outline:\n\nggplot(csp_nolon_2020_new) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, colour = region_fct)) +\n  scale_x_continuous(name = \"SFA\",  trans = \"log1p\") +\n  scale_y_continuous(name = \"Council tax\", trans = \"log1p\") +\n  scale_colour_manual(name = \"Region\", values = region_palette) + \n  theme_intro_course( )\n\n\n\n\n\n\n\n\nCreating a custom theme is useful to ensure all visualisations are formatted consistently.\n\n\n4.1.7 Facet functions\nFaceting allows us to divide a plot into subplots based on some grouping variable within the data. This allows us to show multiple variables in the same visualisation without risking overloading the plot and losing the intended message.\nFor example, if we wish to show the relationship between SFA, council tax total and regions over the entire time period, we may wish to create a scatterplot per year. Faceting allows us to do this in one piece of code rather than repeating it per year. Faceting will also ensure that plots are on the same scale and therefore easier to compare. 
The function facet_wrap creates these facetted plots:\n\n# Take the long formatted dataset\ncsp_long2 %&gt;% \n  # Remove the Greater London Authority row\n  filter(authority != \"Greater London Authority\") %&gt;% \n  ggplot( ) +\n  # Plot the SFA against CT total and colour by region\n  geom_point(aes(x = sfa, y = ct_total, colour = region)) +\n  # Use the region colour palette\n  scale_colour_manual(name = \"Region\", values = region_palette) + \n  # Change the axis titles\n  labs(x = \"Settlement funding assessment (£ millions)\", \n       y = \"Council tax total (£ millions)\", colour = \"Region\") +\n  # Separate data into a plot per year\n  facet_wrap(~ year) +\n  # Use the intro course theme\n  theme_intro_course()\n\n\n\n\n\n\n\n\n\n\nExercise 7\nUse an appropriate data visualisation to show how the total spend in each local authority has changed over the years between 2015 and 2020. Choose a visualisation that shows these trends over time and allows us to compare them between regions.",
     "crumbs": [
-      "<span class='chapter-number'>5</span>  <span class='chapter-title'>Session 4: Data visualisation with ggplot2</span>"
+      "<span class='chapter-number'>4</span>  <span class='chapter-title'>Data visualisation with ggplot2</span>"
     ]
   },
   {
     "objectID": "session5_notes.html",
     "href": "session5_notes.html",
-    "title": "6  Reproducible research with RMarkdown",
+    "title": "5  Reproducible research with RMarkdown",
     "section": "",
-    "text": "6.1 Introduction to RMarkdown\nRMarkdown is a tool that is used to author high-quality documents, making it easy to communicate results efficiently. One of the main appeals of RMarkdown is that it is easy to integrate R code and output seamlessly into a document, encouraging openness and reproducibility in research.\nThere are a number of ways we can use RMarkdown to enhance the research process, such as:\nBefore we begin working with RMarkdown in RStudio, we must first download and install the rmarkdown package as we would any other package:\ninstall.packages(\"rmarkdown\")\n\nlibrary(rmarkdown)",
+    "text": "5.1 Introduction to RMarkdown\nRMarkdown is a tool that is used to author high-quality documents, making it easy to communicate results efficiently. One of the main appeals of RMarkdown is that it is easy to integrate R code and output seamlessly into a document, encouraging openness and reproducibility in research.\nThere are a number of ways we can use RMarkdown to enhance the research process, such as:\nBefore we begin working with RMarkdown in RStudio, we must first download and install the rmarkdown package as we would any other package:\ninstall.packages(\"rmarkdown\")\n\nlibrary(rmarkdown)",
     "crumbs": [
-      "<span class='chapter-number'>6</span>  <span class='chapter-title'>Reproducible research with RMarkdown</span>"
+      "<span class='chapter-number'>5</span>  <span class='chapter-title'>Reproducible research with RMarkdown</span>"
     ]
   },
   {
     "objectID": "session5_notes.html#introduction-to-rmarkdown",
     "href": "session5_notes.html#introduction-to-rmarkdown",
-    "title": "6  Reproducible research with RMarkdown",
+    "title": "5  Reproducible research with RMarkdown",
     "section": "",
-    "text": "Generating reports to show the latest findings in a project, combining research output with interpretations. Reports can be automated within RMarkdown, to ensure outputs show the latest data.\nKeeping track of projects as an alternative to a notebook. Documents can include R code and visualisations alongside thoughts and comments on findings so far.\nCreating a collaborative document that can be shared with colleagues. The inclusion of R code in documents provides an audit trail, making it easy to carry out quality assurance and resolve discrepancies in results.\n\n\n\n\n6.1.1 Creating an RMarkdown files\nRMarkdown files (.Rmd) are created and saved separately to the script files we have been using up to now on the course. To create a new RMarkdown file, either use the drop-down menu, following the File -&gt; New File -&gt; R Markdown… options, or using the  icon and selecting R Markdown….\n\n\n\nRmarkdown new file\n\n\nWhen creating a new RMarkdown file, we are given the option to set the title, author and date of the new document. We are also given options to select the type of document, presentation, or Shiny app we would like to create. This does not give a comprehensive list of documents available within RMarkdown and can be changed later. We will discuss output document types in more detail later in the session.\nClicking ‘OK’ on this window will produce an RMarkdown file (.Rmd) with some example code. If we do not want this, there is an option to ‘Create Empty Document’ on the bottom left of the window.\n\n\n6.1.2 Rmarkdown content\nRMarkdown files contain three main types of content:\n\nA YAML header (this sets the global options for the document)\nText, or syntax (this includes headings and comments)\nCode chunks containing R code\n\n\n6.1.2.1 The YAML header\nThe first part of an RMarkdown script, surrounded by ‘---’ is known as the YAML header. This sets global options for the document that will be produced by the script. 
YAML headers can include the title, author and date of a document, the output document type, table of contents options, and can include code to edit the appearance of text and figures.\nFor this course, we will just use the YAML to define the title, author, date, and output of our document:\n---\ntitle: \"Introduction to R with Tidyverse\"\nauthor: My name\ndate: 2024-07-15\noutput: html_document\n---\nThere are many output document types that can be produced using RMarkdown. Some of the most common include:\n\nhtml_document:HTML document,.html\npdf_document: PDF document,.pdf, created using a LaTeX template\nword_document: Microsoft word document (.docx)\nodt_document: OpenDocument text document (.odt), similar to Microsoft Word/Google Docs but compatible with free word processors)\ngithub_document: Github document (.md, markdown files that are compatible with Github and are converted to HTML when viewed there)\npowerpoint_presentation: Powerpoint presentation (.pptx)\n\nRMarkdown can also be combined with other R packages to create books (via bookdown), websites (via blogdown) and interactive dashboards (via flexdashboard).\n\n\n6.1.2.2 RMarkdown syntax\nRMarkdown text, or syntax, will generally make up the majority of a RMarkdown file. This can include headers and subheadings, equations, and any other text or comments in the document. Text is formatted using markdown syntax. A detailed list of syntax commands are given in the RMarkdown cheatsheet. Common syntax commands that may be used in an RMarkdown document include:\n\n*italic*\n**bold**\nAdd `code` into text\n# Header 1\n## Header 2\n…\n###### Header 6\n[This is a link](link url)\n![caption](image.png)\n\n- Unordered list\n  - List with indent\n1. Ordered list\n  - With indent\n2. Second item\nEquation: $r^2 = (x - a)^2 + (y - b)^2$\nRMarkdown equations are built using the same language as LaTeX. 
See here for a list of mathematical symbols that can be used in these equations.\n\n\n6.1.2.3 Code chunks\nCode chunks allow us to embed R code and outputs into our documents. This is one of the main draw of RMarkdown as it removes the need to copy and paste or import results from R into another document.\nCode chunks are pieces of code that begin ```{r} and end ```. For example,\n```{r}\n 1 + 1\n``` \nCode chunks can be created by manually typing these wrappers, by clicking the  icon and selecting ‘R’, or using the keyboard shortcut ctrl + alt + i on Windows, and Command + Option + i on Mac.\nCode chunks can be given titles to make an RMarkdown script easier to navigate (these will appear under the script window where lists of subheadings appear in script files). These are added inside the opening of the code chunk: ```{r chunk title}.\nThere are a number of chunk options to customise which code/output to display in the document. These are included in the opening of a chunk, after the title, separated by commas ,. Some of the most common, include:\n\necho = TRUE/FALSE: whether to display code in the output document\neval = TRUE/FALSE: whether to run the code in the chunk or not\ninclude = TRUE/FALSE: whether to include anything from the chunk (code or output) in the document\nerror/warning/message = TRUE/FALSE: whether to display error/warning/other messages in the document\n\nTop tip: It may be useful to add a setup code chunk at the beginning of an RMarkdown file that loads any packages and datasets that are required for the rest of the document. 
These can also include universal options for future code chunks to avoid repeating the code, using the knitr::opts_chunk$set function.\nFor example:\n```{r setup, include = FALSE} \n# Set global options for code chunks \n\n# Do not show any R code or messages unless specified \nknitr::opts_chunk$set(echo = FALSE, message = FALSE) \n\n# Load in the tidyverse package\nlibrary(tidyverse)\n```\n\n\n\n6.1.3 Compiling RMarkdown documents\nCompiling RMarkdown actually requires multiple steps and programmes. Luckily for us, this process takes place in the background so we don’t need to be aware of these steps happening!\nGenerating an output file from RMarkdown is know knitting a document. This process sends the .Rmd file to another R package knitr (which is installed alongside rmarkdown), which executes all the code chunks in the document and creates a markdown .md file including the code and output. This markdown file is then processed by another programme pandoc which converts markdown code into the finished document.\n\n\n\nRMarkdown to document process\n\n\nTo knit an RMarkdown file in RStudio is very simple. Either click the  icon above the RMarkdown script, or use the keyboard shortcut ctrl + shift + k on Windows or Command + shift + k on Mac. This initiates the process above and will return an output document (if there are no errors!) in the requested format to the working directory.\n\n\n6.1.4 Data visualisation in RMarkdown\nOutput such as graphs and tables can be embedded in code chunks, the code used to create them will be the same as it would be in any other R script.\n\n\n\n\n\n\nNote\n\n\n\nOften, when providing output in RMarkdown, we often do not want to show the code that was used to create this. Make sure to add echo = FALSE to the opening of the code chunk.\n\n\n\n6.1.4.1 Graphs in RMarkdown\nggplot can be used to create graphs that are embedded within code chunks and included in an output document. 
For example, we could use the data from previous sections to show the relationship between Settlement Funding Assessment (SFA) and council tax total in English local authorities in 2020, colour code by regions in a scatterplot:\n```{r scatterplot sfa_2020 and ct_total_2020 by region, message = FALSE}\n# Load and tidy the 2020 data\nread_csv(\"data/CSP_2020.csv\") %&gt;% \n  # Remove the Greater London Authority row\n  filter(authority != \"Greater London Authority\") %&gt;% \n  # Convert region variable to factor\n  mutate(region_fct = factor(region, \n                             levels = c(\"L\", \"NW\", \"NE\", \"YH\", \"WM\",\n                                        \"EM\", \"EE\", \"SW\", \"SE\"))) %&gt;% \n  # Create a ggplot \n  ggplot() +\n  # Scatterplot definition\n  geom_point(aes(x = ct_total_2020, y = sfa_2020, colour = region_fct)) +\n  # Add colour palette for region\n  scale_colour_manual(values = c(\"aquamarine2\", \"blue\", \"chartreuse2\", \n                                 \"coral\", \"orchid\", \"firebrick\", \n                                 \"gold3\", \"violetred\", \"grey50\")) +\n  # Change axis/label titles\n  labs(x = \"Council tax total (£millions)\", \n       y = \"Settlement funding assessment (£milliongs)\",\n       colour = \"Region\") +\n  theme_light()\n```\n\n\n\n\n\n\n\n\n\n\n\n6.1.4.2 Tables in RMarkdown\nThere are a number of ways to include tables within RMarkdown which can either be entered manually, or generated using an R package. 
The choice of approach to creating tables depends on the format and size of the data, the amount of flexibility you would like to customise the output, the type of output document you are creating, and personal preference of how it should look.\nIn this course, we will look at how tables can be created using RMarkdown syntax (without the need for additional packages), and using the kable function within the knitr package.\nManually creating tables\nTables can be created in RMarkdown syntax, using the | symbol to separate columns, and dashes - to separate column headings from the body of the table. These are created outside of code chunks within the text. For example,\n Header 1 | Header 2 | Header 3 |\n|----------|----------|----------|\n| This     | Is       | A        |\n| Very     | Simple   | Table    |\nproduces the following output:\n\n\n\nHeader 1\nHeader 2\nHeader 3\n\n\n\n\nThis\nIs\nA\n\n\nVery\nSimple\nTable\n\n\n\nColons can be added to the header/body separator row of the table to control the justification of the text in each column. For example,\n| Left | Right | Center  | Default   |\n|:-----|------:|:-------:|-----------|\n| This | Is    | Another | Simple    |\n| Table | But   | It is  | Justified |\nproduces the following output:\n\n\n\nLeft\nRight\nCenter\nDefault\n\n\n\n\nThis\nIs\nAnother\nSimple\n\n\nTable\nBut\nIt is\nJustified\n\n\n\nCreating tables from data frames\nThe knitr package that compiles RMarkdown files contains the kable function that can be used to create simple data tables. The kable function requires data to be stored as a matrix, data frame, or tibble object (although these can be easily created using the matrix, data.frame or tibble functions). 
Accessing the help file ?knitr::kable gives a list of arguments that can be used to customise these tables.\n\n\n\n\n\n\nNote\n\n\n\nAs these tables are created using R functions, they must be generated within a code chunk.\n\n\nFor example, we can create a simple data table using kable showing the first 6 rows of the mtcars dataset (a dataset pre-loaded into base R):\n```{r mtcars table, echo = FALSE} \nknitr::kable(head(mtcars))\n``` \nwhich produces the following output:\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nmpg\ncyl\ndisp\nhp\ndrat\nwt\nqsec\nvs\nam\ngear\ncarb\n\n\n\n\nMazda RX4\n21.0\n6\n160\n110\n3.90\n2.620\n16.46\n0\n1\n4\n4\n\n\nMazda RX4 Wag\n21.0\n6\n160\n110\n3.90\n2.875\n17.02\n0\n1\n4\n4\n\n\nDatsun 710\n22.8\n4\n108\n93\n3.85\n2.320\n18.61\n1\n1\n4\n1\n\n\nHornet 4 Drive\n21.4\n6\n258\n110\n3.08\n3.215\n19.44\n1\n0\n3\n1\n\n\nHornet Sportabout\n18.7\n8\n360\n175\n3.15\n3.440\n17.02\n0\n0\n3\n2\n\n\nValiant\n18.1\n6\n225\n105\n2.76\n3.460\n20.22\n1\n0\n3\n1\n\n\n\n\n\nWhere data are not already saved as an object, we need to create them first before generating a table. For example, the table we created manually earlier can be recreated using the kable function, by first creating a data frame with the information, and then piping it through to the function:\n```{r kable table}\n# Create a data frame with the table information\ndata.frame(col1 = c(\"This\", \"Very\"),\n           col2 = c(\"Is\", \"Simple\"),\n           col3 = c(\"A\", \"Table\")) %&gt;% \n  knitr::kable(., col.names = c(\"Header 1\", \"Header 2\", \"Header 3\"))\n``` \n\n\n\n\n\nHeader 1\nHeader 2\nHeader 3\n\n\n\n\nThis\nIs\nA\n\n\nVery\nSimple\nTable\n\n\n\n\n\nOther ways to create tables\nAlthough the kable function and RMarkdown syntax tables do not require additional R packages to be installed, they are fairly simple and do not give many options to customise the tables. 
For a more flexible alternative, I would recommend looking at the flextable package, which gives a much wider range of customisible features. The flextable user manual can be accessed for free from this website.\n\n\n\nExercise 8\nCreate an RMarkdown file that creates a html report describing the trends in core spending power in English local authorities between 2015 and 2020. Your report should include:\n\nA summary table of the total spending per year per region\nA suitable visualisation showing how the total annual spending has changed over this period, compared between regions\nA short interpretation of the table and visualisation\n\n\n\n\n\n\n\nNote\n\n\n\nYou are not expected to be an expert in this data! Interpret these outputs as you would any other numeric variable measured over time.",
+    "text": "Generating reports to show the latest findings in a project, combining research output with interpretations. Reports can be automated within RMarkdown, to ensure outputs show the latest data.\nKeeping track of projects as an alternative to a notebook. Documents can include R code and visualisations alongside thoughts and comments on findings so far.\nCreating a collaborative document that can be shared with colleagues. The inclusion of R code in documents provides an audit trail, making it easy to carry out quality assurance and resolve discrepancies in results.\n\n\n\n\n5.1.1 Creating an RMarkdown files\nRMarkdown files (.Rmd) are created and saved separately to the script files we have been using up to now on the course. To create a new RMarkdown file, either use the drop-down menu, following the File -&gt; New File -&gt; R Markdown… options, or using the  icon and selecting R Markdown….\n\n\n\nRmarkdown new file\n\n\nWhen creating a new RMarkdown file, we are given the option to set the title, author and date of the new document. We are also given options to select the type of document, presentation, or Shiny app we would like to create. This does not give a comprehensive list of documents available within RMarkdown and can be changed later. We will discuss output document types in more detail later in the session.\nClicking ‘OK’ on this window will produce an RMarkdown file (.Rmd) with some example code. If we do not want this, there is an option to ‘Create Empty Document’ on the bottom left of the window.\n\n\n5.1.2 Rmarkdown content\nRMarkdown files contain three main types of content:\n\nA YAML header (this sets the global options for the document)\nText, or syntax (this includes headings and comments)\nCode chunks containing R code\n\n\n5.1.2.1 The YAML header\nThe first part of an RMarkdown script, surrounded by ‘---’ is known as the YAML header. This sets global options for the document that will be produced by the script. 
YAML headers can include the title, author and date of a document, the output document type, table of contents options, and can include code to edit the appearance of text and figures.\nFor this course, we will just use the YAML to define the title, author, date, and output of our document:\n---\ntitle: \"Introduction to R with Tidyverse\"\nauthor: My name\ndate: 2024-07-15\noutput: html_document\n---\nThere are many output document types that can be produced using RMarkdown. Some of the most common include:\n\nhtml_document:HTML document,.html\npdf_document: PDF document,.pdf, created using a LaTeX template\nword_document: Microsoft word document (.docx)\nodt_document: OpenDocument text document (.odt), similar to Microsoft Word/Google Docs but compatible with free word processors)\ngithub_document: Github document (.md, markdown files that are compatible with Github and are converted to HTML when viewed there)\npowerpoint_presentation: Powerpoint presentation (.pptx)\n\nRMarkdown can also be combined with other R packages to create books (via bookdown), websites (via blogdown) and interactive dashboards (via flexdashboard).\n\n\n5.1.2.2 RMarkdown syntax\nRMarkdown text, or syntax, will generally make up the majority of a RMarkdown file. This can include headers and subheadings, equations, and any other text or comments in the document. Text is formatted using markdown syntax. A detailed list of syntax commands are given in the RMarkdown cheatsheet. Common syntax commands that may be used in an RMarkdown document include:\n\n*italic*\n**bold**\nAdd `code` into text\n# Header 1\n## Header 2\n…\n###### Header 6\n[This is a link](link url)\n![caption](image.png)\n\n- Unordered list\n  - List with indent\n1. Ordered list\n  - With indent\n2. Second item\nEquation: $r^2 = (x - a)^2 + (y - b)^2$\nRMarkdown equations are built using the same language as LaTeX. 
See here for a list of mathematical symbols that can be used in these equations.\n\n\n5.1.2.3 Code chunks\nCode chunks allow us to embed R code and outputs into our documents. This is one of the main draw of RMarkdown as it removes the need to copy and paste or import results from R into another document.\nCode chunks are pieces of code that begin ```{r} and end ```. For example,\n```{r}\n 1 + 1\n``` \nCode chunks can be created by manually typing these wrappers, by clicking the  icon and selecting ‘R’, or using the keyboard shortcut ctrl + alt + i on Windows, and Command + Option + i on Mac.\nCode chunks can be given titles to make an RMarkdown script easier to navigate (these will appear under the script window where lists of subheadings appear in script files). These are added inside the opening of the code chunk: ```{r chunk title}.\nThere are a number of chunk options to customise which code/output to display in the document. These are included in the opening of a chunk, after the title, separated by commas ,. Some of the most common, include:\n\necho = TRUE/FALSE: whether to display code in the output document\neval = TRUE/FALSE: whether to run the code in the chunk or not\ninclude = TRUE/FALSE: whether to include anything from the chunk (code or output) in the document\nerror/warning/message = TRUE/FALSE: whether to display error/warning/other messages in the document\n\nTop tip: It may be useful to add a setup code chunk at the beginning of an RMarkdown file that loads any packages and datasets that are required for the rest of the document. 
These can also include universal options for future code chunks to avoid repeating the code, using the knitr::opts_chunk$set function.\nFor example:\n```{r setup, include = FALSE} \n# Set global options for code chunks \n\n# Do not show any R code or messages unless specified \nknitr::opts_chunk$set(echo = FALSE, message = FALSE) \n\n# Load in the tidyverse package\nlibrary(tidyverse)\n```\n\n\n\n5.1.3 Compiling RMarkdown documents\nCompiling RMarkdown actually requires multiple steps and programmes. Luckily for us, this process takes place in the background so we don’t need to be aware of these steps happening!\nGenerating an output file from RMarkdown is know knitting a document. This process sends the .Rmd file to another R package knitr (which is installed alongside rmarkdown), which executes all the code chunks in the document and creates a markdown .md file including the code and output. This markdown file is then processed by another programme pandoc which converts markdown code into the finished document.\n\n\n\nRMarkdown to document process\n\n\nTo knit an RMarkdown file in RStudio is very simple. Either click the  icon above the RMarkdown script, or use the keyboard shortcut ctrl + shift + k on Windows or Command + shift + k on Mac. This initiates the process above and will return an output document (if there are no errors!) in the requested format to the working directory.\n\n\n5.1.4 Data visualisation in RMarkdown\nOutput such as graphs and tables can be embedded in code chunks, the code used to create them will be the same as it would be in any other R script.\n\n\n\n\n\n\nNote\n\n\n\nOften, when providing output in RMarkdown, we often do not want to show the code that was used to create this. Make sure to add echo = FALSE to the opening of the code chunk.\n\n\n\n5.1.4.1 Graphs in RMarkdown\nggplot can be used to create graphs that are embedded within code chunks and included in an output document. 
For example, we could use the data from previous sections to show the relationship between Settlement Funding Assessment (SFA) and council tax total in English local authorities in 2020, colour code by regions in a scatterplot:\n```{r scatterplot sfa_2020 and ct_total_2020 by region, message = FALSE}\n# Load and tidy the 2020 data\nread_csv(\"data/CSP_2020.csv\") %&gt;% \n  # Remove the Greater London Authority row\n  filter(authority != \"Greater London Authority\") %&gt;% \n  # Convert region variable to factor\n  mutate(region_fct = factor(region, \n                             levels = c(\"L\", \"NW\", \"NE\", \"YH\", \"WM\",\n                                        \"EM\", \"EE\", \"SW\", \"SE\"))) %&gt;% \n  # Create a ggplot \n  ggplot() +\n  # Scatterplot definition\n  geom_point(aes(x = ct_total_2020, y = sfa_2020, colour = region_fct)) +\n  # Add colour palette for region\n  scale_colour_manual(values = c(\"aquamarine2\", \"blue\", \"chartreuse2\", \n                                 \"coral\", \"orchid\", \"firebrick\", \n                                 \"gold3\", \"violetred\", \"grey50\")) +\n  # Change axis/label titles\n  labs(x = \"Council tax total (£millions)\", \n       y = \"Settlement funding assessment (£milliongs)\",\n       colour = \"Region\") +\n  theme_light()\n```\n\n\n\n\n\n\n\n\n\n\n\n5.1.4.2 Tables in RMarkdown\nThere are a number of ways to include tables within RMarkdown which can either be entered manually, or generated using an R package. 
The choice of approach to creating tables depends on the format and size of the data, the amount of flexibility you would like to customise the output, the type of output document you are creating, and personal preference of how it should look.\nIn this course, we will look at how tables can be created using RMarkdown syntax (without the need for additional packages), and using the kable function within the knitr package.\nManually creating tables\nTables can be created in RMarkdown syntax, using the | symbol to separate columns, and dashes - to separate column headings from the body of the table. These are created outside of code chunks within the text. For example,\n Header 1 | Header 2 | Header 3 |\n|----------|----------|----------|\n| This     | Is       | A        |\n| Very     | Simple   | Table    |\nproduces the following output:\n\n\n\nHeader 1\nHeader 2\nHeader 3\n\n\n\n\nThis\nIs\nA\n\n\nVery\nSimple\nTable\n\n\n\nColons can be added to the header/body separator row of the table to control the justification of the text in each column. For example,\n| Left | Right | Center  | Default   |\n|:-----|------:|:-------:|-----------|\n| This | Is    | Another | Simple    |\n| Table | But   | It is  | Justified |\nproduces the following output:\n\n\n\nLeft\nRight\nCenter\nDefault\n\n\n\n\nThis\nIs\nAnother\nSimple\n\n\nTable\nBut\nIt is\nJustified\n\n\n\nCreating tables from data frames\nThe knitr package that compiles RMarkdown files contains the kable function that can be used to create simple data tables. The kable function requires data to be stored as a matrix, data frame, or tibble object (although these can be easily created using the matrix, data.frame or tibble functions). 
Accessing the help file ?knitr::kable gives a list of arguments that can be used to customise these tables.\n\n\n\n\n\n\nNote\n\n\n\nAs these tables are created using R functions, they must be generated within a code chunk.\n\n\nFor example, we can create a simple data table using kable showing the first 6 rows of the mtcars dataset (a dataset pre-loaded into base R):\n```{r mtcars table, echo = FALSE} \nknitr::kable(head(mtcars))\n``` \nwhich produces the following output:\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nmpg\ncyl\ndisp\nhp\ndrat\nwt\nqsec\nvs\nam\ngear\ncarb\n\n\n\n\nMazda RX4\n21.0\n6\n160\n110\n3.90\n2.620\n16.46\n0\n1\n4\n4\n\n\nMazda RX4 Wag\n21.0\n6\n160\n110\n3.90\n2.875\n17.02\n0\n1\n4\n4\n\n\nDatsun 710\n22.8\n4\n108\n93\n3.85\n2.320\n18.61\n1\n1\n4\n1\n\n\nHornet 4 Drive\n21.4\n6\n258\n110\n3.08\n3.215\n19.44\n1\n0\n3\n1\n\n\nHornet Sportabout\n18.7\n8\n360\n175\n3.15\n3.440\n17.02\n0\n0\n3\n2\n\n\nValiant\n18.1\n6\n225\n105\n2.76\n3.460\n20.22\n1\n0\n3\n1\n\n\n\n\n\nWhere data are not already saved as an object, we need to create them first before generating a table. For example, the table we created manually earlier can be recreated using the kable function, by first creating a data frame with the information, and then piping it through to the function:\n```{r kable table}\n# Create a data frame with the table information\ndata.frame(col1 = c(\"This\", \"Very\"),\n           col2 = c(\"Is\", \"Simple\"),\n           col3 = c(\"A\", \"Table\")) %&gt;% \n  knitr::kable(., col.names = c(\"Header 1\", \"Header 2\", \"Header 3\"))\n``` \n\n\n\n\n\nHeader 1\nHeader 2\nHeader 3\n\n\n\n\nThis\nIs\nA\n\n\nVery\nSimple\nTable\n\n\n\n\n\nOther ways to create tables\nAlthough the kable function and RMarkdown syntax tables do not require additional R packages to be installed, they are fairly simple and do not give many options to customise the tables. 
For a more flexible alternative, I would recommend looking at the flextable package, which gives a much wider range of customisable features. The flextable user manual can be accessed for free from this website.\n\n\n\nExercise 8\nCreate an RMarkdown file that creates an HTML report describing the trends in core spending power in English local authorities between 2015 and 2020. Your report should include:\n\nA summary table of the total spending per year per region\nA suitable visualisation showing how the total annual spending has changed over this period, compared between regions\nA short interpretation of the table and visualisation\n\n\n\n\n\n\n\nNote\n\n\n\nYou are not expected to be an expert in this data! Interpret these outputs as you would any other numeric variable measured over time.",
     "crumbs": [
-      "<span class='chapter-number'>6</span>  <span class='chapter-title'>Reproducible research with RMarkdown</span>"
+      "<span class='chapter-number'>5</span>  <span class='chapter-title'>Reproducible research with RMarkdown</span>"
+    ]
+  },
+  {
+    "objectID": "data_description.html",
+    "href": "data_description.html",
+    "title": "Data description",
+    "section": "",
+    "text": "What is ‘CSP’?\nThe data we will be using throughout this course relate to the Core Spending Power (CSP) of English local authorities between 2015 and 2020. This is a measure of the resources available to local authorities in England to fund service delivery. The CSP is broken down into several components, presented as variables in the data. These components include:\nSpending power is given in millions of pounds (£). The data were provided by the UK government’s Department for Levelling Up, Housing and Communities. Full guidance on the data can be found on the Department’s website. A brief description of the variables included in the data is given below.",
+    "crumbs": [
+      "Appendices",
+      "Data description"
+    ]
+  },
+  {
+    "objectID": "data_description.html#what-is-csp",
+    "href": "data_description.html#what-is-csp",
+    "title": "Data description",
+    "section": "",
+    "text": "Settlement Funding Assessment (sfa)\nCompensation for under-indexing the business rates multiplier (under_index)\nIncome from council tax (ct_total)\nNew Homes Bonus (nhb)\nRural Services Delivery Grant (rsdg)",
+    "crumbs": [
+      "Appendices",
+      "Data description"
+    ]
+  },
+  {
+    "objectID": "data_description.html#descriptions-of-variables",
+    "href": "data_description.html#descriptions-of-variables",
+    "title": "Data description",
+    "section": "Descriptions of variables",
+    "text": "Descriptions of variables\n\nIdentifier variables\nEach dataset contains a unique identifier code variable, ons_code. This is a code given by the Government’s Office for National Statistics (ONS), and is used to join different datasets. There is also an authority variable which contains the local authority name (to see where each local authority lies on a map, you can visit the Government’s geoportal website).\n\n\nRegions of England\nIn addition to each local authority’s unique code and name, we are given the region that they lie within. England is separated into 9 regions (shown on this map) which are given as acronyms in the data. These are:\n\nL = London\nNW = North West\nNE = North East\nYH = Yorkshire and the Humber\nWM = West Midlands\nEM = East Midlands\nEE = East England\nSW = South West\nSE = South East\n\n\n\nSettlement Funding Assessment (SFA)\nThe Settlement Funding Assessment (sfa in the data) is the baseline funding level of local authorities, and includes the Revenue Support Grant (a central government grant given to local authorities).\n\n\nUnder-indexing business rate multipliers\nThe under_index variable is given to compensate local authorities that under-indexed business rate multipliers in previous years (i.e. those that used a measure of inflation that was lower than the one that should have been used).\n\n\nCouncil tax\nCouncil tax (ct_total) is the income made by each local authority from council tax. In England, the amount of council tax charged to residents is set by each local authority to make up additional revenue needed to cover planned spending.\n\n\nNew Homes Bonus\nThe nhb variable is the funding received as part of the New Homes Bonus, a government incentive to encourage local authorities to promote new housing development.\n\n\nRural Services Delivery Grant\nThe rsdg variable is funding received as part of the Rural Services Delivery Grant, provided to rural councils to recognise additional costs in these areas.",
+    "crumbs": [
+      "Appendices",
+      "Data description"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html",
+    "href": "exercise_solutions.html",
+    "title": "Exercise solutions",
+    "section": "",
+    "text": "Exercise 1",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#exercise-1",
+    "href": "exercise_solutions.html#exercise-1",
+    "title": "Exercise solutions",
+    "section": "",
+    "text": "Open a new script file if you have not already done so.\nSave this script file into an appropriate location.",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#solutions",
+    "href": "exercise_solutions.html#solutions",
+    "title": "Exercise solutions",
+    "section": "Solutions",
+    "text": "Solutions\n\nTo open a new R script, click the  icon and select ‘R script’.\nSave this file by following File -&gt; Save as… from the drop-down menu, selecting the folder you are working from in this course, and giving the file an appropriate name.",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#exercise-2",
+    "href": "exercise_solutions.html#exercise-2",
+    "title": "Exercise solutions",
+    "section": "Exercise 2",
+    "text": "Exercise 2\n\nAdd your name and the date to the top of your script file (hint: comment this out so R does not try to run it)\nUse R to perform the following calculations. Add the result to the same line of the script file in a way that ensures there are no errors in the code.\n\n\n\\(64^2\\)\n\\(3432 \\div 8\\)\n\\(96 \\times 72\\)\n\nWhen you have finished this exercise, select the entire script file (using Ctrl + a on Windows or Command + a on Mac) and run it to ensure there are no errors in the code.",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#solutions-1",
+    "href": "exercise_solutions.html#solutions-1",
+    "title": "Exercise solutions",
+    "section": "Solutions",
+    "text": "Solutions\n\nAdd a # symbol to the script file before printing your name and the date,\nAfter running the calculation, copy and paste the result after a # symbol to ensure there are no errors:\n\n\n64 ^ 2 # 4096\n## [1] 4096\n3432 / 8 # 429\n## [1] 429\n96 * 72 # 6912\n## [1] 6912",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#exercise-3",
+    "href": "exercise_solutions.html#exercise-3",
+    "title": "Exercise solutions",
+    "section": "Exercise 3",
+    "text": "Exercise 3\n\nHow many local authorities were included in the London region?\nGive three different ways that it would be possible to select all spend variables (sfa_2020, nhb_2020, etc.) from the CSP_2020 dataset.\nCreate a new tibble, em_2020, that just includes local authorities from the East Midlands (EM) region.\n\n\nHow many local authorities in the East Midlands had an SFA of between £5 and 10 million?\nCreate a new variable with the total overall spend in 2020 for local authorities in the East Midlands.",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#solutions-2",
+    "href": "exercise_solutions.html#solutions-2",
+    "title": "Exercise solutions",
+    "section": "Solutions",
+    "text": "Solutions\n\nFirst filter the data to return only rows which belong to the London region, then count the number of rows in this subgroup:\n\n\ncsp_2020 %&gt;% \n  filter(region == \"L\") %&gt;% \n  count()\n## # A tibble: 1 × 1\n##       n\n##   &lt;int&gt;\n## 1    34\n\n\nThere are many different ways to select variables from a dataset. For a list of selection helpers, check the helpfile ?tidyr_tidy_select. Some examples include:\n\n\n# Using the : symbol to return consecutive columns\n\n# By variable name:\nselect(csp_2020, sfa_2020:rsdg_2020)\n\n# Or by column number:\nselect(csp_2020, 4:9)\n\n# Returning all variables with names ending \"_2020\"\nselect(csp_2020, ends_with(\"_2020\"))\n\n# Return all numeric variables\nselect(csp_2020, where(is.numeric))\n\n# Return all variables that are not character \nselect(csp_2020, !where(is.character))\n\n\nTo create a new tibble, use filter to select the subgroup where region is “EM”, and attach as an object using the &lt;- symbol\n\n\nem_2020 &lt;- filter(csp_2020, region == \"EM\")\n\n\nUse filter to select the subgroup and then count the number of rows:\n\n\nem_2020 %&gt;% \n  filter(between(sfa_2020, 5, 10)) %&gt;% \n  count()\n## # A tibble: 1 × 1\n##       n\n##   &lt;int&gt;\n## 1     3\n\n\nUse the mutate function to create a new variable from the existing ones. Hint: If you are not sure of the variable names in the data, use the names function and copy them from the console:\n\n\nnames(em_2020)\n## [1] \"ons_code\"         \"authority\"        \"region\"           \"sfa_2020\"        \n## [5] \"under_index_2020\" \"ct_total_2020\"    \"nhb_2020\"         \"nhb_return_2020\" \n## [9] \"rsdg_2020\"\n\n\nem_2020 &lt;- em_2020 %&gt;% \n  mutate(total_spend = sfa_2020 + under_index_2020 + ct_total_2020 + \n           nhb_2020 + nhb_return_2020 + rsdg_2020)",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#exercise-4",
+    "href": "exercise_solutions.html#exercise-4",
+    "title": "Exercise solutions",
+    "section": "Exercise 4",
+    "text": "Exercise 4\n\nCreate a data frame with the minimum, maximum and median total spend per year for each region.\nProduce a frequency table containing the number and percentage of local authorities in each region.\nConvert the data object csp_long2 back into wide format, with one row per local authority and one variable per total spend per year (HINT: start by selecting only the variables you need from the long data frame). Use the help file ?pivot_wider and vignette(\"pivot\") for more hints.\nUsing your new wide data frame, calculate the difference in total spending for each local authority between 2015 and 2020. How many local authorities have had an overall reduction in spending since 2015?",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#solutions-3",
+    "href": "exercise_solutions.html#solutions-3",
+    "title": "Exercise solutions",
+    "section": "Solutions",
+    "text": "Solutions\n\nUse the summarise function after grouping by the year and region variables:\n\n\ncsp_long2 %&gt;% \n  group_by(region, year) %&gt;% \n  summarise(min_spend = min(total_spend),\n            max_spend = max(total_spend),\n            median_spend = median(total_spend)) %&gt;% \n  ungroup()\n## # A tibble: 54 × 5\n##    region  year min_spend max_spend median_spend\n##    &lt;chr&gt;  &lt;dbl&gt;     &lt;dbl&gt;     &lt;dbl&gt;        &lt;dbl&gt;\n##  1 EE      2015         0      883.         15.9\n##  2 EE      2016         0      860.         16.2\n##  3 EE      2017         0      845.         15.0\n##  4 EE      2018         0      860.         14.4\n##  5 EE      2019         0      874.         14.7\n##  6 EE      2020         0      915.         15.2\n##  7 EM      2015         0      492.         12.4\n##  8 EM      2016         0      479.         12.0\n##  9 EM      2017         0      475.         11.1\n## 10 EM      2018         0      483.         11.0\n## # ℹ 44 more rows\n\n\nTo calculate the percentage of local authorities in each region, we need the total number in each region and the overall number of local authorities:\n\n\n# Use the csp_2020 data as only require one row per local authority\ncsp_2020 %&gt;% \n  # Begin by calculating number of local authorities per region\n  group_by(region) %&gt;% \n  # Count number of rows in each group\n  summarise(n_la_region = n()) %&gt;% \n  ungroup() %&gt;% \n  # Create a new variable with the total number of local authorities (the sum)\n  mutate(n_la_overall = sum(n_la_region),\n         # Calculate the percentage (and round to make easier to read)\n         perc_la_region = round((n_la_region / n_la_overall) * 100, 2)) %&gt;% \n  # Remove the total local authority column\n  select(-n_la_overall)\n## # A tibble: 9 × 3\n##   region n_la_region perc_la_region\n##   &lt;chr&gt;        &lt;int&gt;          &lt;dbl&gt;\n## 1 EE              57          14.4 \n## 2 EM              51    
      12.9 \n## 3 L               34           8.59\n## 4 NE              15           3.79\n## 5 NW              46          11.6 \n## 6 SE              81          20.4 \n## 7 SW              47          11.9 \n## 8 WM              38           9.6 \n## 9 YH              27           6.82\n\n\nUse the pivot_wider function, use the year to set the new variable names suffix (names_from =), add a prefix to avoid variable names beginning with a number (names_prefix =), and take the values_from the current total_spend column:\n\n\ncsp_total_wide &lt;- csp_long2 %&gt;% \n  # Select variables to keep\n  select(ons_code:year, total_spend) %&gt;% \n  pivot_wider(names_from = year,\n              names_prefix = \"total_spend_\",\n              values_from = total_spend)\n\n\nBegin by using mutate to create a variable with the difference between total spend 2015 - 2020. Use filter to return rows where there is a reduction in spend, count the number of rows:\n\n\ncsp_total_wide %&gt;% \n  mutate(total_diff = total_spend_2020 - total_spend_2015) %&gt;% \n  filter(total_diff &lt; 0) %&gt;% \n  count()\n## # A tibble: 1 × 1\n##       n\n##   &lt;int&gt;\n## 1   234",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#exercise-5",
+    "href": "exercise_solutions.html#exercise-5",
+    "title": "Exercise solutions",
+    "section": "Exercise 5",
+    "text": "Exercise 5\n\nCreate a new data object containing the 2020 CSP data without the Greater London Authority observation. Name this data frame csp_nolon_2020.\nUsing the csp_nolon_2020 data, create a data visualisation to check the distribution (or shape) of the SFA variable.\nBased on the visualisation above, create a summary table for the SFA variable containing the minimum and maximum, and appropriate measures of the centre/average and spread.",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#solutions-4",
+    "href": "exercise_solutions.html#solutions-4",
+    "title": "Exercise solutions",
+    "section": "Solutions",
+    "text": "Solutions\n\nCreate a new object using the &lt;- symbol, use filter to remove the duplicate row:\n\n\ncsp_nolon_2020 &lt;- filter(csp_2020, authority != \"Greater London Authority\")\n\n\nHistograms are used to check the distribution of numeric variables:\n\n\nggplot(data = csp_nolon_2020) +\n  geom_histogram(aes(x = sfa_2020))\n\n\n\n\n\n\n\n\n\nThe histogram shows that the sfa_2020 variable is very skewed, therefore the median and IQR are the most appropriate measures of centre and spread:\n\n\nsummarise(csp_nolon_2020,\n          min_sfa = min(sfa_2020),\n          max_sfa = max(sfa_2020),\n          median_sfa = median(sfa_2020),\n          iqr_sfa = IQR(sfa_2020))\n## # A tibble: 1 × 4\n##   min_sfa max_sfa median_sfa iqr_sfa\n##     &lt;dbl&gt;   &lt;dbl&gt;      &lt;dbl&gt;   &lt;dbl&gt;\n## 1       0    470.       4.62    54.7",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#exercise-6",
+    "href": "exercise_solutions.html#exercise-6",
+    "title": "Exercise solutions",
+    "section": "Exercise 6",
+    "text": "Exercise 6\n\nWhat is the problem with the following code? Fix the code to change the shape of all the points.\n\n\nggplot(csp_nolon_2020) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020, shape = \"*\"))\n\n\nAdd a line of best fit to the scatterplot showing the relationship between SFA and council tax total (hint: use ?geom_smooth).\nAdd a line of best fit for each region (hint: make each line a different colour).",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#solutions-5",
+    "href": "exercise_solutions.html#solutions-5",
+    "title": "Exercise solutions",
+    "section": "Solutions",
+    "text": "Solutions\n\nOnly aesthetics determined by variables in the data should lie inside the aes function, the shape argument should be outside of this:\n\n\nggplot(csp_nolon_2020) + \n  geom_point(aes(x = sfa_2020, y = ct_total_2020), shape = \"*\")\n\n\n\n\n\n\n\n\n\nThe function geom_smooth adds a line of best fit, make sure to set method = lm to fit a linear trend:\n\n\nggplot(data = csp_nolon_2020, aes(x = ct_total_2020, y = sfa_2020)) +\n  geom_point() +\n  geom_smooth(method = \"lm\")\n\n\n\n\n\n\n\n\nHint: To reduce repetitive coding, setting aes in the ggplot function applies these to the entire object.\n\nA line of best fit for each group simply requires adding this to the aes function as a colour:\n\n\nggplot(data = csp_nolon_2020, \n       aes(x = ct_total_2020, y = sfa_2020, colour = region)) +\n  geom_point() +\n  geom_smooth(method = \"lm\")",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#exercise-7",
+    "href": "exercise_solutions.html#exercise-7",
+    "title": "Exercise solutions",
+    "section": "Exercise 7",
+    "text": "Exercise 7\nUse an appropriate data visualisation to show how the total spend in each local authority has changed over the years between 2015 and 2020. Choose a visualisation that shows these trends over time and allows us to compare them between regions.",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#solution",
+    "href": "exercise_solutions.html#solution",
+    "title": "Exercise solutions",
+    "section": "Solution",
+    "text": "Solution\nThe most appropriate plot to show a change in variable over time is a line graph (with year on the x-axis and spend on the y-axis). To compare these between regions, we could set the colour of these lines, but as there are so many local authorities, this would overload the graph and make it hard to compare. As an alternative, we can facet this graph by region to show the line graphs on the same scale on the same output.\nBe sure to set appropriate axis labels, font sizes, etc.\n\n# Remove the Greater London Authority duplicate\ncsp_long2 %&gt;% \n  filter(authority != \"Greater London Authority\") %&gt;% \n  ggplot() + \n  # Need to add a group to know what each line represents\n  geom_line(aes(x = year, y = total_spend, group = ons_code,\n                # OPTIONAL: colour by region to make it prettier!\n                colour = region)) +\n  facet_wrap( ~ region) + \n  labs(x = \"Year\", y = \"Total core spending power (millions)\") +\n  # Add theme_light to make the background a nicer colour\n  theme_light() + \n  # Rotate the x-axis labels to avoid overlap\n  theme(axis.text.x.bottom = element_text(angle = 45),\n        # Remove the legend (not needed, we have labels on the facets)\n        legend.position = \"none\")",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#exercise-8",
+    "href": "exercise_solutions.html#exercise-8",
+    "title": "Exercise solutions",
+    "section": "Exercise 8",
+    "text": "Exercise 8\nCreate an RMarkdown file that creates a html report describing the trends in core spending power in English local authorities between 2015 and 2020. Your report should include:\n\nA summary table of the total spending per year per region\nA suitable visualisation showing how the total annual spending has changed over this period, compared between regions\nA short interpretation of the table and visualisation\n\nNote: You are not expected to be an expert in this data! Interpret these outputs as you would any other numeric variable measured over time.",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
+    ]
+  },
+  {
+    "objectID": "exercise_solutions.html#solutions-6",
+    "href": "exercise_solutions.html#solutions-6",
+    "title": "Exercise solutions",
+    "section": "Solutions",
+    "text": "Solutions\nThere are many different correct solutions to this exercise. All RMarkdown files should begin with a YAML header similar to the one below:\n---\ntitle: \"Core spending power in English local authorities, 2015 - 2020\"\nauthor: Sophie Lee\noutput: html_document\n---  \nNext, you may have a code chunk that sets up the global chunk options, loads any packages you needed, and loads the data that we will be using for the report. For example:\n```{r setup, include = FALSE}\n# Set global chunk options to not show R code or messages\nknitr::opts_chunk$set(echo = FALSE, message = FALSE)\n\n# Load the tidyverse package\nlibrary(tidyverse)\n\n# Load the long dataset\ncsp_long2 &lt;- read_csv(\"data/CSP_long_201520.csv\")\n```\nYou may have begun with an introduction using RMarkdown syntax:\n# Introduction\nThe following report will investigate the trends in core spending power \nacross England between 2015 and 2020. All values are given in millions \nof pounds. \n\nThe core spending power was made up of the following provisions:\n\n- Settlement funding assessment (SFA)\n- Compensation for under-indexing the business rates multipliers\n- council tax \n- New homes bonus\n- New homes bonus returned funding\n- Rural Services Delivery Grant (RSDG)\nFollowed by a summary table, created using summarise and displayed using kable:\n# Total core spending power by region\nBelow is a summary table containing the total core spending power per year \nper region, given in millions of £:\n\n```{r csp total summary table}\ncsp_long2 %&gt;% \n  group_by(region, year) %&gt;% \n  summarise(min_spend = min(total_spend),\n            max_spend = max(total_spend),\n            median_spend = median(total_spend),\n            iqr_spend = IQR(total_spend)) %&gt;% \n  ungroup() %&gt;% \n  knitr::kable(.,\n               col.names = c(\"Region\", \"Year\", \"Minimum\", \n                             \"Maximum\", \"Median\", \"IQR\"))\n```\nThen an additional code chunk producing 
a faceted line chart, similar to the one in Exercise 7:\n```{r}\ncsp_long2 %&gt;% \n  filter(authority != \"Greater London Authority\") %&gt;% \n  ggplot() + \n  geom_line(aes(x = year, y = total_spend, group = ons_code,\n                colour = region)) +\n  facet_wrap( ~ region) + \n  labs(x = \"Year\", y = \"Total core spending power (millions)\") +\n  theme_light() + \n  theme(axis.text.x.bottom = element_text(angle = 45),\n        legend.position = \"none\")\n\n```",
+    "crumbs": [
+      "Appendices",
+      "Exercise solutions"
     ]
   }
 ]
\ No newline at end of file
diff --git a/_book/session1_notes.html b/_book/session1_notes.html
index f04cdda..8162326 100644
--- a/_book/session1_notes.html
+++ b/_book/session1_notes.html
@@ -7,7 +7,7 @@
 <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
 
 
-<title>Introduction to R with Tidyverse - 2&nbsp; Session 1: Introduction to R and RStudio</title>
+<title>Introduction to R with Tidyverse - 1&nbsp; Introduction to R and RStudio</title>
 <style>
 code{white-space: pre-wrap;}
 span.smallcaps{font-variant: small-caps;}
@@ -165,14 +165,17 @@
   <li class="nav-item compact">
     <a class="nav-link" href="https://x.com/SophieStats10"> <i class="bi bi-twitter" role="img" aria-label="Twitter">
 </i> 
+<span class="menu-text"></span></a>
+  </li>  
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://buymeacoffee.com/sophie_a_lee"> <i class="bi bi-cup-hot" role="img" aria-label="Buy me a coffee">
+</i> 
 <span class="menu-text"></span></a>
   </li>  
 </ul>
           </div> <!-- /navcollapse -->
-          <div class="quarto-navbar-tools tools-wide">
-    <a href="https://github.com/sophie-a-lee/Introduction_R_Tidyverse_course" title="Source Code" class="quarto-navigation-tool px-1" aria-label="Source Code"><i class="bi bi-github"></i></a>
+          <div class="quarto-navbar-tools">
     <a href="./Introduction-to-R-with-Tidyverse.pdf" title="Download PDF" class="quarto-navigation-tool px-1" aria-label="Download PDF"><i class="bi bi-file-pdf"></i></a>
-    <a href="https://twitter.com/intent/tweet?url=|url|" title="Twitter" class="quarto-navigation-tool px-1" aria-label="Twitter"><i class="bi bi-twitter"></i></a>
 </div>
       </div> <!-- /container-fluid -->
     </nav>
@@ -181,7 +184,7 @@
       <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
         <i class="bi bi-layout-text-sidebar-reverse"></i>
       </button>
-        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./session1_notes.html"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Session 1: Introduction to R and RStudio</span></a></li></ol></nav>
+        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./session1_notes.html"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction to R and RStudio</span></a></li></ol></nav>
         <a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">      
         </a>
     </div>
@@ -196,39 +199,62 @@
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./index.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Welcome!</span></span></a>
+ <span class="menu-text">Welcome!</span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session1_notes.html" class="sidebar-item-text sidebar-link active">
- <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Session 1: Introduction to R and RStudio</span></span></a>
+ <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction to R and RStudio</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session2_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Session 2: Introduction to tidyverse and data wrangling</span></span></a>
+ <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Introduction to tidyverse and data wrangling</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session3_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Session 3: Data preparation and manipulation</span></span></a>
+ <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Data preparation and manipulation</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session4_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Session 4: Data visualisation with ggplot2</span></span></a>
+ <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Data visualisation with ggplot2</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session5_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+ <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item sidebar-item-section">
+      <div class="sidebar-item-container"> 
+            <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
+ <span class="menu-text">Appendices</span></a>
+          <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
+            <i class="bi bi-chevron-right ms-2"></i>
+          </a> 
+      </div>
+      <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">  
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./data_description.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Data description</span></a>
+  </div>
+</li>
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./exercise_solutions.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Exercise solutions</span></a>
   </div>
 </li>
+      </ul>
+  </li>
     </ul>
     </div>
 </nav>
@@ -239,22 +265,22 @@
     <h2 id="toc-title">Table of contents</h2>
    
   <ul>
-  <li><a href="#the-rstudio-interface" id="toc-the-rstudio-interface" class="nav-link active" data-scroll-target="#the-rstudio-interface"><span class="header-section-number">2.1</span> The RStudio interface</a>
+  <li><a href="#the-rstudio-interface" id="toc-the-rstudio-interface" class="nav-link active" data-scroll-target="#the-rstudio-interface"><span class="header-section-number">1.1</span> The RStudio interface</a>
   <ul class="collapse">
   <li><a href="#exercise-1" id="toc-exercise-1" class="nav-link" data-scroll-target="#exercise-1">Exercise 1</a></li>
   </ul></li>
-  <li><a href="#r-syntax" id="toc-r-syntax" class="nav-link" data-scroll-target="#r-syntax"><span class="header-section-number">2.2</span> R syntax</a>
+  <li><a href="#r-syntax" id="toc-r-syntax" class="nav-link" data-scroll-target="#r-syntax"><span class="header-section-number">1.2</span> R syntax</a>
   <ul class="collapse">
   <li><a href="#exercise-2" id="toc-exercise-2" class="nav-link" data-scroll-target="#exercise-2">Exercise 2</a></li>
   </ul></li>
-  <li><a href="#r-objects-and-functions" id="toc-r-objects-and-functions" class="nav-link" data-scroll-target="#r-objects-and-functions"><span class="header-section-number">2.3</span> R objects and functions</a>
+  <li><a href="#r-objects-and-functions" id="toc-r-objects-and-functions" class="nav-link" data-scroll-target="#r-objects-and-functions"><span class="header-section-number">1.3</span> R objects and functions</a>
   <ul class="collapse">
-  <li><a href="#objects" id="toc-objects" class="nav-link" data-scroll-target="#objects"><span class="header-section-number">2.3.1</span> Objects</a></li>
-  <li><a href="#functions" id="toc-functions" class="nav-link" data-scroll-target="#functions"><span class="header-section-number">2.3.2</span> Functions</a></li>
-  <li><a href="#help-files" id="toc-help-files" class="nav-link" data-scroll-target="#help-files"><span class="header-section-number">2.3.3</span> Help files</a></li>
-  <li><a href="#error-and-warning-messages" id="toc-error-and-warning-messages" class="nav-link" data-scroll-target="#error-and-warning-messages"><span class="header-section-number">2.3.4</span> Error and warning messages</a></li>
-  <li><a href="#cleaning-the-environment" id="toc-cleaning-the-environment" class="nav-link" data-scroll-target="#cleaning-the-environment"><span class="header-section-number">2.3.5</span> Cleaning the environment</a></li>
-  <li><a href="#r-packages" id="toc-r-packages" class="nav-link" data-scroll-target="#r-packages"><span class="header-section-number">2.3.6</span> R packages</a></li>
+  <li><a href="#objects" id="toc-objects" class="nav-link" data-scroll-target="#objects"><span class="header-section-number">1.3.1</span> Objects</a></li>
+  <li><a href="#functions" id="toc-functions" class="nav-link" data-scroll-target="#functions"><span class="header-section-number">1.3.2</span> Functions</a></li>
+  <li><a href="#help-files" id="toc-help-files" class="nav-link" data-scroll-target="#help-files"><span class="header-section-number">1.3.3</span> Help files</a></li>
+  <li><a href="#error-and-warning-messages" id="toc-error-and-warning-messages" class="nav-link" data-scroll-target="#error-and-warning-messages"><span class="header-section-number">1.3.4</span> Error and warning messages</a></li>
+  <li><a href="#cleaning-the-environment" id="toc-cleaning-the-environment" class="nav-link" data-scroll-target="#cleaning-the-environment"><span class="header-section-number">1.3.5</span> Cleaning the environment</a></li>
+  <li><a href="#r-packages" id="toc-r-packages" class="nav-link" data-scroll-target="#r-packages"><span class="header-section-number">1.3.6</span> R packages</a></li>
   </ul></li>
   </ul>
 </nav>
@@ -264,7 +290,7 @@ <h2 id="toc-title">Table of contents</h2>
 
 <header id="title-block-header" class="quarto-title-block default">
 <div class="quarto-title">
-<h1 class="title"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Session 1: Introduction to R and RStudio</span></h1>
+<h1 class="title"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction to R and RStudio</span></h1>
 </div>
 
 
@@ -281,14 +307,14 @@ <h1 class="title"><span class="chapter-number">2</span>&nbsp; <span class="chapt
 </header>
 
 
-<section id="the-rstudio-interface" class="level2" data-number="2.1">
-<h2 data-number="2.1" class="anchored" data-anchor-id="the-rstudio-interface"><span class="header-section-number">2.1</span> The RStudio interface</h2>
+<section id="the-rstudio-interface" class="level2" data-number="1.1">
+<h2 data-number="1.1" class="anchored" data-anchor-id="the-rstudio-interface"><span class="header-section-number">1.1</span> The RStudio interface</h2>
 <p>There are a number of software packages based on the R programming language aimed at making writing and running analyses easier for users. They all run R in the background but look different and contain different features.</p>
 <p><strong>RStudio</strong> has been chosen for this course as it allows users to create script files, allowing code to be re-run, edited, and shared easily. RStudio also provides tools to help easily identify errors in R code, integrates help documentation into the main console and uses colour-coding to help read code at a glance.</p>
 <p>Before installing RStudio, we must ensure that R is downloaded onto the machine. R is available to download for free for Windows, Mac, or Linux via the <a href="https://cran.r-project.org/"><strong>CRAN</strong></a> website.</p>
 <p>RStudio is also free to download from the <a href="https://posit.co/"><strong>Posit</strong></a> website.</p>
-<section id="the-rstudio-console-window" class="level4" data-number="2.1.0.1">
-<h4 data-number="2.1.0.1" class="anchored" data-anchor-id="the-rstudio-console-window"><span class="header-section-number">2.1.0.1</span> The RStudio console window</h4>
+<section id="the-rstudio-console-window" class="level4" data-number="1.1.0.1">
+<h4 data-number="1.1.0.1" class="anchored" data-anchor-id="the-rstudio-console-window"><span class="header-section-number">1.1.0.1</span> The RStudio console window</h4>
 <p>The screenshot below shows the RStudio interface which comprises four windows:</p>
 <div class="quarto-figure quarto-figure-center">
 <figure class="figure">
@@ -346,8 +372,8 @@ <h3 class="unnumbered anchored" data-anchor-id="exercise-1">Exercise 1</h3>
 </ol>
 </section>
 </section>
-<section id="r-syntax" class="level2" data-number="2.2">
-<h2 data-number="2.2" class="anchored" data-anchor-id="r-syntax"><span class="header-section-number">2.2</span> R syntax</h2>
+<section id="r-syntax" class="level2" data-number="1.2">
+<h2 data-number="1.2" class="anchored" data-anchor-id="r-syntax"><span class="header-section-number">1.2</span> R syntax</h2>
 <p>All analyses within R are carried out using <strong>syntax</strong>, the R programming language. It is important to note that R is case-sensitive, so always ensure that you use the correct combination of upper and lower case letters when running functions or calling objects.</p>
 <p>Any text written in the R console or script file can be treated the same as text from other documents or programmes: text can be highlighted, copied and pasted to make coding more efficient.</p>
 <p>When creating script files, it is important to ensure they are clear and easy to read. Comments can be added to script files using the <code>#</code> symbol. R will ignore any text following the <code>#</code> on the same line.</p>
@@ -427,10 +453,10 @@ <h3 class="unnumbered anchored" data-anchor-id="exercise-2">Exercise 2</h3>
 <p>When you have finished this exercise, select the entire script file (using <code>ctrl + a</code> on windows or <code>Command + a</code> on Mac) and run it to ensure there are no errors in the code.</p>
 </section>
 </section>
-<section id="r-objects-and-functions" class="level2" data-number="2.3">
-<h2 data-number="2.3" class="anchored" data-anchor-id="r-objects-and-functions"><span class="header-section-number">2.3</span> R objects and functions</h2>
-<section id="objects" class="level3" data-number="2.3.1">
-<h3 data-number="2.3.1" class="anchored" data-anchor-id="objects"><span class="header-section-number">2.3.1</span> Objects</h3>
+<section id="r-objects-and-functions" class="level2" data-number="1.3">
+<h2 data-number="1.3" class="anchored" data-anchor-id="r-objects-and-functions"><span class="header-section-number">1.3</span> R objects and functions</h2>
+<section id="objects" class="level3" data-number="1.3.1">
+<h3 data-number="1.3.1" class="anchored" data-anchor-id="objects"><span class="header-section-number">1.3.1</span> Objects</h3>
 <p>One of the main advantages to using R over other software packages such as SPSS is that more than one dataset can be accessed at the same time. A collection of data stored in any format within the R session is known as an <strong>object</strong>. Objects can include single numbers, single variables, entire datasets, lists of datasets, or even tables and graphs.</p>
 <div class="callout callout-style-default callout-tip callout-titled">
 <div class="callout-header d-flex align-content-center">
@@ -479,12 +505,12 @@ <h3 data-number="2.3.1" class="anchored" data-anchor-id="objects"><span class="h
 <p>The <code>[1]</code> that appears at the beginning of each output line indicates that this is the first element in the object. If there were two lines then the second line would start with the number of that element in square brackets.</p>
 <p>For example, if we had an object with 6 elements and when called the first line contained the first 5 elements, each line would begin with <code>[1]</code> and <code>[6]</code> respectively.</p>
 </section>
-<section id="functions" class="level3" data-number="2.3.2">
-<h3 data-number="2.3.2" class="anchored" data-anchor-id="functions"><span class="header-section-number">2.3.2</span> Functions</h3>
+<section id="functions" class="level3" data-number="1.3.2">
+<h3 data-number="1.3.2" class="anchored" data-anchor-id="functions"><span class="header-section-number">1.3.2</span> Functions</h3>
 <p><strong>Functions</strong> are built-in commands that allow R users to run analyses. All functions require the definition of arguments within round brackets <code>()</code>. Each function requires different information and has different arguments that can be used to customise the analysis. A detailed list of these arguments and a description of the function can be found in the function’s associated <strong>help file</strong>.</p>
 </section>
-<section id="help-files" class="level3" data-number="2.3.3">
-<h3 data-number="2.3.3" class="anchored" data-anchor-id="help-files"><span class="header-section-number">2.3.3</span> Help files</h3>
+<section id="help-files" class="level3" data-number="1.3.3">
+<h3 data-number="1.3.3" class="anchored" data-anchor-id="help-files"><span class="header-section-number">1.3.3</span> Help files</h3>
 <p>Each function that exists within R has an associated help file. RStudio does not require an internet connection to access these help files if the function is available in the current session of R.</p>
 <p>To retrieve help files, enter <code>?</code> followed by the function name into the console window, e.g. <code>?mean</code>. The help file will appear in window D of the interface shown in the introduction.</p>
 <p>Help files contain the following information:</p>
@@ -498,8 +524,8 @@ <h3 data-number="2.3.3" class="anchored" data-anchor-id="help-files"><span class
 <li>Examples: example code with applications of the function</li>
 </ul>
 </section>
-<section id="error-and-warning-messages" class="level3" data-number="2.3.4">
-<h3 data-number="2.3.4" class="anchored" data-anchor-id="error-and-warning-messages"><span class="header-section-number">2.3.4</span> Error and warning messages</h3>
+<section id="error-and-warning-messages" class="level3" data-number="1.3.4">
+<h3 data-number="1.3.4" class="anchored" data-anchor-id="error-and-warning-messages"><span class="header-section-number">1.3.4</span> Error and warning messages</h3>
 <p>Where a function or object has not been correctly specified, or there is some mistake in the syntax that has been sent to the console, R will return an error message. These messages are generally informative and include the location of the error.</p>
 <p>The most common errors include misspelling functions or objects:</p>
 <div class="cell">
@@ -519,8 +545,8 @@ <h3 data-number="2.3.4" class="anchored" data-anchor-id="error-and-warning-messa
 <p>When working within the R console, if an incomplete command is run, a <code>+</code> symbol will appear in the console, rather than the usual <code>&gt;</code>. This indicates that R expects you to keep writing the previous code. To overcome this issue, either finish the command on the next line of the console, or press the <code>esc</code> button on your keyboard to start from scratch.</p>
 <p>One of the benefits of using RStudio rather than the basic R package is that it will suggest object or function names after typing the first few letters. This avoids spelling mistakes and accidental errors when running code. To accept the suggestion, either click the correct choice with your mouse or use the <code>tab</code> button on your keyboard.</p>
 </section>
-<section id="cleaning-the-environment" class="level3" data-number="2.3.5">
-<h3 data-number="2.3.5" class="anchored" data-anchor-id="cleaning-the-environment"><span class="header-section-number">2.3.5</span> Cleaning the environment</h3>
+<section id="cleaning-the-environment" class="level3" data-number="1.3.5">
+<h3 data-number="1.3.5" class="anchored" data-anchor-id="cleaning-the-environment"><span class="header-section-number">1.3.5</span> Cleaning the environment</h3>
 <p>To remove objects from the RStudio environment, we can use the <code>rm</code> function. This can be combined with the <code>ls()</code> function, which lists all objects in the environment, to remove all objects currently loaded:</p>
 <div class="cell">
 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rm</span>(<span class="at">list =</span> <span class="fu">ls</span>())</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -539,11 +565,11 @@ <h3 data-number="2.3.5" class="anchored" data-anchor-id="cleaning-the-environmen
 </div>
 </div>
 </section>
-<section id="r-packages" class="level3" data-number="2.3.6">
-<h3 data-number="2.3.6" class="anchored" data-anchor-id="r-packages"><span class="header-section-number">2.3.6</span> R packages</h3>
+<section id="r-packages" class="level3" data-number="1.3.6">
+<h3 data-number="1.3.6" class="anchored" data-anchor-id="r-packages"><span class="header-section-number">1.3.6</span> R packages</h3>
 <p>R packages are a collection of functions and datasets developed by R users that expand existing R capabilities or add completely new ones. Packages allow users to apply the most up-to-date methods shortly after they are developed, unlike other statistical software packages that require an entirely new version.</p>
-<section id="installing-packages-from-cran" class="level4" data-number="2.3.6.1">
-<h4 data-number="2.3.6.1" class="anchored" data-anchor-id="installing-packages-from-cran"><span class="header-section-number">2.3.6.1</span> Installing packages from CRAN</h4>
+<section id="installing-packages-from-cran" class="level4" data-number="1.3.6.1">
+<h4 data-number="1.3.6.1" class="anchored" data-anchor-id="installing-packages-from-cran"><span class="header-section-number">1.3.6.1</span> Installing packages from CRAN</h4>
 <p>The quickest way to install a package in R is by using the <code>install.packages</code> function. This sends RStudio to the online repository of tested and verified R packages (known as <a href="https://cran.r-project.org/">CRAN</a>) and downloads the package files onto the machine you are currently working from in temporary files. Ensure that the package you wish to install is spelled correctly and surrounded by <code>''</code>.</p>
 <div class="callout callout-style-default callout-warning callout-titled">
 <div class="callout-header d-flex align-content-center">
@@ -560,8 +586,8 @@ <h4 data-number="2.3.6.1" class="anchored" data-anchor-id="installing-packages-f
 </div>
 </div>
 </section>
-<section id="loading-packages-to-an-r-session" class="level4" data-number="2.3.6.2">
-<h4 data-number="2.3.6.2" class="anchored" data-anchor-id="loading-packages-to-an-r-session"><span class="header-section-number">2.3.6.2</span> Loading packages to an R session</h4>
+<section id="loading-packages-to-an-r-session" class="level4" data-number="1.3.6.2">
+<h4 data-number="1.3.6.2" class="anchored" data-anchor-id="loading-packages-to-an-r-session"><span class="header-section-number">1.3.6.2</span> Loading packages to an R session</h4>
 <p>Every time a new session of RStudio is opened, packages must be reloaded. To load a package into R (and gain access to the associated functions and data), use the <code>library</code> function.</p>
 <p>Loading a package does not require an internet connection, but will only work if the package has already been installed and saved onto the computer you are working from. If you are unsure, use the function <code>installed.packages</code> to return a list of all packages that are loaded onto the machine you are working from.</p>
 <div class="callout callout-style-default callout-tip callout-titled">
@@ -996,12 +1022,12 @@ <h4 data-number="2.3.6.2" class="anchored" data-anchor-id="loading-packages-to-a
 <nav class="page-navigation">
   <div class="nav-page nav-page-previous">
       <a href="./index.html" class="pagination-link" aria-label="Welcome!">
-        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Welcome!</span></span>
+        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text">Welcome!</span>
       </a>          
   </div>
   <div class="nav-page nav-page-next">
-      <a href="./session2_notes.html" class="pagination-link" aria-label="Session 2: Introduction to tidyverse and data wrangling">
-        <span class="nav-page-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Session 2: Introduction to tidyverse and data wrangling</span></span> <i class="bi bi-arrow-right-short"></i>
+      <a href="./session2_notes.html" class="pagination-link" aria-label="Introduction to tidyverse and data wrangling">
+        <span class="nav-page-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Introduction to tidyverse and data wrangling</span></span> <i class="bi bi-arrow-right-short"></i>
       </a>
   </div>
 </nav>
diff --git a/_book/session2_notes.html b/_book/session2_notes.html
index 3f4a623..94b5a42 100644
--- a/_book/session2_notes.html
+++ b/_book/session2_notes.html
@@ -7,7 +7,7 @@
 <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
 
 
-<title>Introduction to R with Tidyverse - 3&nbsp; Session 2: Introduction to tidyverse and data wrangling</title>
+<title>Introduction to R with Tidyverse - 2&nbsp; Introduction to tidyverse and data wrangling</title>
 <style>
 code{white-space: pre-wrap;}
 span.smallcaps{font-variant: small-caps;}
@@ -136,14 +136,17 @@
   <li class="nav-item compact">
     <a class="nav-link" href="https://x.com/SophieStats10"> <i class="bi bi-twitter" role="img" aria-label="Twitter">
 </i> 
+<span class="menu-text"></span></a>
+  </li>  
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://buymeacoffee.com/sophie_a_lee"> <i class="bi bi-cup-hot" role="img" aria-label="Buy me a coffee">
+</i> 
 <span class="menu-text"></span></a>
   </li>  
 </ul>
           </div> <!-- /navcollapse -->
-          <div class="quarto-navbar-tools tools-wide">
-    <a href="https://github.com/sophie-a-lee/Introduction_R_Tidyverse_course" title="Source Code" class="quarto-navigation-tool px-1" aria-label="Source Code"><i class="bi bi-github"></i></a>
+          <div class="quarto-navbar-tools">
     <a href="./Introduction-to-R-with-Tidyverse.pdf" title="Download PDF" class="quarto-navigation-tool px-1" aria-label="Download PDF"><i class="bi bi-file-pdf"></i></a>
-    <a href="https://twitter.com/intent/tweet?url=|url|" title="Twitter" class="quarto-navigation-tool px-1" aria-label="Twitter"><i class="bi bi-twitter"></i></a>
 </div>
       </div> <!-- /container-fluid -->
     </nav>
@@ -152,7 +155,7 @@
       <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
         <i class="bi bi-layout-text-sidebar-reverse"></i>
       </button>
-        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./session2_notes.html"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Session 2: Introduction to tidyverse and data wrangling</span></a></li></ol></nav>
+        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./session2_notes.html"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Introduction to tidyverse and data wrangling</span></a></li></ol></nav>
         <a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">      
         </a>
     </div>
@@ -167,39 +170,62 @@
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./index.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Welcome!</span></span></a>
+ <span class="menu-text">Welcome!</span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session1_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Session 1: Introduction to R and RStudio</span></span></a>
+ <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction to R and RStudio</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session2_notes.html" class="sidebar-item-text sidebar-link active">
- <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Session 2: Introduction to tidyverse and data wrangling</span></span></a>
+ <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Introduction to tidyverse and data wrangling</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session3_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Session 3: Data preparation and manipulation</span></span></a>
+ <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Data preparation and manipulation</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session4_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Session 4: Data visualisation with ggplot2</span></span></a>
+ <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Data visualisation with ggplot2</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session5_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+ <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item sidebar-item-section">
+      <div class="sidebar-item-container"> 
+            <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
+ <span class="menu-text">Appendices</span></a>
+          <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
+            <i class="bi bi-chevron-right ms-2"></i>
+          </a> 
+      </div>
+      <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">  
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./data_description.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Data description</span></a>
+  </div>
+</li>
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./exercise_solutions.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Exercise solutions</span></a>
   </div>
 </li>
+      </ul>
+  </li>
     </ul>
     </div>
 </nav>
@@ -210,17 +236,17 @@
     <h2 id="toc-title">Table of contents</h2>
    
   <ul>
-  <li><a href="#opening-and-exploring-data" id="toc-opening-and-exploring-data" class="nav-link active" data-scroll-target="#opening-and-exploring-data"><span class="header-section-number">3.1</span> Opening and exploring data</a>
+  <li><a href="#opening-and-exploring-data" id="toc-opening-and-exploring-data" class="nav-link active" data-scroll-target="#opening-and-exploring-data"><span class="header-section-number">2.1</span> Opening and exploring data</a>
   <ul class="collapse">
-  <li><a href="#styles-of-r-coding" id="toc-styles-of-r-coding" class="nav-link" data-scroll-target="#styles-of-r-coding"><span class="header-section-number">3.1.1</span> Styles of R coding</a></li>
-  <li><a href="#the-working-directory" id="toc-the-working-directory" class="nav-link" data-scroll-target="#the-working-directory"><span class="header-section-number">3.1.2</span> The working directory</a></li>
+  <li><a href="#styles-of-r-coding" id="toc-styles-of-r-coding" class="nav-link" data-scroll-target="#styles-of-r-coding"><span class="header-section-number">2.1.1</span> Styles of R coding</a></li>
+  <li><a href="#the-working-directory" id="toc-the-working-directory" class="nav-link" data-scroll-target="#the-working-directory"><span class="header-section-number">2.1.2</span> The working directory</a></li>
   </ul></li>
-  <li><a href="#data-input" id="toc-data-input" class="nav-link" data-scroll-target="#data-input"><span class="header-section-number">3.2</span> 4.3 Data input</a>
+  <li><a href="#data-input" id="toc-data-input" class="nav-link" data-scroll-target="#data-input"><span class="header-section-number">2.2</span> Data input</a>
   <ul class="collapse">
-  <li><a href="#selecting-variables" id="toc-selecting-variables" class="nav-link" data-scroll-target="#selecting-variables"><span class="header-section-number">3.2.1</span> Selecting variables</a></li>
-  <li><a href="#filtering-data" id="toc-filtering-data" class="nav-link" data-scroll-target="#filtering-data"><span class="header-section-number">3.2.2</span> Filtering data</a></li>
-  <li><a href="#pipes" id="toc-pipes" class="nav-link" data-scroll-target="#pipes"><span class="header-section-number">3.2.3</span> Pipes</a></li>
-  <li><a href="#creating-new-variables" id="toc-creating-new-variables" class="nav-link" data-scroll-target="#creating-new-variables"><span class="header-section-number">3.2.4</span> Creating new variables</a></li>
+  <li><a href="#selecting-variables" id="toc-selecting-variables" class="nav-link" data-scroll-target="#selecting-variables"><span class="header-section-number">2.2.1</span> Selecting variables</a></li>
+  <li><a href="#filtering-data" id="toc-filtering-data" class="nav-link" data-scroll-target="#filtering-data"><span class="header-section-number">2.2.2</span> Filtering data</a></li>
+  <li><a href="#pipes" id="toc-pipes" class="nav-link" data-scroll-target="#pipes"><span class="header-section-number">2.2.3</span> Pipes</a></li>
+  <li><a href="#creating-new-variables" id="toc-creating-new-variables" class="nav-link" data-scroll-target="#creating-new-variables"><span class="header-section-number">2.2.4</span> Creating new variables</a></li>
   <li><a href="#exercise-3" id="toc-exercise-3" class="nav-link" data-scroll-target="#exercise-3">Exercise 3</a></li>
   </ul></li>
   </ul>
@@ -231,7 +257,7 @@ <h2 id="toc-title">Table of contents</h2>
 
 <header id="title-block-header" class="quarto-title-block default">
 <div class="quarto-title">
-<h1 class="title"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Session 2: Introduction to tidyverse and data wrangling</span></h1>
+<h1 class="title"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Introduction to tidyverse and data wrangling</span></h1>
 </div>
 
 
@@ -248,10 +274,10 @@ <h1 class="title"><span class="chapter-number">3</span>&nbsp; <span class="chapt
 </header>
 
 
-<section id="opening-and-exploring-data" class="level2" data-number="3.1">
-<h2 data-number="3.1" class="anchored" data-anchor-id="opening-and-exploring-data"><span class="header-section-number">3.1</span> Opening and exploring data</h2>
-<section id="styles-of-r-coding" class="level3" data-number="3.1.1">
-<h3 data-number="3.1.1" class="anchored" data-anchor-id="styles-of-r-coding"><span class="header-section-number">3.1.1</span> Styles of R coding</h3>
+<section id="opening-and-exploring-data" class="level2" data-number="2.1">
+<h2 data-number="2.1" class="anchored" data-anchor-id="opening-and-exploring-data"><span class="header-section-number">2.1</span> Opening and exploring data</h2>
+<section id="styles-of-r-coding" class="level3" data-number="2.1.1">
+<h3 data-number="2.1.1" class="anchored" data-anchor-id="styles-of-r-coding"><span class="header-section-number">2.1.1</span> Styles of R coding</h3>
 <p>Up to this point, we have not thought about the style of R coding we will be using. There are different approaches to R coding that we can use, they can be thought of as different dialects of the R programming language.</p>
 <p>The choice of R ‘dialect’ depends on personal preference. Some prefer to use the ‘base R’ approach that does not rely on any packages that may need updating, making it a more stable approach. However, base R can be difficult to read for those not comfortable with coding.</p>
 <div class="cell">
@@ -299,13 +325,13 @@ <h3 data-number="3.1.1" class="anchored" data-anchor-id="styles-of-r-coding"><sp
 </div>
 </div>
 </section>
-<section id="the-working-directory" class="level3" data-number="3.1.2">
-<h3 data-number="3.1.2" class="anchored" data-anchor-id="the-working-directory"><span class="header-section-number">3.1.2</span> The working directory</h3>
+<section id="the-working-directory" class="level3" data-number="2.1.2">
+<h3 data-number="2.1.2" class="anchored" data-anchor-id="the-working-directory"><span class="header-section-number">2.1.2</span> The working directory</h3>
 <p>The working directory is a file path on your computer that R sets as the default location when opening, saving, or exporting documents, files, and graphics. This file path can be specified manually but setting the working directory saves time and makes code more efficient.</p>
 <p>The working directory can be set manually by using the <em>Session -&gt; Set Working Directory -&gt; Change Directory…</em> option from the drop-down menu, or the <code>setwd</code> function. Both options require the directory to be specified each time R is restarted, are sensitive to changes in folders within the file path, and cannot be used when script files are shared between colleagues.</p>
 <p>An alternative approach that overcomes these issues is to create an R project.</p>
-<section id="r-projects" class="level4" data-number="3.1.2.1">
-<h4 data-number="3.1.2.1" class="anchored" data-anchor-id="r-projects"><span class="header-section-number">3.1.2.1</span> R projects</h4>
+<section id="r-projects" class="level4" data-number="2.1.2.1">
+<h4 data-number="2.1.2.1" class="anchored" data-anchor-id="r-projects"><span class="header-section-number">2.1.2.1</span> R projects</h4>
 <p>R projects are files (saved with the <code>.Rproj</code> extension) that keep associated files (including scripts, data, and outputs) grouped together. An R project automatically sets the working directory relative to its current location, which makes collaborative work easier, and avoids issues when a file path is changed.</p>
 <p>Projects are created by using the <em>File -&gt; New project</em> option from the drop-down menu, or using the <img src="images/project_icon.png" class="img-fluid" alt="R project icon"> icon from the top-right corner of the RStudio interface. Existing projects can be opened under the <em>File -&gt; Open project…</em> drop-down menu or using the project icon.</p>
 <p>When creating a new project, we must choose whether we want to create a new directory or use an existing one. Usually, we will have already set up a folder containing data or other documents related to the analysis we plan to carry out. If this is the case, we are using an existing directory and selecting the analysis folder as the project directory.</p>
@@ -325,8 +351,8 @@ <h4 data-number="3.1.2.1" class="anchored" data-anchor-id="r-projects"><span cla
 </section>
 </section>
 </section>
-<section id="data-input" class="level2" data-number="3.2">
-<h2 data-number="3.2" class="anchored" data-anchor-id="data-input"><span class="header-section-number">3.2</span> 4.3 Data input</h2>
+<section id="data-input" class="level2" data-number="2.2">
+<h2 data-number="2.2" class="anchored" data-anchor-id="data-input"><span class="header-section-number">2.2</span> Data input</h2>
 <p>To ensure our code is collaborative and reproducible, we should strive to store data in formats that can be used across multiple platforms. One of the best ways to do this is to store data as a comma-delimited file (.csv). CSV files can be opened by a range of different softwares (including R, SPSS, STATA and excel), and base R can be used to open these files without requiring additional packages.</p>
 <p>Before loading files in R, it is essential to check that they are correctly formatted. Data files should only contain one sheet with no pictures or graphics, each row should correspond to a case or observation and each column should correspond to a variable.</p>
 <p>To avoid any errors arising from spelling mistakes, we can use the <code>list.files</code> function to return a list of files and folders from the current working directory. The file names can be copied from the console and pasted into the script file. As the data are saved in a folder within the working directory, we must add the argument <code>path =</code> to specify the folder we want to list files from.</p>
@@ -419,8 +445,8 @@ <h2 data-number="3.2" class="anchored" data-anchor-id="data-input"><span class="
 <span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a><span class="do">## 6 E06000014 York        YH        27.1             1.06          93.8     2.68 </span></span>
 <span id="cb8-12"><a href="#cb8-12" aria-hidden="true" tabindex="-1"></a><span class="do">## # ℹ 2 more variables: nhb_return_2020 &lt;dbl&gt;, rsdg_2020 &lt;dbl&gt;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
-<section id="selecting-variables" class="level3" data-number="3.2.1">
-<h3 data-number="3.2.1" class="anchored" data-anchor-id="selecting-variables"><span class="header-section-number">3.2.1</span> Selecting variables</h3>
+<section id="selecting-variables" class="level3" data-number="2.2.1">
+<h3 data-number="2.2.1" class="anchored" data-anchor-id="selecting-variables"><span class="header-section-number">2.2.1</span> Selecting variables</h3>
 <p>Often, our analysis will not require every variable in a downloaded dataset, and we may wish to create a smaller analysis tibble. We may also wish to select individual variables from the tibble to apply functions to them without including the entire dataset.</p>
 <p>To select one or more variable and return them as a new tibble, we can use the <code>select</code> function from tidyverse’s <code>dplyr</code> package.</p>
 <p>For example, if we wanted to return the new homes bonus (<code>nhb</code>) for each local authority (the seventh column of the dataset), we can either <code>select</code> this based on the variable name or its location in the object:</p>
@@ -550,8 +576,8 @@ <h3 data-number="3.2.1" class="anchored" data-anchor-id="selecting-variables"><s
 </div>
 </div>
 </section>
-<section id="filtering-data" class="level3" data-number="3.2.2">
-<h3 data-number="3.2.2" class="anchored" data-anchor-id="filtering-data"><span class="header-section-number">3.2.2</span> Filtering data</h3>
+<section id="filtering-data" class="level3" data-number="2.2.2">
+<h3 data-number="2.2.2" class="anchored" data-anchor-id="filtering-data"><span class="header-section-number">2.2.2</span> Filtering data</h3>
 <p>The <code>filter</code> function, from tidyverse’s <code>dplyr</code> package allows us to return subgroups of the data based on conditional statements. These conditional statements can include mathematical operators, e.g.&nbsp;<code>&lt;=</code> (less than or equal to), <code>==</code> (is equal to), and <code>!=</code> (is not equal to), or can be based on conditional functions, e.g.&nbsp;<code>is.na(variable)</code> (is missing), <code>between(a, b)</code> (number lies between a and b).</p>
 <p>A more comprehensive list of conditional statements can be found in the help file using <code>?filter</code>.</p>
 <p>For example, to return the core spending power for local authorities in the North West region of England, we use the following:</p>
@@ -594,8 +620,8 @@ <h3 data-number="3.2.2" class="anchored" data-anchor-id="filtering-data"><span c
 <span id="cb16-16"><a href="#cb16-16" aria-hidden="true" tabindex="-1"></a><span class="do">## # ℹ 2 more variables: nhb_return_2020 &lt;dbl&gt;, rsdg_2020 &lt;dbl&gt;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 </section>
-<section id="pipes" class="level3" data-number="3.2.3">
-<h3 data-number="3.2.3" class="anchored" data-anchor-id="pipes"><span class="header-section-number">3.2.3</span> Pipes</h3>
+<section id="pipes" class="level3" data-number="2.2.3">
+<h3 data-number="2.2.3" class="anchored" data-anchor-id="pipes"><span class="header-section-number">2.2.3</span> Pipes</h3>
 <p>When creating an analysis-ready dataset, we often want to combine functions such as <code>select</code> and <code>filter</code>. Previously, these would need to be carried out separately and a new object would need to be created or overwritten at each step, clogging up the environment.</p>
 <p>In tidyverse, we combine multiple functions into a single process by using the ‘pipe’ symbol <code>%&gt;%</code>, which is read as ‘and then’ within the code.</p>
 <div class="callout callout-style-default callout-note callout-titled">
@@ -661,8 +687,8 @@ <h3 data-number="3.2.3" class="anchored" data-anchor-id="pipes"><span class="hea
 <span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a><span class="do">## 1    46</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 </section>
-<section id="creating-new-variables" class="level3" data-number="3.2.4">
-<h3 data-number="3.2.4" class="anchored" data-anchor-id="creating-new-variables"><span class="header-section-number">3.2.4</span> Creating new variables</h3>
+<section id="creating-new-variables" class="level3" data-number="2.2.4">
+<h3 data-number="2.2.4" class="anchored" data-anchor-id="creating-new-variables"><span class="header-section-number">2.2.4</span> Creating new variables</h3>
 <p>The function <code>mutate</code> from tidyverse’s <code>dplyr</code> package allows us to add new variables to a dataset, either by manually specifying them or by creating them from existing variables. We can add multiple variables within the same function, separating each with a comma <code>,</code>.</p>
 <p>For example, we can create a new variables with the squared settlement funding assessment (<code>sfa_2020</code>), and another that recodes the council tax variable (<code>ct_total_2020</code>) into a categorical variable with three levels (low: below £5 million, medium: between £5 million and £15 million, and high: above £15 million):</p>
 <div class="cell">
@@ -1156,13 +1182,13 @@ <h3 class="unnumbered anchored" data-anchor-id="exercise-3">Exercise 3</h3>
 </script>
 <nav class="page-navigation">
   <div class="nav-page nav-page-previous">
-      <a href="./session1_notes.html" class="pagination-link" aria-label="Session 1: Introduction to R and RStudio">
-        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Session 1: Introduction to R and RStudio</span></span>
+      <a href="./session1_notes.html" class="pagination-link" aria-label="Introduction to R and RStudio">
+        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction to R and RStudio</span></span>
       </a>          
   </div>
   <div class="nav-page nav-page-next">
-      <a href="./session3_notes.html" class="pagination-link" aria-label="Session 3: Data preparation and manipulation">
-        <span class="nav-page-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Session 3: Data preparation and manipulation</span></span> <i class="bi bi-arrow-right-short"></i>
+      <a href="./session3_notes.html" class="pagination-link" aria-label="Data preparation and manipulation">
+        <span class="nav-page-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Data preparation and manipulation</span></span> <i class="bi bi-arrow-right-short"></i>
       </a>
   </div>
 </nav>
diff --git a/_book/session3_notes.html b/_book/session3_notes.html
index 14665c4..e20d3d4 100644
--- a/_book/session3_notes.html
+++ b/_book/session3_notes.html
@@ -7,7 +7,7 @@
 <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
 
 
-<title>Introduction to R with Tidyverse - 4&nbsp; Session 3: Data preparation and manipulation</title>
+<title>Introduction to R with Tidyverse - 3&nbsp; Data preparation and manipulation</title>
 <style>
 code{white-space: pre-wrap;}
 span.smallcaps{font-variant: small-caps;}
@@ -136,14 +136,17 @@
   <li class="nav-item compact">
     <a class="nav-link" href="https://x.com/SophieStats10"> <i class="bi bi-twitter" role="img" aria-label="Twitter">
 </i> 
+<span class="menu-text"></span></a>
+  </li>  
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://buymeacoffee.com/sophie_a_lee"> <i class="bi bi-cup-hot" role="img" aria-label="Buy me a coffee">
+</i> 
 <span class="menu-text"></span></a>
   </li>  
 </ul>
           </div> <!-- /navcollapse -->
-          <div class="quarto-navbar-tools tools-wide">
-    <a href="https://github.com/sophie-a-lee/Introduction_R_Tidyverse_course" title="Source Code" class="quarto-navigation-tool px-1" aria-label="Source Code"><i class="bi bi-github"></i></a>
+          <div class="quarto-navbar-tools">
     <a href="./Introduction-to-R-with-Tidyverse.pdf" title="Download PDF" class="quarto-navigation-tool px-1" aria-label="Download PDF"><i class="bi bi-file-pdf"></i></a>
-    <a href="https://twitter.com/intent/tweet?url=|url|" title="Twitter" class="quarto-navigation-tool px-1" aria-label="Twitter"><i class="bi bi-twitter"></i></a>
 </div>
       </div> <!-- /container-fluid -->
     </nav>
@@ -152,7 +155,7 @@
       <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
         <i class="bi bi-layout-text-sidebar-reverse"></i>
       </button>
-        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./session3_notes.html"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Session 3: Data preparation and manipulation</span></a></li></ol></nav>
+        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./session3_notes.html"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Data preparation and manipulation</span></a></li></ol></nav>
         <a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">      
         </a>
     </div>
@@ -167,39 +170,62 @@
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./index.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Welcome!</span></span></a>
+ <span class="menu-text">Welcome!</span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session1_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Session 1: Introduction to R and RStudio</span></span></a>
+ <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction to R and RStudio</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session2_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Session 2: Introduction to tidyverse and data wrangling</span></span></a>
+ <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Introduction to tidyverse and data wrangling</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session3_notes.html" class="sidebar-item-text sidebar-link active">
- <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Session 3: Data preparation and manipulation</span></span></a>
+ <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Data preparation and manipulation</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session4_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Session 4: Data visualisation with ggplot2</span></span></a>
+ <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Data visualisation with ggplot2</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session5_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+ <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item sidebar-item-section">
+      <div class="sidebar-item-container"> 
+            <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
+ <span class="menu-text">Appendices</span></a>
+          <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
+            <i class="bi bi-chevron-right ms-2"></i>
+          </a> 
+      </div>
+      <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">  
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./data_description.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Data description</span></a>
+  </div>
+</li>
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./exercise_solutions.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Exercise solutions</span></a>
   </div>
 </li>
+      </ul>
+  </li>
     </ul>
     </div>
 </nav>
@@ -210,12 +236,12 @@
     <h2 id="toc-title">Table of contents</h2>
    
   <ul>
-  <li><a href="#data-wrangling-and-summarising" id="toc-data-wrangling-and-summarising" class="nav-link active" data-scroll-target="#data-wrangling-and-summarising"><span class="header-section-number">4.1</span> Data wrangling and summarising</a>
+  <li><a href="#data-wrangling-and-summarising" id="toc-data-wrangling-and-summarising" class="nav-link active" data-scroll-target="#data-wrangling-and-summarising"><span class="header-section-number">3.1</span> Data wrangling and summarising</a>
   <ul class="collapse">
-  <li><a href="#combining-two-datasets" id="toc-combining-two-datasets" class="nav-link" data-scroll-target="#combining-two-datasets"><span class="header-section-number">4.1.1</span> Combining two datasets</a></li>
-  <li><a href="#joining-multiple-datasets" id="toc-joining-multiple-datasets" class="nav-link" data-scroll-target="#joining-multiple-datasets"><span class="header-section-number">4.1.2</span> Joining multiple datasets</a></li>
-  <li><a href="#transforming-data" id="toc-transforming-data" class="nav-link" data-scroll-target="#transforming-data"><span class="header-section-number">4.1.3</span> Transforming data</a></li>
-  <li><a href="#summary-tables" id="toc-summary-tables" class="nav-link" data-scroll-target="#summary-tables"><span class="header-section-number">4.1.4</span> Summary tables</a></li>
+  <li><a href="#combining-two-datasets" id="toc-combining-two-datasets" class="nav-link" data-scroll-target="#combining-two-datasets"><span class="header-section-number">3.1.1</span> Combining two datasets</a></li>
+  <li><a href="#joining-multiple-datasets" id="toc-joining-multiple-datasets" class="nav-link" data-scroll-target="#joining-multiple-datasets"><span class="header-section-number">3.1.2</span> Joining multiple datasets</a></li>
+  <li><a href="#transforming-data" id="toc-transforming-data" class="nav-link" data-scroll-target="#transforming-data"><span class="header-section-number">3.1.3</span> Transforming data</a></li>
+  <li><a href="#summary-tables" id="toc-summary-tables" class="nav-link" data-scroll-target="#summary-tables"><span class="header-section-number">3.1.4</span> Summary tables</a></li>
   <li><a href="#exercise-4" id="toc-exercise-4" class="nav-link" data-scroll-target="#exercise-4">Exercise 4</a></li>
   </ul></li>
   </ul>
@@ -226,7 +252,7 @@ <h2 id="toc-title">Table of contents</h2>
 
 <header id="title-block-header" class="quarto-title-block default">
 <div class="quarto-title">
-<h1 class="title"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Session 3: Data preparation and manipulation</span></h1>
+<h1 class="title"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Data preparation and manipulation</span></h1>
 </div>
 
 
@@ -243,10 +269,10 @@ <h1 class="title"><span class="chapter-number">4</span>&nbsp; <span class="chapt
 </header>
 
 
-<section id="data-wrangling-and-summarising" class="level2" data-number="4.1">
-<h2 data-number="4.1" class="anchored" data-anchor-id="data-wrangling-and-summarising"><span class="header-section-number">4.1</span> Data wrangling and summarising</h2>
-<section id="combining-two-datasets" class="level3" data-number="4.1.1">
-<h3 data-number="4.1.1" class="anchored" data-anchor-id="combining-two-datasets"><span class="header-section-number">4.1.1</span> Combining two datasets</h3>
+<section id="data-wrangling-and-summarising" class="level2" data-number="3.1">
+<h2 data-number="3.1" class="anchored" data-anchor-id="data-wrangling-and-summarising"><span class="header-section-number">3.1</span> Data wrangling and summarising</h2>
+<section id="combining-two-datasets" class="level3" data-number="3.1.1">
+<h3 data-number="3.1.1" class="anchored" data-anchor-id="combining-two-datasets"><span class="header-section-number">3.1.1</span> Combining two datasets</h3>
 <p>We may need to combine data from different files within R to perform an analysis. For example, in our case we have the core spending power for each year between 2015 and 2020. If our analysis required comparing this spending over the time period, we would need to combine these files together.</p>
 <p>Before the data can be combined, it must be loaded into R. We will begin combining data from 2015 and 2016, then extend this to the entire period.</p>
 <div class="cell">
@@ -273,8 +299,8 @@ <h3 data-number="4.1.1" class="anchored" data-anchor-id="combining-two-datasets"
 <span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a>                        <span class="at">by =</span> <span class="fu">c</span>(<span class="st">"ons_code"</span>, <span class="st">"authority"</span>, <span class="st">"region"</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 </section>
-<section id="joining-multiple-datasets" class="level3" data-number="4.1.2">
-<h3 data-number="4.1.2" class="anchored" data-anchor-id="joining-multiple-datasets"><span class="header-section-number">4.1.2</span> Joining multiple datasets</h3>
+<section id="joining-multiple-datasets" class="level3" data-number="3.1.2">
+<h3 data-number="3.1.2" class="anchored" data-anchor-id="joining-multiple-datasets"><span class="header-section-number">3.1.2</span> Joining multiple datasets</h3>
 <p>R’s joining functions can only be applied to two datasets at a time. To combine all 6 core spending power datasets from 2015 to 2020 in this way would require a lot of repetitive coding (which we want to avoid where necessary).</p>
 <p>An alternative approach would be to automate this process by using <strong>functional programming</strong>, implemented using tidyverse’s <code>purrr</code> package.</p>
 <p>The first step of this process requires loading all csv files into R by repeatedly applying <code>read_csv</code>. This requires a list of file names from the working directory. The function <code>list.files</code> introduced earlier contains an optional argument, <code>pattern</code> which can be used to return files and folders that match a naming pattern. In this case, all csv files begin “CSP_20”, so to return this list of names from the <em>data</em> folder, we use the function:</p>
@@ -302,8 +328,8 @@ <h3 data-number="4.1.2" class="anchored" data-anchor-id="joining-multiple-datase
 <span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">reduce</span>(full_join, <span class="at">by =</span> <span class="fu">c</span>(<span class="st">"ons_code"</span>, <span class="st">"authority"</span>, <span class="st">"region"</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 </section>
-<section id="transforming-data" class="level3" data-number="4.1.3">
-<h3 data-number="4.1.3" class="anchored" data-anchor-id="transforming-data"><span class="header-section-number">4.1.3</span> Transforming data</h3>
+<section id="transforming-data" class="level3" data-number="3.1.3">
+<h3 data-number="3.1.3" class="anchored" data-anchor-id="transforming-data"><span class="header-section-number">3.1.3</span> Transforming data</h3>
 <p>The dataset containing core spending power in England between 2015 and 2020 is currently in what is known as <strong>wide format</strong>. This means there is a variable per measure per year, making the object very wide.</p>
 <p>Some analyses and visualisations, particularly those used for temporal data, require a time variable in the dataset (for example, year). This requires the data to be in a different format, known as <strong>long format</strong>. Long format is where each row contains an observation per year (making the data much longer and narrower).</p>
 <p>To convert data between wide and long formats, we can use the tidyverse functions <code>pivot_longer</code> and <code>pivot_wider</code>.</p>
@@ -357,8 +383,8 @@ <h3 data-number="4.1.3" class="anchored" data-anchor-id="transforming-data"><spa
 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="fu">write_csv</span>(csp_long2, <span class="at">file =</span> <span class="st">"data/CSP_long_201520.csv"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 </section>
-<section id="summary-tables" class="level3" data-number="4.1.4">
-<h3 data-number="4.1.4" class="anchored" data-anchor-id="summary-tables"><span class="header-section-number">4.1.4</span> Summary tables</h3>
+<section id="summary-tables" class="level3" data-number="3.1.4">
+<h3 data-number="3.1.4" class="anchored" data-anchor-id="summary-tables"><span class="header-section-number">3.1.4</span> Summary tables</h3>
 <p>Summary tables can be created using the <code>summarise</code> function. This returns tables in a tibble format, meaning they can easily be customised and exported as CSV files (using the <code>write_csv</code> function).</p>
 <p>The <code>summarise</code> function is set up similarly to the <code>mutate</code> function: summaries are listed and given variable names, separated by a comma. The difference between these functions is that <code>summarise</code> collapses the tibble into a single summary row, and the new variables must be created using a summary function.</p>
 <p>Common examples of summary functions include:</p>
@@ -869,13 +895,13 @@ <h3 class="unnumbered anchored" data-anchor-id="exercise-4">Exercise 4</h3>
 </script>
 <nav class="page-navigation">
   <div class="nav-page nav-page-previous">
-      <a href="./session2_notes.html" class="pagination-link" aria-label="Session 2: Introduction to tidyverse and data wrangling">
-        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Session 2: Introduction to tidyverse and data wrangling</span></span>
+      <a href="./session2_notes.html" class="pagination-link" aria-label="Introduction to tidyverse and data wrangling">
+        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Introduction to tidyverse and data wrangling</span></span>
       </a>          
   </div>
   <div class="nav-page nav-page-next">
-      <a href="./session4_notes.html" class="pagination-link" aria-label="Session 4: Data visualisation with ggplot2">
-        <span class="nav-page-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Session 4: Data visualisation with ggplot2</span></span> <i class="bi bi-arrow-right-short"></i>
+      <a href="./session4_notes.html" class="pagination-link" aria-label="Data visualisation with ggplot2">
+        <span class="nav-page-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Data visualisation with ggplot2</span></span> <i class="bi bi-arrow-right-short"></i>
       </a>
   </div>
 </nav>
diff --git a/_book/session4_notes.html b/_book/session4_notes.html
index eada834..406b735 100644
--- a/_book/session4_notes.html
+++ b/_book/session4_notes.html
@@ -7,7 +7,7 @@
 <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
 
 
-<title>Introduction to R with Tidyverse - 5&nbsp; Session 4: Data visualisation with ggplot2</title>
+<title>Introduction to R with Tidyverse - 4&nbsp; Data visualisation with ggplot2</title>
 <style>
 code{white-space: pre-wrap;}
 span.smallcaps{font-variant: small-caps;}
@@ -165,14 +165,17 @@
   <li class="nav-item compact">
     <a class="nav-link" href="https://x.com/SophieStats10"> <i class="bi bi-twitter" role="img" aria-label="Twitter">
 </i> 
+<span class="menu-text"></span></a>
+  </li>  
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://buymeacoffee.com/sophie_a_lee"> <i class="bi bi-cup-hot" role="img" aria-label="Buy me a coffee">
+</i> 
 <span class="menu-text"></span></a>
   </li>  
 </ul>
           </div> <!-- /navcollapse -->
-          <div class="quarto-navbar-tools tools-wide">
-    <a href="https://github.com/sophie-a-lee/Introduction_R_Tidyverse_course" title="Source Code" class="quarto-navigation-tool px-1" aria-label="Source Code"><i class="bi bi-github"></i></a>
+          <div class="quarto-navbar-tools">
     <a href="./Introduction-to-R-with-Tidyverse.pdf" title="Download PDF" class="quarto-navigation-tool px-1" aria-label="Download PDF"><i class="bi bi-file-pdf"></i></a>
-    <a href="https://twitter.com/intent/tweet?url=|url|" title="Twitter" class="quarto-navigation-tool px-1" aria-label="Twitter"><i class="bi bi-twitter"></i></a>
 </div>
       </div> <!-- /container-fluid -->
     </nav>
@@ -181,7 +184,7 @@
       <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
         <i class="bi bi-layout-text-sidebar-reverse"></i>
       </button>
-        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./session4_notes.html"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Session 4: Data visualisation with ggplot2</span></a></li></ol></nav>
+        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./session4_notes.html"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Data visualisation with ggplot2</span></a></li></ol></nav>
         <a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">      
         </a>
     </div>
@@ -196,39 +199,62 @@
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./index.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Welcome!</span></span></a>
+ <span class="menu-text">Welcome!</span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session1_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Session 1: Introduction to R and RStudio</span></span></a>
+ <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction to R and RStudio</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session2_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Session 2: Introduction to tidyverse and data wrangling</span></span></a>
+ <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Introduction to tidyverse and data wrangling</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session3_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Session 3: Data preparation and manipulation</span></span></a>
+ <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Data preparation and manipulation</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session4_notes.html" class="sidebar-item-text sidebar-link active">
- <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Session 4: Data visualisation with ggplot2</span></span></a>
+ <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Data visualisation with ggplot2</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session5_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+ <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item sidebar-item-section">
+      <div class="sidebar-item-container"> 
+            <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
+ <span class="menu-text">Appendices</span></a>
+          <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
+            <i class="bi bi-chevron-right ms-2"></i>
+          </a> 
+      </div>
+      <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">  
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./data_description.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Data description</span></a>
+  </div>
+</li>
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./exercise_solutions.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Exercise solutions</span></a>
   </div>
 </li>
+      </ul>
+  </li>
     </ul>
     </div>
 </nav>
@@ -239,17 +265,17 @@
     <h2 id="toc-title">Table of contents</h2>
    
   <ul>
-  <li><a href="#data-visualisation-with-ggplot2" id="toc-data-visualisation-with-ggplot2" class="nav-link active" data-scroll-target="#data-visualisation-with-ggplot2"><span class="header-section-number">5.1</span> Data visualisation with ggplot2</a>
+  <li><a href="#data-visualisation-with-ggplot2" id="toc-data-visualisation-with-ggplot2" class="nav-link active" data-scroll-target="#data-visualisation-with-ggplot2"><span class="header-section-number">4.1</span> Data visualisation with ggplot2</a>
   <ul class="collapse">
-  <li><a href="#choosing-the-most-appropriate-visualisation" id="toc-choosing-the-most-appropriate-visualisation" class="nav-link" data-scroll-target="#choosing-the-most-appropriate-visualisation"><span class="header-section-number">5.1.1</span> Choosing the most appropriate visualisation</a></li>
-  <li><a href="#the-ggplot2-package" id="toc-the-ggplot2-package" class="nav-link" data-scroll-target="#the-ggplot2-package"><span class="header-section-number">5.1.2</span> The ggplot2 package</a></li>
+  <li><a href="#choosing-the-most-appropriate-visualisation" id="toc-choosing-the-most-appropriate-visualisation" class="nav-link" data-scroll-target="#choosing-the-most-appropriate-visualisation"><span class="header-section-number">4.1.1</span> Choosing the most appropriate visualisation</a></li>
+  <li><a href="#the-ggplot2-package" id="toc-the-ggplot2-package" class="nav-link" data-scroll-target="#the-ggplot2-package"><span class="header-section-number">4.1.2</span> The ggplot2 package</a></li>
   <li><a href="#exercise-5" id="toc-exercise-5" class="nav-link" data-scroll-target="#exercise-5">Exercise 5</a></li>
-  <li><a href="#customising-visualisations" id="toc-customising-visualisations" class="nav-link" data-scroll-target="#customising-visualisations"><span class="header-section-number">5.1.3</span> Customising visualisations</a></li>
+  <li><a href="#customising-visualisations" id="toc-customising-visualisations" class="nav-link" data-scroll-target="#customising-visualisations"><span class="header-section-number">4.1.3</span> Customising visualisations</a></li>
   <li><a href="#exercise-6" id="toc-exercise-6" class="nav-link" data-scroll-target="#exercise-6">Exercise 6</a></li>
-  <li><a href="#scale-functions" id="toc-scale-functions" class="nav-link" data-scroll-target="#scale-functions"><span class="header-section-number">5.1.4</span> Scale functions</a></li>
-  <li><a href="#other-labelling-functions" id="toc-other-labelling-functions" class="nav-link" data-scroll-target="#other-labelling-functions"><span class="header-section-number">5.1.5</span> Other labelling functions</a></li>
-  <li><a href="#theme-functions" id="toc-theme-functions" class="nav-link" data-scroll-target="#theme-functions"><span class="header-section-number">5.1.6</span> Theme functions</a></li>
-  <li><a href="#facet-functions" id="toc-facet-functions" class="nav-link" data-scroll-target="#facet-functions"><span class="header-section-number">5.1.7</span> Facet functions</a></li>
+  <li><a href="#scale-functions" id="toc-scale-functions" class="nav-link" data-scroll-target="#scale-functions"><span class="header-section-number">4.1.4</span> Scale functions</a></li>
+  <li><a href="#other-labelling-functions" id="toc-other-labelling-functions" class="nav-link" data-scroll-target="#other-labelling-functions"><span class="header-section-number">4.1.5</span> Other labelling functions</a></li>
+  <li><a href="#theme-functions" id="toc-theme-functions" class="nav-link" data-scroll-target="#theme-functions"><span class="header-section-number">4.1.6</span> Theme functions</a></li>
+  <li><a href="#facet-functions" id="toc-facet-functions" class="nav-link" data-scroll-target="#facet-functions"><span class="header-section-number">4.1.7</span> Facet functions</a></li>
   <li><a href="#exercise-7" id="toc-exercise-7" class="nav-link" data-scroll-target="#exercise-7">Exercise 7</a></li>
   </ul></li>
   </ul>
@@ -260,7 +286,7 @@ <h2 id="toc-title">Table of contents</h2>
 
 <header id="title-block-header" class="quarto-title-block default">
 <div class="quarto-title">
-<h1 class="title"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Session 4: Data visualisation with ggplot2</span></h1>
+<h1 class="title"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Data visualisation with ggplot2</span></h1>
 </div>
 
 
@@ -277,11 +303,11 @@ <h1 class="title"><span class="chapter-number">5</span>&nbsp; <span class="chapt
 </header>
 
 
-<section id="data-visualisation-with-ggplot2" class="level2" data-number="5.1">
-<h2 data-number="5.1" class="anchored" data-anchor-id="data-visualisation-with-ggplot2"><span class="header-section-number">5.1</span> Data visualisation with ggplot2</h2>
+<section id="data-visualisation-with-ggplot2" class="level2" data-number="4.1">
+<h2 data-number="4.1" class="anchored" data-anchor-id="data-visualisation-with-ggplot2"><span class="header-section-number">4.1</span> Data visualisation with ggplot2</h2>
 <p>Data visualisation is a powerful tool with multiple important uses. First, visualisations allow us to explore the data, identify potential outliers and errors, or check that the variables behave in the way we would expect them to if they had been recorded correctly. Visualisations can also be used as an analysis tool, allowing us to identify trends in the data or differences between groups. Finally, visualisations can help to convey messages to an audience in a clear, concise way that is often more powerful than presenting them using numbers or text. In some cases, data visualisations can show results so clearly that further analysis is arguably unnecessary.</p>
-<section id="choosing-the-most-appropriate-visualisation" class="level3" data-number="5.1.1">
-<h3 data-number="5.1.1" class="anchored" data-anchor-id="choosing-the-most-appropriate-visualisation"><span class="header-section-number">5.1.1</span> Choosing the most appropriate visualisation</h3>
+<section id="choosing-the-most-appropriate-visualisation" class="level3" data-number="4.1.1">
+<h3 data-number="4.1.1" class="anchored" data-anchor-id="choosing-the-most-appropriate-visualisation"><span class="header-section-number">4.1.1</span> Choosing the most appropriate visualisation</h3>
 <p>The most appropriate choice of visualisation will depend on the type of variable(s) we wish to display, the number of variables and the message we are trying to disseminate. Common plots used to display combinations of different types of data are given in following table:</p>
 <div class="quarto-figure quarto-figure-center">
 <figure class="figure">
@@ -291,8 +317,8 @@ <h3 data-number="5.1.1" class="anchored" data-anchor-id="choosing-the-most-appro
 </div>
 <p>R is very flexible when it comes to visualising data and contains a wide variety of options to customise graphs. This section will focus on the Tidyverse package <code>ggplot2</code> and introduce some of the more commonly used graphical functions and parameters but is by no means comprehensive.</p>
 </section>
-<section id="the-ggplot2-package" class="level3" data-number="5.1.2">
-<h3 data-number="5.1.2" class="anchored" data-anchor-id="the-ggplot2-package"><span class="header-section-number">5.1.2</span> The ggplot2 package</h3>
+<section id="the-ggplot2-package" class="level3" data-number="4.1.2">
+<h3 data-number="4.1.2" class="anchored" data-anchor-id="the-ggplot2-package"><span class="header-section-number">4.1.2</span> The ggplot2 package</h3>
 <p>The <code>ggplot2</code> package implements the ‘grammar of graphics’, a system that aims to describe all statistical graphics in terms of their components or layers. All graphics can be broken down into the same components: the data, a coordinate system (or plot area) and some visual markings of the data. More complex plots may have additional layers but all must contain these three.</p>
 <p>For example, in the <code>csp_2020</code> dataset, we may wish to explore the relationship between the settlement funding assessment (<code>sfa_2020</code>) and council tax total (<code>ct_total_2020</code>) spending for each local authority. To visualise the relationship between two continuous numeric variables, a <strong>scatterplot</strong> would be most appropriate.</p>
 <p>Within the <code>ggplot2</code> package, we first use the <code>ggplot</code> function to create a coordinate system (a blank plot space) that we can add layers and objects to. Within this function, we specify the data that we wish to display on the coordinate system:</p>
@@ -384,8 +410,8 @@ <h3 class="unnumbered anchored" data-anchor-id="exercise-5">Exercise 5</h3>
 <li>Based on the visualisation above, create a summary table for the SFA variable containing the minimum and maximum, and appropriate measures of the centre/average and spread.</li>
 </ol>
 </section>
-<section id="customising-visualisations" class="level3" data-number="5.1.3">
-<h3 data-number="5.1.3" class="anchored" data-anchor-id="customising-visualisations"><span class="header-section-number">5.1.3</span> Customising visualisations</h3>
+<section id="customising-visualisations" class="level3" data-number="4.1.3">
+<h3 data-number="4.1.3" class="anchored" data-anchor-id="customising-visualisations"><span class="header-section-number">4.1.3</span> Customising visualisations</h3>
 <p>Additional variables can be included into a visualisation within the mapping argument of a <code>geom</code> function. For example, we could explore the relationship between SFA and council tax across regions by colouring points based on the region:</p>
 <div class="cell">
 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(<span class="at">data =</span> csp_nolon_2020) <span class="sc">+</span> </span>
@@ -468,10 +494,10 @@ <h3 class="unnumbered anchored" data-anchor-id="exercise-6">Exercise 6</h3>
 <li><p>Add a line of best fit for each region (hint: make each line a different colour).</p></li>
 </ol>
 </section>
-<section id="scale-functions" class="level3" data-number="5.1.4">
-<h3 data-number="5.1.4" class="anchored" data-anchor-id="scale-functions"><span class="header-section-number">5.1.4</span> Scale functions</h3>
-<section id="customising-axes" class="level4" data-number="5.1.4.1">
-<h4 data-number="5.1.4.1" class="anchored" data-anchor-id="customising-axes"><span class="header-section-number">5.1.4.1</span> Customising axes</h4>
+<section id="scale-functions" class="level3" data-number="4.1.4">
+<h3 data-number="4.1.4" class="anchored" data-anchor-id="scale-functions"><span class="header-section-number">4.1.4</span> Scale functions</h3>
+<section id="customising-axes" class="level4" data-number="4.1.4.1">
+<h4 data-number="4.1.4.1" class="anchored" data-anchor-id="customising-axes"><span class="header-section-number">4.1.4.1</span> Customising axes</h4>
 <p>Scale functions allow us to customise aesthetics defined in geom objects such as colours and axes labels. They take the form <code>scale_'aesthetic to customise'_'scale of variable’</code>. For example, <code>scale_x_continuous</code> customises the x axis when the variable is continuous, and <code>scale_x_discrete</code> can be used where the variable is discrete or categorical. Arguments to customise the x or y axes include:</p>
 <ul>
 <li><code>name =</code> to change the axis title</li>
@@ -521,8 +547,8 @@ <h4 data-number="5.1.4.1" class="anchored" data-anchor-id="customising-axes"><sp
 </div>
 <p>We can now clearly see the strong positive association between SFA and council tax spending in local authorities with lower values of this without losing any information.</p>
 </section>
-<section id="customising-colour-scales" class="level4" data-number="5.1.4.2">
-<h4 data-number="5.1.4.2" class="anchored" data-anchor-id="customising-colour-scales"><span class="header-section-number">5.1.4.2</span> 6.3.2 Customising colour scales</h4>
+<section id="customising-colour-scales" class="level4" data-number="4.1.4.2">
+<h4 data-number="4.1.4.2" class="anchored" data-anchor-id="customising-colour-scales"><span class="header-section-number">4.1.4.2</span> Customising colour scales</h4>
 <p>There are a wide range of options for customising the colour aesthetics of geoms. These include pre-defined colour palettes, such as <code>scale_colour_viridis_c</code> for continuous variables, or <code>scale_colour_viridis_d</code> for discrete or categorical variables. Viridis colour palettes are designed to be colourblind friendly and print well in grey scale. There are also many R packages containing colour palettes for different scenarios.</p>
 <p>Colour palettes can be created manually for categorical variables using the <code>scale_colour_manual</code> function. Here, the argument <code>values</code> allows us to specify a colour per category.</p>
 <div class="callout callout-style-default callout-tip callout-titled">
@@ -560,8 +586,8 @@ <h4 data-number="5.1.4.2" class="anchored" data-anchor-id="customising-colour-sc
 <p>Palettes can also be created using gradients with the <code>scale_colour_gradient</code> function, that specifies a two colour gradient from low to high, <code>scale_colour_gradient2</code> that creates a diverging gradient using low, medium, and high colours, and <code>scale_colour_gradientn</code> that creates an n-colour gradient.</p>
 </section>
 </section>
-<section id="other-labelling-functions" class="level3" data-number="5.1.5">
-<h3 data-number="5.1.5" class="anchored" data-anchor-id="other-labelling-functions"><span class="header-section-number">5.1.5</span> Other labelling functions</h3>
+<section id="other-labelling-functions" class="level3" data-number="4.1.5">
+<h3 data-number="4.1.5" class="anchored" data-anchor-id="other-labelling-functions"><span class="header-section-number">4.1.5</span> Other labelling functions</h3>
 <p>Although axis and legend labels can be updated within scale functions, the <code>labs</code> function exist as an alternative. This function also allows us to add titles and subtitles to visualisations:</p>
 <div class="cell">
 <div class="sourceCode cell-code" id="cb13"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="fu">labs</span>(<span class="at">x =</span> “x<span class="sc">-</span>axis name”, <span class="at">y =</span> “y<span class="sc">-</span>axis name”,</span>
@@ -578,8 +604,8 @@ <h3 data-number="5.1.5" class="anchored" data-anchor-id="other-labelling-functio
 </div>
 <p>adds a rectangle to the graph.</p>
 </section>
-<section id="theme-functions" class="level3" data-number="5.1.6">
-<h3 data-number="5.1.6" class="anchored" data-anchor-id="theme-functions"><span class="header-section-number">5.1.6</span> Theme functions</h3>
+<section id="theme-functions" class="level3" data-number="4.1.6">
+<h3 data-number="4.1.6" class="anchored" data-anchor-id="theme-functions"><span class="header-section-number">4.1.6</span> Theme functions</h3>
 <p>The <code>theme</code> function modifies non-data components of the visualisation. For example, the legend position, label fonts, the graph background, and gridlines. There are many options that exist within the <code>theme</code> function (use <code>?theme</code> to list them all).</p>
 <div class="callout callout-style-default callout-note callout-titled">
 <div class="callout-header d-flex align-content-center">
@@ -645,8 +671,8 @@ <h3 data-number="5.1.6" class="anchored" data-anchor-id="theme-functions"><span
 </div>
 <p>Creating a custom theme is useful to ensure all visualisations are formatted consistently.</p>
 </section>
-<section id="facet-functions" class="level3" data-number="5.1.7">
-<h3 data-number="5.1.7" class="anchored" data-anchor-id="facet-functions"><span class="header-section-number">5.1.7</span> Facet functions</h3>
+<section id="facet-functions" class="level3" data-number="4.1.7">
+<h3 data-number="4.1.7" class="anchored" data-anchor-id="facet-functions"><span class="header-section-number">4.1.7</span> Facet functions</h3>
 <p>Faceting allows us to divide a plot into subplots based on some grouping variable within the data. This allows us to show multiple variables in the same visualisation without risking overloading the plot and losing the intended message.</p>
 <p>For example, if we wish to show the relationship between SFA, council tax total and regions over the entire time period, we may wish to create a scatterplot per year. Faceting allows us to do this in one piece of code rather than repeating it per year. Faceting will also ensure that plots are on the same scale and therefore easier to compare. The function <code>facet_wrap</code> creates these facetted plots:</p>
 <div class="cell">
@@ -1095,13 +1121,13 @@ <h3 class="unnumbered anchored" data-anchor-id="exercise-7">Exercise 7</h3>
 </script>
 <nav class="page-navigation">
   <div class="nav-page nav-page-previous">
-      <a href="./session3_notes.html" class="pagination-link" aria-label="Session 3: Data preparation and manipulation">
-        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Session 3: Data preparation and manipulation</span></span>
+      <a href="./session3_notes.html" class="pagination-link" aria-label="Data preparation and manipulation">
+        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Data preparation and manipulation</span></span>
       </a>          
   </div>
   <div class="nav-page nav-page-next">
       <a href="./session5_notes.html" class="pagination-link" aria-label="Reproducible research with RMarkdown">
-        <span class="nav-page-text"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span> <i class="bi bi-arrow-right-short"></i>
+        <span class="nav-page-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span> <i class="bi bi-arrow-right-short"></i>
       </a>
   </div>
 </nav>
diff --git a/_book/session5_notes.html b/_book/session5_notes.html
index c9ffc35..19d44b7 100644
--- a/_book/session5_notes.html
+++ b/_book/session5_notes.html
@@ -7,7 +7,7 @@
 <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
 
 
-<title>Introduction to R with Tidyverse - 6&nbsp; Reproducible research with RMarkdown</title>
+<title>Introduction to R with Tidyverse - 5&nbsp; Reproducible research with RMarkdown</title>
 <style>
 code{white-space: pre-wrap;}
 span.smallcaps{font-variant: small-caps;}
@@ -64,6 +64,7 @@
 <script src="site_libs/quarto-search/fuse.min.js"></script>
 <script src="site_libs/quarto-search/quarto-search.js"></script>
 <meta name="quarto:offset" content="./">
+<link href="./data_description.html" rel="next">
 <link href="./session4_notes.html" rel="prev">
 <script src="site_libs/quarto-html/quarto.js"></script>
 <script src="site_libs/quarto-html/popper.min.js"></script>
@@ -135,14 +136,17 @@
   <li class="nav-item compact">
     <a class="nav-link" href="https://x.com/SophieStats10"> <i class="bi bi-twitter" role="img" aria-label="Twitter">
 </i> 
+<span class="menu-text"></span></a>
+  </li>  
+  <li class="nav-item compact">
+    <a class="nav-link" href="https://buymeacoffee.com/sophie_a_lee"> <i class="bi bi-cup-hot" role="img" aria-label="Buy me a coffee">
+</i> 
 <span class="menu-text"></span></a>
   </li>  
 </ul>
           </div> <!-- /navcollapse -->
-          <div class="quarto-navbar-tools tools-wide">
-    <a href="https://github.com/sophie-a-lee/Introduction_R_Tidyverse_course" title="Source Code" class="quarto-navigation-tool px-1" aria-label="Source Code"><i class="bi bi-github"></i></a>
+          <div class="quarto-navbar-tools">
     <a href="./Introduction-to-R-with-Tidyverse.pdf" title="Download PDF" class="quarto-navigation-tool px-1" aria-label="Download PDF"><i class="bi bi-file-pdf"></i></a>
-    <a href="https://twitter.com/intent/tweet?url=|url|" title="Twitter" class="quarto-navigation-tool px-1" aria-label="Twitter"><i class="bi bi-twitter"></i></a>
 </div>
       </div> <!-- /container-fluid -->
     </nav>
@@ -151,7 +155,7 @@
       <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
         <i class="bi bi-layout-text-sidebar-reverse"></i>
       </button>
-        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./session5_notes.html"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></a></li></ol></nav>
+        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./session5_notes.html"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></a></li></ol></nav>
         <a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">      
         </a>
     </div>
@@ -166,39 +170,62 @@
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./index.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Welcome!</span></span></a>
+ <span class="menu-text">Welcome!</span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session1_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Session 1: Introduction to R and RStudio</span></span></a>
+ <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction to R and RStudio</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session2_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Session 2: Introduction to tidyverse and data wrangling</span></span></a>
+ <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Introduction to tidyverse and data wrangling</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session3_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Session 3: Data preparation and manipulation</span></span></a>
+ <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Data preparation and manipulation</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session4_notes.html" class="sidebar-item-text sidebar-link">
- <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Session 4: Data visualisation with ggplot2</span></span></a>
+ <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Data visualisation with ggplot2</span></span></a>
   </div>
 </li>
         <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./session5_notes.html" class="sidebar-item-text sidebar-link active">
- <span class="menu-text"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+ <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></span></a>
+  </div>
+</li>
+        <li class="sidebar-item sidebar-item-section">
+      <div class="sidebar-item-container"> 
+            <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
+ <span class="menu-text">Appendices</span></a>
+          <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
+            <i class="bi bi-chevron-right ms-2"></i>
+          </a> 
+      </div>
+      <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">  
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./data_description.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Data description</span></a>
+  </div>
+</li>
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./exercise_solutions.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Exercise solutions</span></a>
   </div>
 </li>
+      </ul>
+  </li>
     </ul>
     </div>
 </nav>
@@ -209,12 +236,12 @@
     <h2 id="toc-title">Table of contents</h2>
    
   <ul>
-  <li><a href="#introduction-to-rmarkdown" id="toc-introduction-to-rmarkdown" class="nav-link active" data-scroll-target="#introduction-to-rmarkdown"><span class="header-section-number">6.1</span> Introduction to RMarkdown</a>
+  <li><a href="#introduction-to-rmarkdown" id="toc-introduction-to-rmarkdown" class="nav-link active" data-scroll-target="#introduction-to-rmarkdown"><span class="header-section-number">5.1</span> Introduction to RMarkdown</a>
   <ul class="collapse">
-  <li><a href="#creating-an-rmarkdown-files" id="toc-creating-an-rmarkdown-files" class="nav-link" data-scroll-target="#creating-an-rmarkdown-files"><span class="header-section-number">6.1.1</span> Creating an RMarkdown files</a></li>
-  <li><a href="#rmarkdown-content" id="toc-rmarkdown-content" class="nav-link" data-scroll-target="#rmarkdown-content"><span class="header-section-number">6.1.2</span> Rmarkdown content</a></li>
-  <li><a href="#compiling-rmarkdown-documents" id="toc-compiling-rmarkdown-documents" class="nav-link" data-scroll-target="#compiling-rmarkdown-documents"><span class="header-section-number">6.1.3</span> Compiling RMarkdown documents</a></li>
-  <li><a href="#data-visualisation-in-rmarkdown" id="toc-data-visualisation-in-rmarkdown" class="nav-link" data-scroll-target="#data-visualisation-in-rmarkdown"><span class="header-section-number">6.1.4</span> Data visualisation in RMarkdown</a></li>
+  <li><a href="#creating-an-rmarkdown-files" id="toc-creating-an-rmarkdown-files" class="nav-link" data-scroll-target="#creating-an-rmarkdown-files"><span class="header-section-number">5.1.1</span> Creating an RMarkdown file</a></li>
+  <li><a href="#rmarkdown-content" id="toc-rmarkdown-content" class="nav-link" data-scroll-target="#rmarkdown-content"><span class="header-section-number">5.1.2</span> RMarkdown content</a></li>
+  <li><a href="#compiling-rmarkdown-documents" id="toc-compiling-rmarkdown-documents" class="nav-link" data-scroll-target="#compiling-rmarkdown-documents"><span class="header-section-number">5.1.3</span> Compiling RMarkdown documents</a></li>
+  <li><a href="#data-visualisation-in-rmarkdown" id="toc-data-visualisation-in-rmarkdown" class="nav-link" data-scroll-target="#data-visualisation-in-rmarkdown"><span class="header-section-number">5.1.4</span> Data visualisation in RMarkdown</a></li>
   <li><a href="#exercise-8" id="toc-exercise-8" class="nav-link" data-scroll-target="#exercise-8">Exercise 8</a></li>
   </ul></li>
   </ul>
@@ -225,7 +252,7 @@ <h2 id="toc-title">Table of contents</h2>
 
 <header id="title-block-header" class="quarto-title-block default">
 <div class="quarto-title">
-<h1 class="title"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></h1>
+<h1 class="title"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Reproducible research with RMarkdown</span></h1>
 </div>
 
 
@@ -242,8 +269,8 @@ <h1 class="title"><span class="chapter-number">6</span>&nbsp; <span class="chapt
 </header>
 
 
-<section id="introduction-to-rmarkdown" class="level2" data-number="6.1">
-<h2 data-number="6.1" class="anchored" data-anchor-id="introduction-to-rmarkdown"><span class="header-section-number">6.1</span> Introduction to RMarkdown</h2>
+<section id="introduction-to-rmarkdown" class="level2" data-number="5.1">
+<h2 data-number="5.1" class="anchored" data-anchor-id="introduction-to-rmarkdown"><span class="header-section-number">5.1</span> Introduction to RMarkdown</h2>
 <p>RMarkdown is a tool that is used to author high-quality documents, making it easy to communicate results efficiently. One of the main appeals of RMarkdown is that it is easy to integrate R code and output seamlessly into a document, encouraging openness and reproducibility in research.</p>
 <p>There are a number of ways we can use RMarkdown to enhance the research process, such as:</p>
 <ul>
@@ -257,8 +284,8 @@ <h2 data-number="6.1" class="anchored" data-anchor-id="introduction-to-rmarkdown
 <span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(rmarkdown)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
-<section id="creating-an-rmarkdown-files" class="level3" data-number="6.1.1">
-<h3 data-number="6.1.1" class="anchored" data-anchor-id="creating-an-rmarkdown-files"><span class="header-section-number">6.1.1</span> Creating an RMarkdown files</h3>
+<section id="creating-an-rmarkdown-files" class="level3" data-number="5.1.1">
+<h3 data-number="5.1.1" class="anchored" data-anchor-id="creating-an-rmarkdown-files"><span class="header-section-number">5.1.1</span> Creating an RMarkdown files</h3>
 <p>RMarkdown files (<code>.Rmd</code>) are created and saved separately to the script files we have been using up to now on the course. To create a new RMarkdown file, either use the drop-down menu, following the <em>File -&gt; New File -&gt; R Markdown…</em> options, or using the <img src="images/new_file_shortcut.png" class="img-fluid" alt="new file icon"> icon and selecting <em>R Markdown…</em>.</p>
 <div class="quarto-figure quarto-figure-center">
 <figure class="figure">
@@ -269,16 +296,16 @@ <h3 data-number="6.1.1" class="anchored" data-anchor-id="creating-an-rmarkdown-f
 <p>When creating a new RMarkdown file, we are given the option to set the title, author and date of the new document. We are also given options to select the type of document, presentation, or Shiny app we would like to create. This does not give a comprehensive list of documents available within RMarkdown and can be changed later. We will discuss output document types in more detail later in the session.</p>
 <p>Clicking ‘OK’ on this window will produce an RMarkdown file (<code>.Rmd</code>) with some example code. If we do not want this, there is an option to ‘Create Empty Document’ on the bottom left of the window.</p>
 </section>
-<section id="rmarkdown-content" class="level3" data-number="6.1.2">
-<h3 data-number="6.1.2" class="anchored" data-anchor-id="rmarkdown-content"><span class="header-section-number">6.1.2</span> Rmarkdown content</h3>
+<section id="rmarkdown-content" class="level3" data-number="5.1.2">
+<h3 data-number="5.1.2" class="anchored" data-anchor-id="rmarkdown-content"><span class="header-section-number">5.1.2</span> RMarkdown content</h3>
 <p>RMarkdown files contain three main types of content:</p>
 <ul>
 <li>A YAML header (this sets the global options for the document)</li>
 <li>Text, or syntax (this includes headings and comments)</li>
 <li>Code chunks containing R code</li>
 </ul>
-<section id="the-yaml-header" class="level4" data-number="6.1.2.1">
-<h4 data-number="6.1.2.1" class="anchored" data-anchor-id="the-yaml-header"><span class="header-section-number">6.1.2.1</span> The YAML header</h4>
+<section id="the-yaml-header" class="level4" data-number="5.1.2.1">
+<h4 data-number="5.1.2.1" class="anchored" data-anchor-id="the-yaml-header"><span class="header-section-number">5.1.2.1</span> The YAML header</h4>
 <p>The first part of an RMarkdown script, surrounded by ‘<code>---</code>’ is known as the <strong>YAML header</strong>. This sets global options for the document that will be produced by the script. YAML headers can include the title, author and date of a document, the output document type, table of contents options, and can include code to edit the appearance of text and figures.</p>
 <p>For this course, we will just use the YAML to define the <code>title</code>, <code>author</code>, <code>date</code>, and <code>output</code> of our document:</p>
 <div class="sourceCode" id="cb2"><pre class="sourceCode markdown code-with-copy"><code class="sourceCode markdown"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co">---</span></span>
@@ -298,8 +325,8 @@ <h4 data-number="6.1.2.1" class="anchored" data-anchor-id="the-yaml-header"><spa
 </ul>
 <p>RMarkdown can also be combined with other R packages to create books (via <code>bookdown</code>), websites (via <code>blogdown</code>) and interactive dashboards (via <code>flexdashboard</code>).</p>
 </section>
-<section id="rmarkdown-syntax" class="level4" data-number="6.1.2.2">
-<h4 data-number="6.1.2.2" class="anchored" data-anchor-id="rmarkdown-syntax"><span class="header-section-number">6.1.2.2</span> RMarkdown syntax</h4>
+<section id="rmarkdown-syntax" class="level4" data-number="5.1.2.2">
+<h4 data-number="5.1.2.2" class="anchored" data-anchor-id="rmarkdown-syntax"><span class="header-section-number">5.1.2.2</span> RMarkdown syntax</h4>
 <p>RMarkdown text, or syntax, will generally make up the majority of a RMarkdown file. This can include headers and subheadings, equations, and any other text or comments in the document. Text is formatted using <strong>markdown syntax</strong>. A detailed list of syntax commands are given in the RMarkdown <a href="https://rstudio.github.io/cheatsheets/html/rmarkdown.html#write-with-markdown">cheatsheet</a>. Common syntax commands that may be used in an RMarkdown document include:</p>
 <ul>
 <li><code>*italic*</code></li>
@@ -320,8 +347,8 @@ <h4 data-number="6.1.2.2" class="anchored" data-anchor-id="rmarkdown-syntax"><sp
 <p><code>Equation: $r^2 = (x - a)^2 + (y - b)^2$</code></p>
 <p><em>RMarkdown equations are built using the same language as LaTeX. <a href="https://oeis.org/wiki/List_of_LaTeX_mathematical_symbols">See here</a> for a list of mathematical symbols that can be used in these equations.</em></p>
 </section>
-<section id="code-chunks" class="level4" data-number="6.1.2.3">
-<h4 data-number="6.1.2.3" class="anchored" data-anchor-id="code-chunks"><span class="header-section-number">6.1.2.3</span> Code chunks</h4>
+<section id="code-chunks" class="level4" data-number="5.1.2.3">
+<h4 data-number="5.1.2.3" class="anchored" data-anchor-id="code-chunks"><span class="header-section-number">5.1.2.3</span> Code chunks</h4>
 <p>Code chunks allow us to embed R code and outputs into our documents. This is one of the main draw of RMarkdown as it removes the need to copy and paste or import results from R into another document.</p>
 <p>Code chunks are pieces of code that begin <code>```{r}</code> and end <code>```</code>. For example,</p>
 <div class="sourceCode" id="cb5"><pre class="sourceCode default code-with-copy"><code class="sourceCode default"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>```{r}</span>
@@ -349,8 +376,8 @@ <h4 data-number="6.1.2.3" class="anchored" data-anchor-id="code-chunks"><span cl
 <span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a>```</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </section>
 </section>
-<section id="compiling-rmarkdown-documents" class="level3" data-number="6.1.3">
-<h3 data-number="6.1.3" class="anchored" data-anchor-id="compiling-rmarkdown-documents"><span class="header-section-number">6.1.3</span> Compiling RMarkdown documents</h3>
+<section id="compiling-rmarkdown-documents" class="level3" data-number="5.1.3">
+<h3 data-number="5.1.3" class="anchored" data-anchor-id="compiling-rmarkdown-documents"><span class="header-section-number">5.1.3</span> Compiling RMarkdown documents</h3>
 <p>Compiling RMarkdown actually requires multiple steps and programmes. Luckily for us, this process takes place in the background so we don’t need to be aware of these steps happening!</p>
 <p>Generating an output file from RMarkdown is know <strong>knitting</strong> a document. This process sends the <code>.Rmd</code> file to another R package <code>knitr</code> (which is installed alongside <code>rmarkdown</code>), which executes all the code chunks in the document and creates a markdown <code>.md</code> file including the code and output. This markdown file is then processed by another programme <strong>pandoc</strong> which converts markdown code into the finished document.</p>
 <div class="quarto-figure quarto-figure-center">
@@ -361,8 +388,8 @@ <h3 data-number="6.1.3" class="anchored" data-anchor-id="compiling-rmarkdown-doc
 </div>
 <p>To <strong>knit</strong> an RMarkdown file in RStudio is very simple. Either click the <img src="images/knit_icon.png" class="img-fluid" alt="knit icon"> icon above the RMarkdown script, or use the keyboard shortcut <code>ctrl + shift + k</code> on Windows or <code>Command + shift + k</code> on Mac. This initiates the process above and will return an output document (if there are no errors!) in the requested format to the working directory.</p>
 </section>
-<section id="data-visualisation-in-rmarkdown" class="level3" data-number="6.1.4">
-<h3 data-number="6.1.4" class="anchored" data-anchor-id="data-visualisation-in-rmarkdown"><span class="header-section-number">6.1.4</span> Data visualisation in RMarkdown</h3>
+<section id="data-visualisation-in-rmarkdown" class="level3" data-number="5.1.4">
+<h3 data-number="5.1.4" class="anchored" data-anchor-id="data-visualisation-in-rmarkdown"><span class="header-section-number">5.1.4</span> Data visualisation in RMarkdown</h3>
 <p>Output such as graphs and tables can be embedded in code chunks, the code used to create them will be the same as it would be in any other R script.</p>
 <div class="callout callout-style-default callout-note callout-titled">
 <div class="callout-header d-flex align-content-center">
@@ -377,8 +404,8 @@ <h3 data-number="6.1.4" class="anchored" data-anchor-id="data-visualisation-in-r
 <p>Often, when providing output in RMarkdown, we often do not want to show the code that was used to create this. Make sure to add <code>echo = FALSE</code> to the opening of the code chunk.</p>
 </div>
 </div>
-<section id="graphs-in-rmarkdown" class="level4" data-number="6.1.4.1">
-<h4 data-number="6.1.4.1" class="anchored" data-anchor-id="graphs-in-rmarkdown"><span class="header-section-number">6.1.4.1</span> Graphs in RMarkdown</h4>
+<section id="graphs-in-rmarkdown" class="level4" data-number="5.1.4.1">
+<h4 data-number="5.1.4.1" class="anchored" data-anchor-id="graphs-in-rmarkdown"><span class="header-section-number">5.1.4.1</span> Graphs in RMarkdown</h4>
 <p><code>ggplot</code> can be used to create graphs that are embedded within code chunks and included in an output document. For example, we could use the data from previous sections to show the relationship between Settlement Funding Assessment (SFA) and council tax total in English local authorities in 2020, colour code by regions in a scatterplot:</p>
 <div class="sourceCode" id="cb7"><pre class="sourceCode default code-with-copy"><code class="sourceCode default"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>```{r scatterplot sfa_2020 and ct_total_2020 by region, message = FALSE}</span>
 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a># Load and tidy the 2020 data</span>
@@ -413,8 +440,8 @@ <h4 data-number="6.1.4.1" class="anchored" data-anchor-id="graphs-in-rmarkdown">
 </div>
 </div>
 </section>
-<section id="tables-in-rmarkdown" class="level4" data-number="6.1.4.2">
-<h4 data-number="6.1.4.2" class="anchored" data-anchor-id="tables-in-rmarkdown"><span class="header-section-number">6.1.4.2</span> Tables in RMarkdown</h4>
+<section id="tables-in-rmarkdown" class="level4" data-number="5.1.4.2">
+<h4 data-number="5.1.4.2" class="anchored" data-anchor-id="tables-in-rmarkdown"><span class="header-section-number">5.1.4.2</span> Tables in RMarkdown</h4>
 <p>There are a number of ways to include tables within RMarkdown which can either be entered manually, or generated using an R package. The choice of approach to creating tables depends on the format and size of the data, the amount of flexibility you would like to customise the output, the type of output document you are creating, and personal preference of how it should look.</p>
 <p>In this course, we will look at how tables can be created using RMarkdown syntax (without the need for additional packages), and using the <code>kable</code> function within the <code>knitr</code> package.</p>
 <p><strong>Manually creating tables</strong></p>
@@ -1092,11 +1119,14 @@ <h3 class="unnumbered anchored" data-anchor-id="exercise-8">Exercise 8</h3>
 </script>
 <nav class="page-navigation">
   <div class="nav-page nav-page-previous">
-      <a href="./session4_notes.html" class="pagination-link" aria-label="Session 4: Data visualisation with ggplot2">
-        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Session 4: Data visualisation with ggplot2</span></span>
+      <a href="./session4_notes.html" class="pagination-link" aria-label="Data visualisation with ggplot2">
+        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Data visualisation with ggplot2</span></span>
       </a>          
   </div>
   <div class="nav-page nav-page-next">
+      <a href="./data_description.html" class="pagination-link" aria-label="Data description">
+        <span class="nav-page-text">Data description</span> <i class="bi bi-arrow-right-short"></i>
+      </a>
   </div>
 </nav>
 </div> <!-- /content -->
diff --git a/exercise solutions.qmd b/exercise_solutions.qmd
similarity index 100%
rename from exercise solutions.qmd
rename to exercise_solutions.qmd