Skip to content

Commit

Permalink
Merge pull request #151 from chnm/feature/architecture
Browse files Browse the repository at this point in the history
Updates and fixes to the word cloud and tag URL paths
  • Loading branch information
hepplerj authored Oct 10, 2024
2 parents c91be72 + 5296f57 commit f1b598c
Show file tree
Hide file tree
Showing 115 changed files with 411 additions and 203 deletions.
32 changes: 31 additions & 1 deletion bom-website/assets/visualizations/wordcloud/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ function fetchDataAndRender(startYear, endYear) {

d3.json(url)
.then((data) => {
// Extract unique years from the data
const years = Array.from(new Set(data.map(d => d.year))).sort((a, b) => a - b);
populateYearDropdowns(years);

// Clear the existing word cloud
d3.select("#chart").selectAll("*").remove();

Expand All @@ -22,6 +26,32 @@ function fetchDataAndRender(startYear, endYear) {
});
}

/**
 * Populate the start-year and end-year dropdowns.
 *
 * Replaces the options of the `#start-year` and `#end-year` <select>
 * elements with one <option> per entry in `years`, then selects the first
 * entry as the start year and the last entry as the end year.
 *
 * When `years` is empty the dropdowns are left exactly as they were, so a
 * query that returns no rows does not wipe the controls the user needs to
 * pick a different range.
 *
 * @param {Array<number|string>} years - Years to offer, in display order
 *   (the caller passes them sorted ascending).
 * @returns {void}
 */
function populateYearDropdowns(years) {
  // Nothing to offer: keep the current options and selection.
  if (years.length === 0) {
    return;
  }

  const startYearSelect = document.getElementById("start-year");
  const endYearSelect = document.getElementById("end-year");

  for (const select of [startYearSelect, endYearSelect]) {
    // Clear existing options before rebuilding the list.
    select.innerHTML = "";

    for (const year of years) {
      const option = document.createElement("option");
      option.value = year;
      option.text = year;
      select.appendChild(option);
    }
  }

  // Default to the widest available range.
  startYearSelect.value = years[0];
  endYearSelect.value = years[years.length - 1];
}

// Initial fetch and render
fetchDataAndRender(1648, 1754);

Expand All @@ -34,7 +64,7 @@ document.getElementById("update-button").addEventListener("click", () => {

// Add event listener to the reset button
document.getElementById("reset-button").addEventListener("click", () => {
// Reset the input fields to the original values
// Reset the dropdowns to the original values
document.getElementById("start-year").value = 1648;
document.getElementById("end-year").value = 1754;
// Fetch and render the original data
Expand Down
136 changes: 79 additions & 57 deletions bom-website/assets/visualizations/wordcloud/wordcloud.js
Original file line number Diff line number Diff line change
@@ -1,27 +1,32 @@
import * as d3 from 'd3';
import d3Cloud from 'd3-cloud';
import Visualization from '../common/visualization';
import * as d3 from "d3";
import d3Cloud from "d3-cloud";
import Visualization from "../common/visualization";

export default class WordCloudChart extends Visualization {
constructor(id, data, dim) {
const margin = {
top: 0, right: 40, bottom: 40, left: 10,
top: 0,
right: 40,
bottom: 40,
left: 10,
};
super(id, data, dim, margin);
}

// Draw the plot
render() {
// causes needs to be an array of objects with a text and size property,
// causes needs to be an array of objects with a text and size property,
// which will group each word together and add together each of their d.count
// values. To do this we'll loop through the data and create a new object
// for each unique word, and add the count to the size property. We use
// d3.rollups to do this.
const causes = d3.rollups(
this.data.causes,
(v) => d3.sum(v, (d) => d.count),
(d) => d.death,
).map(([text, size]) => ({ text, size }));
const causes = d3
.rollups(
this.data.causes,
(v) => d3.sum(v, (d) => d.count),
(d) => d.death,
)
.map(([text, size]) => ({ text, size }));

const wordcloud = WordCloud(causes, {
size: (group) => {
Expand All @@ -36,64 +41,81 @@ export default class WordCloudChart extends Visualization {
}

// Word cloud generator
function WordCloud(text, {
size = group => group.length, // Given a grouping of words, returns the size factor for that word
word = d => d, // Given an item of the data array, returns the word
marginTop = 0, // top margin, in pixels
marginRight = 0, // right margin, in pixels
marginBottom = 0, // bottom margin, in pixels
marginLeft = 0, // left margin, in pixels
width = 900, // outer width, in pixels
height = 450, // outer height, in pixels
maxWords = 1200, // maximum number of words to extract from the text
fontFamily = "serif", // font family
fontScale = 14, // base font size
padding = 0, // amount of padding between the words (in pixels)
rotate = 0, // a constant or function to rotate the words
} = {}) {
const words = typeof text === "string" ? text.split(/\W+/g) : Array.from(text);
function WordCloud(
text,
{
size = (group) => group.length, // Given a grouping of words, returns the size factor for that word
word = (d) => d, // Given an item of the data array, returns the word
marginTop = 0, // top margin, in pixels
marginRight = 0, // right margin, in pixels
marginBottom = 0, // bottom margin, in pixels
marginLeft = 0, // left margin, in pixels
width = 900, // outer width, in pixels
height = 450, // outer height, in pixels
maxWords = 10000, // maximum number of words to extract from the text
fontFamily = "serif", // font family
fontScale = 10, // base font size
padding = 0, // amount of padding between the words (in pixels)
rotate = 0, // a constant or function to rotate the words
} = {},
) {
const words =
typeof text === "string" ? text.split(/\W+/g) : Array.from(text);

const data = d3.rollups(words, size, w => w)
const data = d3
.rollups(words, size, (w) => w)
.sort(([, a], [, b]) => d3.descending(a, b))
.slice(0, maxWords)
.map(([key, size]) => ({text: word(key), size}));

const svg = d3.create("svg")
.attr("viewBox", [0, 0, width, height])
.attr("width", width)
.attr("font-family", fontFamily)
.attr("text-anchor", "middle")
.attr("style", "max-width: 100%; height: auto; height: intrinsic;");
.map(([key, size]) => ({ text: word(key), size }));

const g = svg.append("g").attr("transform", `translate(${width / 2},${height / 2})`);

const svg = d3
.create("svg")
.attr("viewBox", [0, 0, width, height])
.attr("width", width)
.attr("font-family", fontFamily)
.attr("text-anchor", "middle")
.attr("style", "max-width: 100%; height: auto; height: intrinsic;");

const g = svg
.append("g")
.attr("transform", `translate(${width / 2},${height / 2})`);

const cloud = d3Cloud()
.size([width - marginLeft - marginRight, height - marginTop - marginBottom])
.words(data)
.padding(padding)
.rotate(rotate)
.font(fontFamily)
.fontSize(d => Math.max(Math.sqrt(d.size) * fontScale, 12))
.on("end", words => {
const textElements = g.selectAll("text")
.data(words)
.enter().append("text")
.attr("font-size", d => d.size)
.attr("transform", d => `translate(${d.x},${d.y}) rotate(${d.rotate})`)
.text(d => d.text)
.style("cursor", "crosshair");
.size([width - marginLeft - marginRight, height - marginTop - marginBottom])
.words(data)
.padding(padding)
.rotate(rotate)
.font(fontFamily)
.fontSize((d) => Math.max(Math.sqrt(d.size) * fontScale, 12))
.on("end", (words) => {
const textElements = g
.selectAll("text")
.data(words)
.enter()
.append("text")
.attr("font-size", (d) => d.size)
.attr(
"transform",
(d) => `translate(${d.x},${d.y}) rotate(${d.rotate})`,
)
.text((d) => d.text)
.style("cursor", "crosshair");

// Select the <p> element
const infoText = d3.select("#word-info");
// Select the <p> element
const infoText = d3.select("#word-info");

// Add event listeners to update the <p> element
textElements.on("mouseover", function(event, d) {
infoText.html(`Cause of death: <strong>${d.text}</strong>, Count: <strong>${d.size}</strong>`);
// Add event listeners to update the <p> element
textElements
.on("mouseover", function (event, d) {
infoText.html(
`Cause of death: <strong>${d.text}</strong>, Count: <strong>${d.size}</strong>`,
);
})
.on("mouseout", function() {
.on("mouseout", function () {
infoText.text("Mouse over a word to see its count");
});
});
});

cloud.start();
return svg.node();
Expand Down
20 changes: 10 additions & 10 deletions bom-website/config.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
baseURL: 'https://deathbynumbers.org'
languageCode: 'en-us'
title: 'Death by Numbers'
baseURL: "https://deathbynumbers.org"
languageCode: "en-us"
title: "Death by Numbers"
description: >-
Death by Numbers is a digital scholarly research project on the London Plague Bills
by the Roy Rosenzweig Center for History and New Media
copyright: 2021-2023

permalinks:
blog: '/:year/:month/:day/:slug/'
archive: '/:slug/'
page: '/:section/:slug/'
blog: "/:year/:month/:day/:slug/"
archive: "/:slug/"
page: "/:section/:slug/"

theme: 'dbn'
theme: "dbn"

author:
name: Roy Rosenzweig Center for History and New Media
Expand All @@ -28,7 +27,7 @@ markup:
unsafe: true
tableOfContents:
startLevel: 2
endLevel: 2
endLevel: 3

sitemap:
changefreq: weekly
Expand All @@ -38,4 +37,5 @@ sitemap:
taxonomies:
category: categories
tag: tags
author: author
author: author

File renamed without changes
File renamed without changes
File renamed without changes
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ tags:

In late November of 1703, a "great storm" or hurricane struck the British Isles. Bad weather began a few days before the heart of the storm made landfall on November 26th, spawning tornadoes, ripping off roofs and chimneys, and destroying entire fleets. One of the most famous tragedies of the storm happened on the Goodwin Sands, a deadly sandbank off the coast of Kent. At least 53 ships were wrecked on the sandbank and over 2,000 men died just six miles from safety.

{{< figure src="/images/image1.png" caption="Figure 1. Ships being tossed about by a storm at sea." alt="ships being tossed about by a storm at sea" >}}
{{< figure src="image1.png" caption="Figure 1. Ships being tossed about by a storm at sea." alt="ships being tossed about by a storm at sea" >}}

The death and destruction continued throughout southern England, including in the capital city of London. As one contemporary report tells:

Expand All @@ -36,9 +36,9 @@ Calculating the number of deaths that can actually be attributed to a storm is d

One thing the bills _can_ tell us, however, is where people were dying throughout the city. This is because location data has been recorded for many of the deaths we have identified as storm related. These locations offer another way to analyze the deaths related to the storm. In the first map, we see deaths by week reported. Pale blue is no deaths in any of our three weeks, light blue is one or more deaths in [“week 50” of 1703](https://deathbynumbers.org/2022/02/14/confusion-of-calendars/) (November 23-30), dark blue with stripes is week 51 (November 30-December 7), and turquoise blue is week 52 (December 7-14). In the second map, we see deaths by parish for weeks 50-52 summed. Purple is no deaths, light blue is 1 death, medium blue is 2 deaths, and dark blue is 3 deaths.

{{< figure src="/images/image4.png" caption="Figure." alt="map described in text" >}}
{{< figure src="image4.png" caption="Figure." alt="map described in text" >}}

{{< figure src="/images/image2.png" caption="Figure." alt="map described in text" >}}
{{< figure src="image2.png" caption="Figure." alt="map described in text" >}}

As we can see on the map, the majority of the deaths were recorded in the parishes “without the walls”—that is, outside the London Wall—with the exception of St. Bennet Fink and St. Katherine Coleman. The visualization provided by the location data from the bills tells us that the highest number of deaths occurred in the parishes furthest from the center of London. This geographic pattern can partly be explained by the infrastructure and rebuilding of the city after the Great Fire of London in 1666. The areas of London that burnt down in the fire were mostly contained inside the London Wall, a Roman structure that circled a “mass of medieval streets, lanes and alleys,” where the houses were mostly built of timber (Mortimer 2017, 10). These wooden medieval houses, which were packed together inside the London Wall, were one of the reasons the fire spread so quickly (Mortimer 2017, 21). After the fire, the areas that were burnt down were almost entirely rebuilt in brick. This shift to newer, more durable infrastructure in the parishes inside the London Wall meant that these houses would stand up better to other natural phenomena, such as the high winds associated with hurricanes. As the maps show, the area inside the London Wall has the fewest reported deaths due to the storm, and it is possible that the reconstruction of the city after the fire of 1666 plays a role in these low numbers.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ For the purpose of this blog entry, I will not focus on text as text, but only o

The transcription process in _Death by Numbers_ using DataScribe generates a dataset where each record of a weekly bill occupies a row, and the causes of death are represented as columns. Descriptions of drownings, killings, suicides, people found dead, and accidents are recorded as unstructured text, typically including a count, the location, and occasionally a brief account of the incident. For instance, the descriptive text for drownings highlights the location (_at Christ Church in Surry; at the London Bridge; at the River Lee_), a brief characterization of circumstances (_accidentally; by misfortune; in a ditch; in a tub of soap suds_), and occasionally some data about the drowned person (_an unknown man; two brothers; a boy_). To both capture and provide structure to the text, our transcription team fills out two fields in the DataScribe transcription form: one for the count and one for the text.

{{< figure src="/images/adasme-fig_1-words.png" caption="Fig 1. View of the transcription form fields designed to capture the text about drowning deaths." alt="Fig 1. View of the transcription form fields designed to capture the text about drowning deaths." >}}
{{< figure src="adasme-fig_1-words.png" caption="Fig 1. View of the transcription form fields designed to capture the text about drowning deaths." alt="Fig 1. View of the transcription form fields designed to capture the text about drowning deaths." >}}

To summarize the methodology, I begin in Visual Studio Code by extracting text from descriptive columns, such as the 'Drowned (Descriptive Text)' column shown in Figure 1, and appending it to a list. I then handle missing values by replacing any `NaN` elements with empty strings. Additional cleanup involves replacing punctuation with spaces and removing unwanted words. At this point, one critical step is correcting the many misspelled variants of parish names by matching them with the correct ones (the ones we use in our Omeka install). The transcription team captures the text mostly as it appears, preserving the wide range of original spellings from the Bills in the transcribed data. Correcting the typos involves constructing a dictionary of key-value pairs to fix all misspellings in the original text. Later, I repeated this process on the final data frame with a slight variation: I created a new column by remapping all variants with the correct parish names and grouped the data by these corrections to remove duplicates. Although time-consuming, this process proved to be somewhat rewarding and brought a certain sense of accomplishment.

Expand All @@ -27,15 +27,15 @@ Observing the text reveals that locations typically follow the preposition "at."

As mentioned before, the _Death by Numbers_ project builds data sets with textual descriptions of killings. However, it is not exactly a copy-and-paste process but an interpretive procedure. We built an aggregate category in which we include any death involving human agency, such as murder, shooting, stabbing, or being run over by a cart. These deaths show up in the Bills in different parts of the causes listed. The total killings for the three data sets are 126, 114, and 228, the last number being larger due to a lengthier set of weeks. The parishes with the highest number of human-caused deaths across the three datasets were St. Giles in the Fields, St. Mary Whitechapel, St. Martin in the Fields, St. Giles Cripplegate, St. Sepulchre's Parish, and, to the south of the Thames River, St. Saviour's Southwark. Among the 97 parishes within the Walls of London, those located along the Thames had the highest number of killings, with Allhallows Great, Allhallows Less, St. Mary Somerset, St. Magnus Parish, and St. Dunstan East being the most frequently mentioned locations.

{{< figure src="/images/adasme-fig_2-words.png" caption="Fig 2. Spatial Representation of Killings in each of the three data sets analyzed." alt="Fig 2. Spatial Representation of Killings in each of the three data sets analyzed." >}}
{{< figure src="adasme-fig_2-words.png" caption="Fig 2. Spatial Representation of Killings in each of the three data sets analyzed." alt="Fig 2. Spatial Representation of Killings in each of the three data sets analyzed." >}}

The suicide field is constructed by transcribing any suicides that are recognizable by descriptions of a human being actively taking his/her life like “hanged himself”, “poysoned himself” and “stabbed himself”. This category also includes deaths described as "distraction" or "being distracted," a term rooted in the uncertainty surrounding suicide motives and the widespread belief among authorities that it represented a rejection of religion ([for more details see this post in our blog](https://deathbynumbers.org/2022/06/20/strangled-himself-being-distracted-messy-data-and-suicides-in-the-bills-of-mortality/)). The number of suicide deaths extracted from the dataset totaled 56, 75, and 124, respectively. The parishes with the highest number of suicide incidents include St. James Clerkenwell, St. Sepulchre's Parish, St. Andrew Holborn, St. Olave Southwark, St. Mary Whitechapel, St. Giles Cripplegate, and St. Giles in the Fields. Notably, St. Botolph Bishopsgate had the most suicides across all three datasets, with a total of eleven incidents. This is not simply a coincidence, as St. Botolph was home to Bethlehem Hospital (commonly known as Bedlam), which housed a large number of patients diagnosed with various forms of lunacy or mental disorders.

{{< figure src="/images/adasme-fig_3-words.png" caption="Fig 3. Spatial Representation of Suicides in each of the three data sets analyzed." alt="Fig 3. Spatial Representation of Suicides in each of the three data sets analyzed." >}}
{{< figure src="adasme-fig_3-words.png" caption="Fig 3. Spatial Representation of Suicides in each of the three data sets analyzed." alt="Fig 3. Spatial Representation of Suicides in each of the three data sets analyzed." >}}

Drowning deaths are both more concentrated and more frequent than suicides and killings. The parishes with the highest number of drowning incidents include St. Saviour's Southwark, St. Olave's Southwark, and Stepney Parish. However, the majority of drowning fatalities are centered in St. Katherine's Tower, where the number of immersion deaths totals 223, 213, and 368 across the three datasets, respectively. Most of the deaths at St. Katharine's Tower were likely related to dock work and tied to transport and shipping activities, as the wharves at St. Katharine's, known as 'Sufferance Wharves,' received hundreds of ships that unloaded their goods and cargo at the quays.[^1]

{{< figure src="/images/adasme-fig_4-words.png" caption="Fig 4. Spatial Representation of Drownings in each of the three data sets analyzed." alt="Spatial Representation of Drownings in each of the three data sets analyzed." >}}
{{< figure src="adasme-fig_4-words.png" caption="Fig 4. Spatial Representation of Drownings in each of the three data sets analyzed." alt="Spatial Representation of Drownings in each of the three data sets analyzed." >}}

## Conclusion

Expand Down
Loading

0 comments on commit f1b598c

Please sign in to comment.