diff --git a/modules/node_modules/@frogpond/ccc-google-calendar/index.js b/modules/node_modules/@frogpond/ccc-google-calendar/index.js
index f1bcf199..46efa8e2 100644
--- a/modules/node_modules/@frogpond/ccc-google-calendar/index.js
+++ b/modules/node_modules/@frogpond/ccc-google-calendar/index.js
@@ -1,21 +1,19 @@
-import {get} from '@frogpond/ccc-lib'
+import {get, parseHtml} from '@frogpond/ccc-lib'
import moment from 'moment'
import getUrls from 'get-urls'
-import _jsdom from 'jsdom'
-const {JSDOM} = _jsdom
function convertGoogleEvents(data, now = moment()) {
let events = data.map((event) => {
+ const title = parseHtml(event.summary || '')
const startTime = moment(event.start.date || event.start.dateTime)
const endTime = moment(event.end.date || event.end.dateTime)
- let description = (event.description || '').replace('
', '\n')
- description = JSDOM.fragment(description).textContent.trim()
+ let description = cleanTextBlock(event.description || '')
return {
dataSource: 'google',
startTime,
endTime,
- title: event.summary || '',
+ title,
description: description,
location: event.location || '',
isOngoing: startTime.isBefore(now, 'day'),
@@ -31,6 +29,10 @@ function convertGoogleEvents(data, now = moment()) {
return events
}
+/**
+ * Turn an HTML snippet into plain text while keeping its line breaks.
+ *
+ * @param {string} text - HTML markup; a falsy value is treated as ''
+ * @returns {string} the text content, with each `<br>` rendered as a
+ *   newline, tabs replaced by spaces, and outer whitespace trimmed
+ */
+function cleanTextBlock(text) {
+  // The <br> tags must become newlines *before* parseHtml strips the
+  // markup: afterwards no tags remain, so a later replace never matches.
+  // The regex is global because a string pattern only replaces the first hit.
+  const withBreaks = (text || '').replace(/<br\s*\/?>/gi, '\n')
+  return parseHtml(withBreaks).replace(/\t/g, ' ').trim()
+}
+
export async function googleCalendar(calendarId, now = moment()) {
let calendarUrl = `https://www.googleapis.com/calendar/v3/calendars/${calendarId}/events`
diff --git a/modules/node_modules/@frogpond/ccc-lib/html.js b/modules/node_modules/@frogpond/ccc-lib/html.js
new file mode 100644
index 00000000..28374239
--- /dev/null
+++ b/modules/node_modules/@frogpond/ccc-lib/html.js
@@ -0,0 +1,14 @@
+import {toLaxTitleCase} from '@frogpond/titlecase'
+
+import _jsdom from 'jsdom'
+const {JSDOM} = _jsdom
+
+// Html
+
+/**
+ * Parse `string` as an HTML fragment with jsdom and return the
+ * fragment's text content with surrounding whitespace trimmed.
+ *
+ * @param {string} string - the markup to parse
+ * @returns {string} the trimmed text content of the parsed fragment
+ */
+export function parseHtml(string) {
+  const fragment = JSDOM.fragment(string)
+  return fragment.textContent.trim()
+}
+
+/**
+ * Return the text content of an HTML fragment with every run of
+ * whitespace collapsed to a single space and the ends trimmed.
+ *
+ * NOTE(review): despite the parameter name, the value is handed to
+ * JSDOM.fragment, which parses an HTML string; confirm callers pass markup.
+ *
+ * @param {string} elem - the markup to parse
+ * @returns {string} the whitespace-normalised text content
+ */
+export function innerTextWithSpaces(elem) {
+  // JSDOM.fragment returns a DocumentFragment, which has no split();
+  // read its textContent first to get a string.
+  return JSDOM.fragment(elem).textContent.split(/\s+/u).join(' ').trim()
+}
diff --git a/modules/node_modules/@frogpond/ccc-lib/index.js b/modules/node_modules/@frogpond/ccc-lib/index.js
index 2a45fe06..50d28371 100644
--- a/modules/node_modules/@frogpond/ccc-lib/index.js
+++ b/modules/node_modules/@frogpond/ccc-lib/index.js
@@ -1,3 +1,4 @@
export {get} from './http'
export * from './cache'
export * from './url'
+export * from './html'
diff --git a/modules/node_modules/@frogpond/ccc-presence/index.js b/modules/node_modules/@frogpond/ccc-presence/index.js
index a2cfc2ec..bdcdf078 100644
--- a/modules/node_modules/@frogpond/ccc-presence/index.js
+++ b/modules/node_modules/@frogpond/ccc-presence/index.js
@@ -1,10 +1,8 @@
-import {get, ONE_HOUR} from '@frogpond/ccc-lib'
+import {get, ONE_HOUR, parseHtml} from '@frogpond/ccc-lib'
import mem from 'mem'
import lodash from 'lodash'
-import _jsdom from 'jsdom'
import pMap from 'p-map'
const {sortBy, startCase} = lodash
-const {JSDOM} = _jsdom
/*
type ContactPersonType = {
@@ -45,10 +43,13 @@ export function cleanOrg(org) {
// )
let category = org.categories.join(', ')
- let meetings =
- (org.regularMeetingLocation || '').trim() +
- (org.regularMeetingTime || '').trim()
- let description = JSDOM.fragment(org.description).textContent.trim()
+
+ let meetingTime = org.regularMeetingTime || ''
+ let meetingLocation = parseHtml(org.regularMeetingLocation || '')
+ let meetings = `${meetingTime} ${meetingLocation}`.trim()
+
+ let description = parseHtml(org.description)
+
let website = (org.website || '').trim()
if (website && !/^https?:\/\//.test(website)) {
website = `http://${website}`
diff --git a/modules/node_modules/@frogpond/ccc-reason-calendar/index.js b/modules/node_modules/@frogpond/ccc-reason-calendar/index.js
index 39892000..b5f16d28 100644
--- a/modules/node_modules/@frogpond/ccc-reason-calendar/index.js
+++ b/modules/node_modules/@frogpond/ccc-reason-calendar/index.js
@@ -1,13 +1,11 @@
/* eslint-disable camelcase */
-import {get} from '@frogpond/ccc-lib'
+import {get, parseHtml} from '@frogpond/ccc-lib'
import moment from 'moment-timezone'
import dropWhile from 'lodash/dropWhile'
import dropRightWhile from 'lodash/dropRightWhile'
import sortBy from 'lodash/sortBy'
import getUrls from 'get-urls'
-import _jsdom from 'jsdom'
-const {JSDOM} = _jsdom
const TZ = 'US/Central'
@@ -113,8 +111,8 @@ function convertReasonEvent(event, now = moment()) {
moment(event.startTime).isBefore(now, 'day') &&
moment(event.endTime).isSameOrAfter(now)
- let description = (event.description || '').replace('
', '\n')
- description = JSDOM.fragment(description).textContent.trim()
+ let title = parseHtml(event.name || '')
+ let description = cleanTextBlock(event.description || '')
let links = description ? [...getUrls(description)] : []
@@ -122,7 +120,7 @@ function convertReasonEvent(event, now = moment()) {
dataSource: 'reason',
startTime: event.startTime,
endTime: event.endTime,
- title: event.name || '',
+ title: title,
description: description,
location: event.location || '',
links: links,
@@ -138,6 +136,10 @@ function convertReasonEvent(event, now = moment()) {
}
}
+/**
+ * Turn an HTML snippet into plain text while keeping its line breaks.
+ *
+ * @param {string} text - HTML markup; a falsy value is treated as ''
+ * @returns {string} the text content, with each `<br>` rendered as a
+ *   newline and outer whitespace trimmed
+ */
+function cleanTextBlock(text) {
+  // The <br> tags must become newlines *before* parseHtml strips the
+  // markup: afterwards no tags remain, so a later replace never matches.
+  // The regex is global because a string pattern only replaces the first hit.
+  const withBreaks = (text || '').replace(/<br\s*\/?>/gi, '\n')
+  return parseHtml(withBreaks).trim()
+}
+
export async function reasonCalendar(calendarUrl, now = moment()) {
let dateParams = {
// eslint-disable-next-line camelcase
diff --git a/modules/node_modules/@frogpond/ccc-rss-feed/index.js b/modules/node_modules/@frogpond/ccc-rss-feed/index.js
index b43b2124..f33ae8ea 100644
--- a/modules/node_modules/@frogpond/ccc-rss-feed/index.js
+++ b/modules/node_modules/@frogpond/ccc-rss-feed/index.js
@@ -1,4 +1,4 @@
-import {get} from '@frogpond/ccc-lib'
+import {get, parseHtml} from '@frogpond/ccc-lib'
import _jsdom from 'jsdom'
const {JSDOM} = _jsdom
@@ -25,7 +25,7 @@ export function convertRssItemToStory(item) {
let title = item.querySelector('title')
title = title ? title.textContent : '(no title)'
- title = JSDOM.fragment(title).textContent.trim()
+ title = parseHtml(title)
let datePublished = item.querySelector('pubDate')
datePublished = datePublished ? datePublished.textContent : null
@@ -35,12 +35,12 @@ export function convertRssItemToStory(item) {
let content = item.getAttribute('content:encoded')
content = content || (descriptionEl && descriptionEl.textContent)
content = content || '(no content)'
- content = JSDOM.fragment(content).textContent.trim()
+ content = parseHtml(content)
let excerpt = descriptionEl
? descriptionEl.textContent
: content.substr(0, 250)
- excerpt = JSDOM.fragment(excerpt).textContent.trim()
+ excerpt = parseHtml(excerpt)
let featuredImage = null
if (item.querySelector('enclosure')) {
diff --git a/modules/node_modules/@frogpond/ccc-wpjson-feed/index.js b/modules/node_modules/@frogpond/ccc-wpjson-feed/index.js
index 127af1e7..da352db1 100644
--- a/modules/node_modules/@frogpond/ccc-wpjson-feed/index.js
+++ b/modules/node_modules/@frogpond/ccc-wpjson-feed/index.js
@@ -1,6 +1,4 @@
-import {get} from '@frogpond/ccc-lib'
-import _jsdom from 'jsdom'
-const {JSDOM} = _jsdom
+import {get, parseHtml} from '@frogpond/ccc-lib'
export async function fetchWpJson(url, query = {}) {
const feed = await get(url, {query, json: true})
@@ -47,10 +45,10 @@ export function convertWpJsonItemToStory(item) {
categories: categories,
content: item.content.rendered,
datePublished: item.date_gmt,
- excerpt: JSDOM.fragment(item.excerpt.rendered).textContent.trim(),
+ excerpt: parseHtml(item.excerpt.rendered),
featuredImage: featuredImage,
link: item.link,
- title: JSDOM.fragment(item.title.rendered).textContent.trim(),
+ title: parseHtml(item.title.rendered),
}
}
diff --git a/modules/node_modules/@frogpond/ccci-carleton-college/v1/convos/index.js b/modules/node_modules/@frogpond/ccci-carleton-college/v1/convos/index.js
index 74ec5e41..8aed5017 100644
--- a/modules/node_modules/@frogpond/ccci-carleton-college/v1/convos/index.js
+++ b/modules/node_modules/@frogpond/ccci-carleton-college/v1/convos/index.js
@@ -1,4 +1,4 @@
-import {get, ONE_HOUR, makeAbsoluteUrl} from '@frogpond/ccc-lib'
+import {get, ONE_HOUR, makeAbsoluteUrl, parseHtml} from '@frogpond/ccc-lib'
import {fromHtml} from '@frogpond/ccc-markdown'
import mem from 'mem'
import _jsdom from 'jsdom'
@@ -9,15 +9,15 @@ const archiveBase =
'https://apps.carleton.edu/events/convocations/feeds/media_files?page_id=342645'
function processConvo(event) {
- let title = JSDOM.fragment(
+ let title = parseHtml(
event.querySelector('title').textContent,
- ).textContent.trim()
+ )
let description = event.querySelector('description')
description = description
- ? JSDOM.fragment(
+ ? parseHtml(
event.querySelector('description').textContent,
- ).textContent.trim()
+ )
: ''
let pubDate = moment(event.querySelector('pubDate').textContent)
diff --git a/modules/node_modules/@frogpond/ccci-carleton-college/v1/news/nnb.js b/modules/node_modules/@frogpond/ccci-carleton-college/v1/news/nnb.js
index 60b4e3ef..95f0ac87 100644
--- a/modules/node_modules/@frogpond/ccci-carleton-college/v1/news/nnb.js
+++ b/modules/node_modules/@frogpond/ccci-carleton-college/v1/news/nnb.js
@@ -1,4 +1,4 @@
-import {get} from '@frogpond/ccc-lib'
+import {get, parseHtml} from '@frogpond/ccc-lib'
import _jsdom from 'jsdom'
import lodash from 'lodash'
const {groupBy, toPairs} = lodash
@@ -13,9 +13,9 @@ export async function noonNewsBulletein() {
let bulletinEls = [...dom.window.document.querySelectorAll('item')]
let bulletins = bulletinEls.map((item) => {
let description = item.querySelector('description').textContent
- description = JSDOM.fragment(description).textContent.trim()
+ description = parseHtml(description)
let category = item.querySelector('category').textContent
- category = JSDOM.fragment(category).textContent.trim()
+ category = parseHtml(category)
return {description, category}
})
diff --git a/modules/node_modules/@frogpond/ccci-stolaf-college/v1/jobs/index.js b/modules/node_modules/@frogpond/ccci-stolaf-college/v1/jobs/index.js
index 8b79b50a..f4b170e2 100644
--- a/modules/node_modules/@frogpond/ccci-stolaf-college/v1/jobs/index.js
+++ b/modules/node_modules/@frogpond/ccci-stolaf-college/v1/jobs/index.js
@@ -1,22 +1,19 @@
-import {get, ONE_DAY} from '@frogpond/ccc-lib'
+import {get, ONE_DAY, parseHtml} from '@frogpond/ccc-lib'
import mem from 'mem'
-import _jsdom from 'jsdom'
import getUrls from 'get-urls'
-const {JSDOM} = _jsdom
-
export function cleanJob(job) {
+ const title = parseHtml(job.title)
+ const office = parseHtml(job.office)
+ const hoursPerWeek = parseHtml(job.hoursPerWeek)
+ const timeOfHours = parseHtml(job.timeOfHours)
+
// these all need to retain their newlines
- const description = cleanTextBlock(
- JSDOM.fragment(job.description).textContent,
- )
- const comments = cleanTextBlock(JSDOM.fragment(job.comments).textContent)
- const skills = cleanTextBlock(JSDOM.fragment(job.skills).textContent)
- const howToApply = cleanTextBlock(JSDOM.fragment(job.howToApply).textContent)
- const timeline = cleanTextBlock(JSDOM.fragment(job.timeline).textContent)
- const timeOfHours = cleanTextBlock(
- JSDOM.fragment(job.timeOfHours).textContent,
- )
+ const description = cleanTextBlock(job.description)
+ const comments = cleanTextBlock(job.comments)
+ const skills = cleanTextBlock(job.skills)
+ const howToApply = cleanTextBlock(job.howToApply)
+ const timeline = cleanTextBlock(job.timeline)
const contactEmail = fixupEmailFormat(job.contactEmail)
const contactPhone = fixupPhoneFormat(job.contactPhone)
@@ -34,12 +31,12 @@ export function cleanJob(job) {
contactPhone: contactPhone,
description: description,
goodForIncomingStudents: job.goodForIncomingStudents,
- hoursPerWeek: job.hoursPerWeek,
+ hoursPerWeek: hoursPerWeek,
howToApply: howToApply,
id: job.id,
lastModified: job.lastModified,
links: links,
- office: job.office,
+ office: office,
openPositions: job.openPositions,
skills: skills,
timeline: timeline,
@@ -51,7 +48,7 @@ export function cleanJob(job) {
}
function cleanTextBlock(text) {
- return text.replace(/\s+/g, ' ')
+ return parseHtml(text).replace(/\t/g, ' ').trim()
}
export function getLinksFromJob({description, comments, skills, howToApply}) {
diff --git a/modules/node_modules/@frogpond/ccci-stolaf-college/v1/streams/index.js b/modules/node_modules/@frogpond/ccci-stolaf-college/v1/streams/index.js
index cdec4f77..07db4dcc 100644
--- a/modules/node_modules/@frogpond/ccci-stolaf-college/v1/streams/index.js
+++ b/modules/node_modules/@frogpond/ccci-stolaf-college/v1/streams/index.js
@@ -1,4 +1,4 @@
-import {get, ONE_HOUR} from '@frogpond/ccc-lib'
+import {get, ONE_HOUR, parseHtml} from '@frogpond/ccc-lib'
import mem from 'mem'
import moment from 'moment-timezone'
@@ -19,9 +19,15 @@ export async function getStreams({streamClass, sort, dateFrom, dateTo}) {
(resp) => resp.body,
)
const processed = data.results.map((stream) => {
- let {starttime} = stream
+ let {starttime, title, subtitle, performer} = stream
+
+ let streamTitle = parseHtml(title)
+ let detail = parseHtml(subtitle || performer || '')
+
return {
...stream,
+ title: streamTitle,
+ subtitle: detail,
starttime: moment
.tz(starttime, 'YYYY-MM-DD HH:mm', 'America/Chicago')
.toISOString(),
diff --git a/package.json b/package.json
index 0a0924d8..34b8769a 100644
--- a/package.json
+++ b/package.json
@@ -22,11 +22,11 @@
"test": "./scripts/smoke-test.sh"
},
"dependencies": {
+ "@frogpond/titlecase": "^1.0.0",
"dotenv": "10.0.0",
"esm": "3.2.25",
"get-urls": "10.0.1",
"got": "9.6.0",
- "html-entities": "2.3.2",
"is-absolute-url": "3.0.3",
"jsdom": "16.6.0",
"koa": "2.13.4",
diff --git a/yarn.lock b/yarn.lock
index dc153e20..aa7f3d5f 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -109,6 +109,11 @@
minimatch "^3.0.4"
strip-json-comments "^3.1.1"
+"@frogpond/titlecase@^1.0.0":
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/@frogpond/titlecase/-/titlecase-1.0.0.tgz#993e5371c31b58a839e76fdc531053c55b1a2736"
+ integrity sha512-C1qKm/J+B+cXo+7+ZHbRnt2iApx/IrxMEXwOxe+ZkeTvSC1nZ2XRWf0xzFuAanpBNDhFcgrxdnR8FMKNHo1scQ==
+
"@gar/promisify@^1.0.1":
version "1.1.2"
resolved "https://registry.yarnpkg.com/@gar/promisify/-/promisify-1.1.2.tgz#30aa825f11d438671d585bd44e7fd564535fc210"
@@ -1130,11 +1135,6 @@ html-encoding-sniffer@^2.0.1:
dependencies:
whatwg-encoding "^1.0.5"
-html-entities@2.3.2:
- version "2.3.2"
- resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-2.3.2.tgz#760b404685cb1d794e4f4b744332e3b00dcfe488"
- integrity sha512-c3Ab/url5ksaT0WyleslpBEthOzWhrjQbg75y7XUsfSzi3Dgzt0l8w5e7DylRn15MTlMMD58dTfzddNS2kcAjQ==
-
http-assert@^1.3.0:
version "1.4.1"
resolved "https://registry.yarnpkg.com/http-assert/-/http-assert-1.4.1.tgz#c5f725d677aa7e873ef736199b89686cceb37878"