diff --git a/modules/node_modules/@frogpond/ccc-google-calendar/index.js b/modules/node_modules/@frogpond/ccc-google-calendar/index.js index f1bcf199..46efa8e2 100644 --- a/modules/node_modules/@frogpond/ccc-google-calendar/index.js +++ b/modules/node_modules/@frogpond/ccc-google-calendar/index.js @@ -1,21 +1,19 @@ -import {get} from '@frogpond/ccc-lib' +import {get, parseHtml} from '@frogpond/ccc-lib' import moment from 'moment' import getUrls from 'get-urls' -import _jsdom from 'jsdom' -const {JSDOM} = _jsdom function convertGoogleEvents(data, now = moment()) { let events = data.map((event) => { + const title = parseHtml(event.summary || '') const startTime = moment(event.start.date || event.start.dateTime) const endTime = moment(event.end.date || event.end.dateTime) - let description = (event.description || '').replace('
', '\n') - description = JSDOM.fragment(description).textContent.trim() + let description = cleanTextBlock(event.description || '') return { dataSource: 'google', startTime, endTime, - title: event.summary || '', + title, description: description, location: event.location || '', isOngoing: startTime.isBefore(now, 'day'), @@ -31,6 +29,10 @@ function convertGoogleEvents(data, now = moment()) { return events } +function cleanTextBlock(text) { + return parseHtml(text).replace(/\t/g, ' ').replace('
', '\n').trim() +} + export async function googleCalendar(calendarId, now = moment()) { let calendarUrl = `https://www.googleapis.com/calendar/v3/calendars/${calendarId}/events` diff --git a/modules/node_modules/@frogpond/ccc-lib/html.js b/modules/node_modules/@frogpond/ccc-lib/html.js new file mode 100644 index 00000000..28374239 --- /dev/null +++ b/modules/node_modules/@frogpond/ccc-lib/html.js @@ -0,0 +1,14 @@ +import {toLaxTitleCase} from '@frogpond/titlecase' + +import _jsdom from 'jsdom' +const {JSDOM} = _jsdom + +// Html + +export function parseHtml(string) { + return JSDOM.fragment(string).textContent.trim() +} + +export function innerTextWithSpaces(elem) { + return JSDOM.fragment(elem).split(/\s+/u).join(' ').trim() +} diff --git a/modules/node_modules/@frogpond/ccc-lib/index.js b/modules/node_modules/@frogpond/ccc-lib/index.js index 2a45fe06..50d28371 100644 --- a/modules/node_modules/@frogpond/ccc-lib/index.js +++ b/modules/node_modules/@frogpond/ccc-lib/index.js @@ -1,3 +1,4 @@ export {get} from './http' export * from './cache' export * from './url' +export * from './html' diff --git a/modules/node_modules/@frogpond/ccc-presence/index.js b/modules/node_modules/@frogpond/ccc-presence/index.js index a2cfc2ec..bdcdf078 100644 --- a/modules/node_modules/@frogpond/ccc-presence/index.js +++ b/modules/node_modules/@frogpond/ccc-presence/index.js @@ -1,10 +1,8 @@ -import {get, ONE_HOUR} from '@frogpond/ccc-lib' +import {get, ONE_HOUR, parseHtml} from '@frogpond/ccc-lib' import mem from 'mem' import lodash from 'lodash' -import _jsdom from 'jsdom' import pMap from 'p-map' const {sortBy, startCase} = lodash -const {JSDOM} = _jsdom /* type ContactPersonType = { @@ -45,10 +43,13 @@ export function cleanOrg(org) { // ) let category = org.categories.join(', ') - let meetings = - (org.regularMeetingLocation || '').trim() + - (org.regularMeetingTime || '').trim() - let description = JSDOM.fragment(org.description).textContent.trim() + + let meetingTime = org.regularMeetingTime || '' + let meetingLocation = parseHtml(org.regularMeetingLocation || '') + let meetings = `${meetingTime} ${meetingLocation}`.trim() + + let description = parseHtml(org.description) + let website = (org.website || '').trim() if (website && !/^https?:\/\//.test(website)) { website = `http://${website}` diff --git a/modules/node_modules/@frogpond/ccc-reason-calendar/index.js b/modules/node_modules/@frogpond/ccc-reason-calendar/index.js index 39892000..b5f16d28 100644 --- a/modules/node_modules/@frogpond/ccc-reason-calendar/index.js +++ b/modules/node_modules/@frogpond/ccc-reason-calendar/index.js @@ -1,13 +1,11 @@ /* eslint-disable camelcase */ -import {get} from '@frogpond/ccc-lib' +import {get, parseHtml} from '@frogpond/ccc-lib' import moment from 'moment-timezone' import dropWhile from 'lodash/dropWhile' import dropRightWhile from 'lodash/dropRightWhile' import sortBy from 'lodash/sortBy' import getUrls from 'get-urls' -import _jsdom from 'jsdom' -const {JSDOM} = _jsdom const TZ = 'US/Central' @@ -113,8 +111,8 @@ function convertReasonEvent(event, now = moment()) { moment(event.startTime).isBefore(now, 'day') && moment(event.endTime).isSameOrAfter(now) - let description = (event.description || '').replace('
', '\n') - description = JSDOM.fragment(description).textContent.trim() + let title = parseHtml(event.name || '') + let description = cleanTextBlock(event.description || '') let links = description ? [...getUrls(description)] : [] @@ -122,7 +120,7 @@ function convertReasonEvent(event, now = moment()) { dataSource: 'reason', startTime: event.startTime, endTime: event.endTime, - title: event.name || '', + title: title, description: description, location: event.location || '', links: links, @@ -138,6 +136,10 @@ function convertReasonEvent(event, now = moment()) { } } +function cleanTextBlock(text) { + return parseHtml(text).replace('
', '\n').trim() +} + export async function reasonCalendar(calendarUrl, now = moment()) { let dateParams = { // eslint-disable-next-line camelcase diff --git a/modules/node_modules/@frogpond/ccc-rss-feed/index.js b/modules/node_modules/@frogpond/ccc-rss-feed/index.js index b43b2124..f33ae8ea 100644 --- a/modules/node_modules/@frogpond/ccc-rss-feed/index.js +++ b/modules/node_modules/@frogpond/ccc-rss-feed/index.js @@ -1,4 +1,4 @@ -import {get} from '@frogpond/ccc-lib' +import {get, parseHtml} from '@frogpond/ccc-lib' import _jsdom from 'jsdom' const {JSDOM} = _jsdom @@ -25,7 +25,7 @@ export function convertRssItemToStory(item) { let title = item.querySelector('title') title = title ? title.textContent : '(no title)' - title = JSDOM.fragment(title).textContent.trim() + title = parseHtml(title) let datePublished = item.querySelector('pubDate') datePublished = datePublished ? datePublished.textContent : null @@ -35,12 +35,12 @@ export function convertRssItemToStory(item) { let content = item.getAttribute('content:encoded') content = content || (descriptionEl && descriptionEl.textContent) content = content || '(no content)' - content = JSDOM.fragment(content).textContent.trim() + content = parseHtml(content) let excerpt = descriptionEl ? descriptionEl.textContent : content.substr(0, 250) - excerpt = JSDOM.fragment(excerpt).textContent.trim() + excerpt = parseHtml(excerpt) let featuredImage = null if (item.querySelector('enclosure')) { diff --git a/modules/node_modules/@frogpond/ccc-wpjson-feed/index.js b/modules/node_modules/@frogpond/ccc-wpjson-feed/index.js index 127af1e7..da352db1 100644 --- a/modules/node_modules/@frogpond/ccc-wpjson-feed/index.js +++ b/modules/node_modules/@frogpond/ccc-wpjson-feed/index.js @@ -1,6 +1,4 @@ -import {get} from '@frogpond/ccc-lib' -import _jsdom from 'jsdom' -const {JSDOM} = _jsdom +import {get, parseHtml} from '@frogpond/ccc-lib' export async function fetchWpJson(url, query = {}) { const feed = await get(url, {query, json: true}) @@ -47,10 +45,10 @@ export function convertWpJsonItemToStory(item) { categories: categories, content: item.content.rendered, datePublished: item.date_gmt, - excerpt: JSDOM.fragment(item.excerpt.rendered).textContent.trim(), + excerpt: parseHtml(item.excerpt.rendered), featuredImage: featuredImage, link: item.link, - title: JSDOM.fragment(item.title.rendered).textContent.trim(), + title: parseHtml(item.title.rendered), } } diff --git a/modules/node_modules/@frogpond/ccci-carleton-college/v1/convos/index.js b/modules/node_modules/@frogpond/ccci-carleton-college/v1/convos/index.js index 74ec5e41..8aed5017 100644 --- a/modules/node_modules/@frogpond/ccci-carleton-college/v1/convos/index.js +++ b/modules/node_modules/@frogpond/ccci-carleton-college/v1/convos/index.js @@ -1,4 +1,4 @@ -import {get, ONE_HOUR, makeAbsoluteUrl} from '@frogpond/ccc-lib' +import {get, ONE_HOUR, makeAbsoluteUrl, parseHtml} from '@frogpond/ccc-lib' import {fromHtml} from '@frogpond/ccc-markdown' import mem from 'mem' import _jsdom from 'jsdom' @@ -9,15 +9,15 @@ const archiveBase = 'https://apps.carleton.edu/events/convocations/feeds/media_files?page_id=342645' function processConvo(event) { - let title = JSDOM.fragment( + let title = parseHtml( event.querySelector('title').textContent, - ).textContent.trim() + ) let description = event.querySelector('description') description = description - ? JSDOM.fragment( + ? parseHtml( event.querySelector('description').textContent, - ).textContent.trim() + ) : '' let pubDate = moment(event.querySelector('pubDate').textContent) diff --git a/modules/node_modules/@frogpond/ccci-carleton-college/v1/news/nnb.js b/modules/node_modules/@frogpond/ccci-carleton-college/v1/news/nnb.js index 60b4e3ef..95f0ac87 100644 --- a/modules/node_modules/@frogpond/ccci-carleton-college/v1/news/nnb.js +++ b/modules/node_modules/@frogpond/ccci-carleton-college/v1/news/nnb.js @@ -1,4 +1,4 @@ -import {get} from '@frogpond/ccc-lib' +import {get, parseHtml} from '@frogpond/ccc-lib' import _jsdom from 'jsdom' import lodash from 'lodash' const {groupBy, toPairs} = lodash @@ -13,9 +13,9 @@ export async function noonNewsBulletein() { let bulletinEls = [...dom.window.document.querySelectorAll('item')] let bulletins = bulletinEls.map((item) => { let description = item.querySelector('description').textContent - description = JSDOM.fragment(description).textContent.trim() + description = parseHtml(description) let category = item.querySelector('category').textContent - category = JSDOM.fragment(category).textContent.trim() + category = parseHtml(category) return {description, category} }) diff --git a/modules/node_modules/@frogpond/ccci-stolaf-college/v1/jobs/index.js b/modules/node_modules/@frogpond/ccci-stolaf-college/v1/jobs/index.js index 8b79b50a..f4b170e2 100644 --- a/modules/node_modules/@frogpond/ccci-stolaf-college/v1/jobs/index.js +++ b/modules/node_modules/@frogpond/ccci-stolaf-college/v1/jobs/index.js @@ -1,22 +1,19 @@ -import {get, ONE_DAY} from '@frogpond/ccc-lib' +import {get, ONE_DAY, parseHtml} from '@frogpond/ccc-lib' import mem from 'mem' -import _jsdom from 'jsdom' import getUrls from 'get-urls' -const {JSDOM} = _jsdom - export function cleanJob(job) { + const title = parseHtml(job.title) + const office = parseHtml(job.office) + const hoursPerWeek = parseHtml(job.hoursPerWeek) + const timeOfHours = parseHtml(job.timeOfHours) + // these all need to retain their newlines - const description = cleanTextBlock( - JSDOM.fragment(job.description).textContent, - ) - const comments = cleanTextBlock(JSDOM.fragment(job.comments).textContent) - const skills = cleanTextBlock(JSDOM.fragment(job.skills).textContent) - const howToApply = cleanTextBlock(JSDOM.fragment(job.howToApply).textContent) - const timeline = cleanTextBlock(JSDOM.fragment(job.timeline).textContent) - const timeOfHours = cleanTextBlock( - JSDOM.fragment(job.timeOfHours).textContent, - ) + const description = cleanTextBlock(job.description) + const comments = cleanTextBlock(job.comments) + const skills = cleanTextBlock(job.skills) + const howToApply = cleanTextBlock(job.howToApply) + const timeline = cleanTextBlock(job.timeline) const contactEmail = fixupEmailFormat(job.contactEmail) const contactPhone = fixupPhoneFormat(job.contactPhone) @@ -34,12 +31,12 @@ export function cleanJob(job) { contactPhone: contactPhone, description: description, goodForIncomingStudents: job.goodForIncomingStudents, - hoursPerWeek: job.hoursPerWeek, + hoursPerWeek: hoursPerWeek, howToApply: howToApply, id: job.id, lastModified: job.lastModified, links: links, - office: job.office, + office: office, openPositions: job.openPositions, skills: skills, timeline: timeline, @@ -51,7 +48,7 @@ export function cleanJob(job) { } function cleanTextBlock(text) { - return text.replace(/\s+/g, ' ') + return parseHtml(text).replace(/\t/g, ' ').trim() } export function getLinksFromJob({description, comments, skills, howToApply}) { diff --git a/modules/node_modules/@frogpond/ccci-stolaf-college/v1/streams/index.js b/modules/node_modules/@frogpond/ccci-stolaf-college/v1/streams/index.js index cdec4f77..07db4dcc 100644 --- a/modules/node_modules/@frogpond/ccci-stolaf-college/v1/streams/index.js +++ b/modules/node_modules/@frogpond/ccci-stolaf-college/v1/streams/index.js @@ -1,4 +1,4 @@ -import {get, ONE_HOUR} from '@frogpond/ccc-lib' +import {get, ONE_HOUR, parseHtml} from '@frogpond/ccc-lib' import mem from 'mem' import moment from 'moment-timezone' @@ -19,9 +19,15 @@ export async function getStreams({streamClass, sort, dateFrom, dateTo}) { (resp) => resp.body, ) const processed = data.results.map((stream) => { - let {starttime} = stream + let {starttime, title, subtitle, performer} = stream + + let streamTitle = parseHtml(title) + let detail = parseHtml(subtitle || performer || '') + return { ...stream, + title: streamTitle, + subtitle: detail, starttime: moment .tz(starttime, 'YYYY-MM-DD HH:mm', 'America/Chicago') .toISOString(), diff --git a/package.json b/package.json index 0a0924d8..34b8769a 100644 --- a/package.json +++ b/package.json @@ -22,11 +22,11 @@ "test": "./scripts/smoke-test.sh" }, "dependencies": { + "@frogpond/titlecase": "^1.0.0", "dotenv": "10.0.0", "esm": "3.2.25", "get-urls": "10.0.1", "got": "9.6.0", - "html-entities": "2.3.2", "is-absolute-url": "3.0.3", "jsdom": "16.6.0", "koa": "2.13.4", diff --git a/yarn.lock b/yarn.lock index dc153e20..aa7f3d5f 100644 --- a/yarn.lock +++ b/yarn.lock @@ -109,6 +109,11 @@ minimatch "^3.0.4" strip-json-comments "^3.1.1" +"@frogpond/titlecase@^1.0.0": + version "1.0.0" + resolved "https://registry.yarnpkg.com/@frogpond/titlecase/-/titlecase-1.0.0.tgz#993e5371c31b58a839e76fdc531053c55b1a2736" + integrity sha512-C1qKm/J+B+cXo+7+ZHbRnt2iApx/IrxMEXwOxe+ZkeTvSC1nZ2XRWf0xzFuAanpBNDhFcgrxdnR8FMKNHo1scQ== + "@gar/promisify@^1.0.1": version "1.1.2" resolved "https://registry.yarnpkg.com/@gar/promisify/-/promisify-1.1.2.tgz#30aa825f11d438671d585bd44e7fd564535fc210" @@ -1130,11 +1135,6 @@ html-encoding-sniffer@^2.0.1: dependencies: whatwg-encoding "^1.0.5" -html-entities@2.3.2: - version "2.3.2" - resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-2.3.2.tgz#760b404685cb1d794e4f4b744332e3b00dcfe488" - integrity sha512-c3Ab/url5ksaT0WyleslpBEthOzWhrjQbg75y7XUsfSzi3Dgzt0l8w5e7DylRn15MTlMMD58dTfzddNS2kcAjQ== - http-assert@^1.3.0: version "1.4.1" resolved "https://registry.yarnpkg.com/http-assert/-/http-assert-1.4.1.tgz#c5f725d677aa7e873ef736199b89686cceb37878"