From 096a34343c6ad94e7b95c81590acc38a8559fcdb Mon Sep 17 00:00:00 2001
From: Nick Evans <2616208+nickevansuk@users.noreply.github.com>
Date: Tue, 19 Mar 2024 10:15:26 +0000
Subject: [PATCH] feat: Add validateJsonLdId (#5)

---
 README.md    | 29 +++++++++++++++++
 index.js     | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 package.json |  2 +-
 3 files changed, 119 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index b93633b..af6d137 100644
--- a/README.md
+++ b/README.md
@@ -108,6 +108,35 @@ getAllDatasets().then(({ jsonld, errors }) => {
 });
 ```
 
+### `validateJsonLdId(id, expectHtml)`
+
+#### Description
+This function validates the `@id` (or `id`, for backwards compatibility) property within a JSON-LD `Dataset` or `DataCatalog`. It fetches JSON-LD data from a specified URL, checks whether the data is embedded in HTML or raw JSON-LD, extracts the JSON-LD, and ensures that the `@id` field within the document matches the provided `id`. This function acts as a safety check, affirming that the expected identifier aligns exactly with the identifier found within the fetched JSON-LD document. Note that `@id` is case sensitive and must match exactly.
+
+#### Parameters
+- `id` (string): A string that specifies the expected `@id` or `id` value in the JSON-LD document.
+- `expectHtml` (boolean): A boolean flag indicating whether the fetched data is expected to be embedded within HTML such as for a Dataset Site (when `true`), or expected to be raw JSON-LD such as for a Data Catalogue (when `false`).
+
+#### Returns
+A `Promise` that resolves with an object containing:
+  - `isValid` - A boolean that is `true` if the validation is successful (the expected `@id` matches the found `@id`) and `false` otherwise.
+  - `error` - A string describing the error encountered during the validation process or `null` if the validation is successful.
+
+#### Usage
+```javascript
+async function exampleUsage() {
+  const id = "https://example.com/data.jsonld";
+  const { isValid, error } = await validateJsonLdId(id, false);
+
+  if (isValid) {
+    console.log(`Validation successful for ID: ${id}`);
+  } else {
+    console.error(`Validation failed for ID: ${id}. Error: ${error}`);
+  }
+}
+```
+
+
 ## Testing
 
 Execute test cases using:
diff --git a/index.js b/index.js
index b5bde82..ce6f7c2 100644
--- a/index.js
+++ b/index.js
@@ -99,7 +99,7 @@ async function getAllDatasets(dataCatalogUrl = 'https://openactive.io/data-catal
     let dataset;
     try {
       // Get JSONLD from dataset URLs
-      dataset = (await axios.get(datasetUrl)).data;
+      dataset = (await axiosGetWithRetryForKnownLegendIssue(datasetUrl)).data;
     } catch (error) {
       errors.push({
         url: datasetUrl,
@@ -118,8 +118,96 @@ async function getAllDatasets(dataCatalogUrl = 'https://openactive.io/data-catal
   return { jsonld: jsonldFromDatasetUrls, errors };
 }
 
+/**
+ * Validates JSON-LD content by ensuring the '@id' or 'id' field matches the provided ID.
+ *
+ * This function performs an HTTP GET request to the specified ID (URL), retrieves
+ * the response, and extracts JSON-LD from it if needed and possible. It then compares
+ * the '@id' or 'id' field from the retrieved JSON-LD to the provided ID. Failures are
+ * reported through the returned `error` string rather than thrown.
+ *
+ * Note that this is only applicable to JSON-LD "@id" for the DataCatalog and Dataset types, which must resolve.
+ *
+ * @async
+ * @param {string} id - The expected '@id' or 'id' value, also the URL to be requested.
+ * @param {boolean} expectHtml - A flag indicating whether the response is expected to be HTML (i.e. a Dataset Site).
+ * @returns {Promise<{isValid: boolean, error: string|null}>} - An object indicating the validity
+ * of the JSON-LD and any associated error message.
+ *
+ * @example
+ * validateJsonLdId('https://example.com/data.jsonld', false)
+ *   .then(({isValid, error}) => {
+ *     if (isValid) {
+ *       console.log('JSON-LD is valid!');
+ *     } else {
+ *       console.error(`JSON-LD validation failed: ${error}`);
+ *     }
+ *   });
+ */
+async function validateJsonLdId(id, expectHtml) {
+  let body;
+  try {
+    body = (await axiosGetWithRetryForKnownLegendIssue(id)).data;
+  } catch (error) {
+    return { isValid: false, error: `Failed to resolve URL: ${error.message}` };
+  }
+
+  let jsonLd;
+  try {
+    if (expectHtml && typeof body === 'string') {
+      jsonLd = extractJSONLDfromHTML(id, body);
+    } else if (!expectHtml && typeof body === 'object') {
+      jsonLd = body;
+    } else {
+      return { isValid: false, error: `Unexpected response type: ${typeof body}` };
+    }
+  } catch (error) {
+    return { isValid: false, error: error.message };
+  }
+
+  // `typeof null` is 'object' and arrays are objects too, so rule both out before reading
+  // '@id'; otherwise a missing document surfaces as a cryptic TypeError message.
+  if (jsonLd === null || typeof jsonLd !== 'object' || Array.isArray(jsonLd)) {
+    return { isValid: false, error: 'No JSON-LD object found in the response' };
+  }
+
+  const jsonId = jsonLd['@id'] || jsonLd.id;
+  if (jsonId !== id) {
+    return { isValid: false, error: `Mismatched '@id': From file: "${id}"; From referenced JSON-LD: "${jsonId}"` };
+  }
+
+  return { isValid: true, error: null };
+}
+
+/*
+ * System-specific workaround: rate limits in Legend can cause this request to fail with a 403 (?),
+ * so the request is attempted up to 5 times in total before the error is propagated.
+ * TODO: Ask Legend to return a 429 instead
+ */
+async function axiosGetWithRetryForKnownLegendIssue(url) {
+  const maxAttempts = 5; // The initial request plus up to 4 retries
+  const sleep = (milliseconds) => new Promise((resolve) => { setTimeout(resolve, milliseconds); });
+
+  for (let attempt = 1; attempt < maxAttempts; attempt += 1) {
+    try {
+      return await axios.get(url);
+    } catch (error) {
+      // Only the known 403 rate-limit symptom is retried; any other failure propagates immediately
+      if (!error.response || error.response.status !== 403) throw error;
+      // Retry after sleeping for a random duration between 1 and 3 seconds.
+      // A random duration is used to avoid clients retrying at the same time and causing a thundering herd,
+      // particularly when a single service is serving multiple datasets.
+      console.warn(`Attempt ${attempt}: Access forbidden (403) for URL: ${url}. Retrying...`);
+      await sleep(1000 + Math.random() * 2000);
+    }
+  }
+  // Final attempt: a failure here (403 or otherwise) is thrown to the caller
+  return axios.get(url);
+}
+
 module.exports = {
   getAllDatasetSiteUrls,
   extractJSONLDfromHTML,
   getAllDatasets,
+  validateJsonLdId,
 };
diff --git a/package.json b/package.json
index a346c58..84c1bac 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@openactive/dataset-utils",
-  "version": "1.0.1",
+  "version": "1.1.0",
   "description": "Utilities for working with OpenActive data catalogs and dataset sites",
   "homepage": "https://www.openactive.io",
   "main": "index.js",