Skip to content

Commit

Permalink
feat: Add validateJsonLdId (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
nickevansuk authored Mar 19, 2024
1 parent f34fd3f commit 096a343
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 2 deletions.
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,35 @@ getAllDatasets().then(({ jsonld, errors }) => {
});
```

### `validateJsonLdId(id, expectHtml)`

#### Description
This function validates the `@id` (or `id`, for backwards compatibility) property within a JSON-LD `Dataset` or `DataCatalog`. It fetches the document at the URL given by `id`, obtains the JSON-LD from it (extracting it from the HTML when `expectHtml` is `true`, or using the raw JSON-LD response when `expectHtml` is `false`), and ensures that the `@id` field within the document matches the provided `id`. The function does not auto-detect the format: if the response type does not match what `expectHtml` indicates, validation fails with an error. This function acts as a safety check, affirming that the expected identifier aligns exactly with the identifier found within the fetched JSON-LD document. Note that `@id` is case sensitive and must match exactly.

#### Parameters
- `id` (string): The expected `@id` (or `id`) value in the JSON-LD document. This same string is also used as the URL that is requested, so it must be a resolvable URL.
- `expectHtml` (boolean): A boolean flag indicating whether the fetched data is expected to be embedded within HTML such as for a Dataset Site (when `true`), or expected to be raw JSON-LD such as for a Data Catalogue (when `false`).

#### Returns
A `Promise` that resolves with an object containing:
- `isValid` - A boolean that is `true` if the validation is successful (the expected `@id` matches the found `@id`) and `false` otherwise.
- `error` - A string describing the error encountered during the validation process or `null` if the validation is successful.

#### Usage
```javascript
// Example: validate that a raw JSON-LD document (e.g. a Data Catalogue) declares the expected '@id'.
async function exampleUsage() {
// The id doubles as the URL that validateJsonLdId requests.
const id = "https://example.com/data.jsonld";
// Pass `false` because the response is expected to be raw JSON-LD rather than HTML.
const { isValid, error } = await validateJsonLdId(id, false);

if (isValid) {
console.log(`Validation successful for ID: ${id}`);
} else {
// `error` is a human-readable string describing why validation failed.
console.error(`Validation failed for ID: ${id}. Error: ${error}`);
}
}
```


## Testing

Execute test cases using:
Expand Down
90 changes: 89 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ async function getAllDatasets(dataCatalogUrl = 'https://openactive.io/data-catal
let dataset;
try {
// Get JSONLD from dataset URLs
dataset = (await axios.get(datasetUrl)).data;
dataset = (await axiosGetWithRetryForKnownLegendIssue(datasetUrl)).data;
} catch (error) {
errors.push({
url: datasetUrl,
Expand All @@ -118,8 +118,96 @@ async function getAllDatasets(dataCatalogUrl = 'https://openactive.io/data-catal
return { jsonld: jsonldFromDatasetUrls, errors };
}

/**
 * Validates that the JSON-LD document published at `id` declares that same value as its
 * '@id' (or, for backwards compatibility, 'id') property.
 *
 * The function performs an HTTP GET request to `id`, then obtains the JSON-LD either by
 * extracting it from the HTML body (when `expectHtml` is true) or by using the parsed
 * JSON body directly (when `expectHtml` is false). The identifier found in the JSON-LD is
 * compared to `id` using strict, case-sensitive equality.
 *
 * Note that this is only applicable to JSON-LD "@id" for the DataCatalog and Dataset types, which must resolve.
 *
 * Request, extraction and comparison failures are reported through the returned object
 * rather than being thrown.
 *
 * @async
 * @param {string} id - The expected '@id' or 'id' value, also the URL to be requested.
 * @param {boolean} expectHtml - A flag indicating whether the response is expected to be HTML (i.e. a Dataset Site).
 * @returns {Promise<{isValid: boolean, error: string|null}>} - An object indicating the validity
 * of the JSON-LD and any associated error message.
 *
 * @example
 * validateJsonLdId('https://example.com/data.jsonld', false)
 *   .then(({ isValid, error }) => {
 *     if (isValid) {
 *       console.log('JSON-LD is valid!');
 *     } else {
 *       console.error(`JSON-LD validation failed: ${error}`);
 *     }
 *   });
 */
async function validateJsonLdId(id, expectHtml) {
  let body;
  try {
    body = (await axiosGetWithRetryForKnownLegendIssue(id)).data;
  } catch (error) {
    return { isValid: false, error: `Failed to resolve URL: ${error.message}` };
  }

  try {
    let jsonLd;
    if (expectHtml && typeof body === 'string') {
      jsonLd = extractJSONLDfromHTML(id, body);
    } else if (!expectHtml && typeof body === 'object' && body !== null) {
      jsonLd = body;
    } else {
      // `typeof null` is 'object', so null is reported explicitly to keep the message accurate.
      const bodyType = body === null ? 'null' : typeof body;
      return { isValid: false, error: `Unexpected response type: ${bodyType}` };
    }

    // Guard against an extraction that yields nothing usable; otherwise the property
    // lookup below would fail with an unhelpful TypeError message.
    if (typeof jsonLd !== 'object' || jsonLd === null) {
      return { isValid: false, error: 'No JSON-LD object found in response' };
    }

    const jsonId = jsonLd['@id'] || jsonLd.id;
    if (jsonId !== id) {
      return { isValid: false, error: `Mismatched '@id': From file: "${id}"; From referenced JSON-LD: "${jsonId}"` };
    }
  } catch (error) {
    // Covers anything thrown while extracting JSON-LD from the HTML.
    return { isValid: false, error: error.message };
  }

  return { isValid: true, error: null };
}

/*
 * System-specific workaround: rate limiting in Legend can make this request fail with a 403 (?),
 * so a 403 response is retried, for a total of up to 5 attempts. Any other failure, or a 403 on
 * the final attempt, is rethrown to the caller unchanged.
 * TODO: Ask Legend to return a 429 instead
 */
async function axiosGetWithRetryForKnownLegendIssue(url) {
  const attemptLimit = 5; // Total number of requests made before giving up

  const pause = (milliseconds) => new Promise((wake) => { setTimeout(wake, milliseconds); });

  const requestFrom = async (attemptNumber) => {
    try {
      return await axios.get(url);
    } catch (failure) {
      const wasForbidden = failure.response && failure.response.status === 403;
      if (!wasForbidden || attemptNumber >= attemptLimit) {
        throw failure;
      }
      // The wait is randomised (1 to 3 seconds) so that clients do not all retry at the same
      // moment and cause a thundering herd, particularly when a single service is serving
      // multiple datasets.
      console.warn(`Attempt ${attemptNumber}: Access forbidden (403) for URL: ${url}. Retrying...`);
      await pause(1000 + Math.random() * 2000);
      return requestFrom(attemptNumber + 1);
    }
  };

  return requestFrom(1);
}

// Functions exported by this module. axiosGetWithRetryForKnownLegendIssue is not exported.
module.exports = {
getAllDatasetSiteUrls,
extractJSONLDfromHTML,
getAllDatasets,
validateJsonLdId,
};
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@openactive/dataset-utils",
"version": "1.0.1",
"version": "1.1.0",
"description": "Utilities for working with OpenActive data catalogs and dataset sites",
"homepage": "https://www.openactive.io",
"main": "index.js",
Expand Down

0 comments on commit 096a343

Please sign in to comment.