Skip to content

Commit

Permalink
feat: Added catalog metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
nickevansuk committed Mar 20, 2024
1 parent 1b82515 commit 489a4d2
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 14 deletions.
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ If the URL supplied is a data catalog, it gets the `dataset` array and flattens

#### Returns
A `Promise` that resolves with an object containing:
- `catalogMetadata` - A JSON-LD object of the root data catalog provided.
- `urls` - An array of strings, each being a URL for a dataset.
- `errors` - An array of error objects, each containing details about errors encountered during the retrieval process. If no errors were encountered, this array is empty. Each error object includes:
- `url`: The URL from which data was being fetched when the error occurred.
Expand Down Expand Up @@ -86,7 +87,8 @@ The `errors` array it returns will detail any issues that occurred during the pr

#### Returns:
A `Promise` that resolves with an object containing:
- `jsonld`: An array of extracted JSON-LD objects from the datasets.
- `catalogMetadata`: A JSON-LD object of the root data catalog provided.
- `datasets`: An array of extracted JSON-LD objects from the Dataset Sites.
- `errors`: An array of error objects indicating any issues encountered during fetching. Each error object includes:
- `url`: The URL from which data was being fetched when the error occurred.
- `status`: HTTP status code of the error response (if available).
Expand All @@ -96,8 +98,8 @@ A `Promise` that resolves with an object containing:
```js
const { getAllDatasets } = require('@openactive/dataset-utils');

getAllDatasets().then(({ jsonld, errors }) => {
console.log(jsonld);
getAllDatasets().then(({ datasets, errors }) => {
console.log(datasets);

// Iterating through the errors
errors.forEach(error => {
Expand Down
53 changes: 43 additions & 10 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ const { Parser } = require('htmlparser2');
* If the URL is not supplied, the OA Data Catalog (https://openactive.io/data-catalogs/data-catalog-collection.jsonld) is used.
*
* @param {string} [dataCatalogUrl]
* @returns {Promise<{urls: string[], errors: object[]}>}
* @returns {Promise<{catalogMetadata: Record<string,any>, urls: string[], errors: object[]}>}
*/
async function getAllDatasetSiteUrls(dataCatalogUrl = 'https://openactive.io/data-catalogs/data-catalog-collection.jsonld') {
let catalog;
Expand All @@ -35,16 +35,28 @@ async function getAllDatasetSiteUrls(dataCatalogUrl = 'https://openactive.io/dat
const allUrls = [].concat(...datasetArraysAndErrors.map(data => data.urls));
const allErrors = [].concat(...datasetArraysAndErrors.map(data => data.errors));

return { urls: allUrls, errors: allErrors };
return {
catalogMetadata: catalog,
urls: allUrls,
errors: allErrors,
};
}

// If the catalog has `dataset`, it has no further part catalogs, so the dataset URLs can be taken directly from it
if (catalog.dataset) {
return { urls: catalog.dataset, errors: [] };
return {
catalogMetadata: catalog,
urls: catalog.dataset,
errors: [],
};
}

// If the catalog has neither `hasPart` nor `dataset`, return an empty `urls` array as it does not have the information we want
return { urls: [], errors };
return {
catalogMetadata: catalog,
urls: [],
errors,
};
}

/**
Expand Down Expand Up @@ -88,12 +100,12 @@ function extractJSONLDfromHTML(url, html) {
* If dataCatalogUrl is not supplied, the default OA Data Catalog (https://openactive.io/data-catalogs/data-catalog-collection.jsonld) is used.
*
* @param {string} [dataCatalogUrl]
* @returns {Promise<{jsonld: Record<string,any>[], errors: string[]}>}
* @returns {Promise<{catalogMetadata: Record<string,any>, datasets: Record<string,any>[], errors: object[]}>}
*
*/
async function getAllDatasets(dataCatalogUrl = 'https://openactive.io/data-catalogs/data-catalog-collection.jsonld') {
// Get Dataset URLs
const { urls: datasetUrls, errors } = await getAllDatasetSiteUrls(dataCatalogUrl);
const { catalogMetadata, urls: datasetUrls, errors } = await getAllDatasetSiteUrls(dataCatalogUrl);

const jsonldFromDatasetUrls = (await Promise.all(datasetUrls.map(async (datasetUrl) => {
let dataset;
Expand All @@ -109,13 +121,34 @@ async function getAllDatasets(dataCatalogUrl = 'https://openactive.io/data-catal
return null;
}

const jsonld = extractJSONLDfromHTML(datasetUrl, dataset);
return jsonld;
try {
const jsonld = extractJSONLDfromHTML(datasetUrl, dataset);
if (!jsonld || !jsonld['@id']) {
errors.push({
url: datasetUrl,
status: null,
message: 'Invalid JSON-LD found in dataset HTML - it did not contain `@id`.',
});
return null;
}
return jsonld;
} catch (error) {
errors.push({
url: datasetUrl,
status: null,
message: error.message,
});
return null;
}
})))
// Filter out dataset sites that did not yield valid JSON-LD (returned as null above)
.filter(x => !!x);

return { jsonld: jsonldFromDatasetUrls, errors };
return {
catalogMetadata,
datasets: jsonldFromDatasetUrls,
errors,
};
}

/**
Expand Down Expand Up @@ -188,7 +221,7 @@ async function axiosGetWithRetryForKnownLegendIssue(url) {

for (let attempt = 0; attempt < maxRetries; attempt += 1) {
try {
response = await axios.get(url);
response = await axios.get(url, { timeout: 60000 });
break; // Exit the loop if the request was successful
} catch (error) {
if (error.response && error.response.status === 403 && attempt < maxRetries - 1) {
Expand Down
4 changes: 3 additions & 1 deletion test/getAllDatasets-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ describe('getAllDatasets()', function () {
});

// Test
const { jsonld: datasets } = await getAllDatasets('https://openactive.io/data-catalogs/example-data-catalog-collection.jsonld');
const { datasets, errors } = await getAllDatasets('https://openactive.io/data-catalogs/example-data-catalog-collection.jsonld');

// Assertions
expect(errors).to.be.an('array');
expect(errors.length).to.equal(0);
expect(datasets).to.be.an('array');
expect(datasets.length).to.be.above(0);
expect(datasets[0]).to.be.an('object');
Expand Down

0 comments on commit 489a4d2

Please sign in to comment.