Skip to content

Commit

Permalink
Merge pull request #110 from LD4P/t102-throttle2
Browse files Browse the repository at this point in the history
Throttles requests to Trellis.
  • Loading branch information
jcoyne authored Nov 14, 2019
2 parents 301f6af + c66b7a5 commit 9a033e0
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 11 deletions.
3 changes: 2 additions & 1 deletion config/default.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@ module.exports = {
indexUrl: process.env.INDEX_URL || 'http://localhost:9200',
nonRdfTypeURI: process.env.NON_RDF_TYPE_URI || 'http://www.w3.org/ns/ldp#NonRDFSource',
nonRdfMimeType: process.env.NON_RDF_MIME_TYPE || 'application/json',
- debug: process.env.DEBUG !== undefined ? process.env.DEBUG : true
+ debug: process.env.DEBUG !== undefined ? process.env.DEBUG : true,
+ poolLimit: process.env.POOL_LIMIT || 2
}
17 changes: 15 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"jsonpath-plus": "^1.1.0",
"stomp-client": "^0.9.0",
"superagent": "^5.1.0",
+ "tiny-async-pool": "^1.0.4",
"url-parse": "^1.4.4",
"wait-on": "^3.2.0"
},
Expand Down
13 changes: 5 additions & 8 deletions src/Crawler.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import config from 'config'
import Logger from './Logger'
import Request from './Request'
+ import asyncPool from 'tiny-async-pool'

const linkHeaderRegex = /<(?<link>.+)>; rel="type"/

Expand Down Expand Up @@ -41,17 +42,13 @@ export default class Crawler {
// index.
const response = await this.typeSpecificRequest(uri, types).response()

+ await onResource(response.body, uri, types)
+
  const containedResourcesArray = this.containedResourcesArray(response.body)
  this.logger.debug(`${uri} contains ${containedResourcesArray}`)

- const containedResourcePromises = containedResourcesArray.map(async child => {
- if (child)
- // Recurse down into each child resource
- await this.request(child, onResource)
- })
-
- // await promises for the callback on this resource as well as the requests for any child resources
- await Promise.all([onResource(response.body, uri, types)].concat(containedResourcePromises))
+ await asyncPool(config.get('poolLimit'), containedResourcesArray, (child) => this.request(child, onResource))
+
} catch(error) {
this.logger.error(`during crawl, error making mime type-specific request to ${uri}: ${error}`)
}
Expand Down

0 comments on commit 9a033e0

Please sign in to comment.