From 7281e857fb40c4f43218ef19d3bf96661b881c34 Mon Sep 17 00:00:00 2001
From: Sebastian Nagel
Date: Thu, 30 Jan 2020 16:21:35 +0100
Subject: [PATCH] Remove Injector topology from crawler run script (integrated into main topology)

---
 bin/run-crawler.sh | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/bin/run-crawler.sh b/bin/run-crawler.sh
index 53a85d5..2fb09c9 100755
--- a/bin/run-crawler.sh
+++ b/bin/run-crawler.sh
@@ -26,17 +26,9 @@ sleep 10
 
 STORMCRAWLER="storm jar $PWD/lib/crawler.jar"
 
-# inject seeds into Elasticsearch
-$STORMCRAWLER com.digitalpebble.stormcrawler.elasticsearch.ESSeedInjector \
-  $PWD/seeds '*' -conf $PWD/conf/es-conf.yaml -conf $PWD/conf/crawler-conf.yaml
-# alternatively running the flux
-#$STORMCRAWLER org.apache.storm.flux.Flux --remote $PWD/conf/es-injector.flux
-# wait until seeds are in the status index
-sleep 20
-
 # run the crawler
 $STORMCRAWLER org.commoncrawl.stormcrawler.news.CrawlTopology \
-  -conf $PWD/conf/es-conf.yaml -conf $PWD/conf/crawler-conf.yaml
+  $PWD/seeds '*' -conf $PWD/conf/es-conf.yaml -conf $PWD/conf/crawler-conf.yaml
 # alternatively running the flux
 #$STORMCRAWLER org.apache.storm.flux.Flux --remote $PWD/conf/crawler.flux
 # suppress warnings about malformed XML in sitemaps