Skip to content

Commit

Permalink
Disable the WACZ exporter extension when the downloader middleware or spider middleware is enabled (#16)
Browse files Browse the repository at this point in the history
  • Loading branch information
Wesley van Lee committed Oct 31, 2024
1 parent 9ea5f71 commit e044c9b
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions scrapy_webarchive/extensions.py
Original file line number Diff line number Diff line change
Expand Up
@@ -53,6 +53,12 @@ def __init__(self, settings: Settings, crawler: Crawler) -> None:

if not self.settings["SW_EXPORT_URI"]:
raise NotConfigured

if "scrapy_webarchive.spidermiddlewares.WaczCrawlMiddleware" in settings.getlist('SPIDER_MIDDLEWARES'):
raise NotConfigured("You must disable the WaczCrawlMiddleware before you can use this extension.")

if "scrapy_webarchive.downloadermiddlewares.WaczMiddleware" in settings.getlist('DOWNLOADER_MIDDLEWARES'):
raise NotConfigured("You must disable the WaczMiddleware before you can use this extension.")

self.store: FilesStoreProtocol = self._get_store(spider_name=crawler.spider.name)
self.writer = WarcFileWriter(collection_name=crawler.spider.name)
Expand Down

0 comments on commit e044c9b

Please sign in to comment.