Reject duplicate crawler registration.

register_crawler() now raises the new CrawlerAlreadyRegistered exception when
the crawler's class name is already a key in self.crawlers. The existing
"is an instance of Crawler" and "has URLs" checks are kept, nested one level
deeper. A test covers registering the same crawler class twice.

NOTE(review): the test hunk references CrawlerAlreadyRegistered, but no import
change for test/test_base.py appears in this patch; confirm the name is
already in scope there.
NOTE(review): line breaks and indentation were reconstructed from the hunk
line counts; verify the context whitespace against the target tree.

diff --git a/datCrawl/__init__.py b/datCrawl/__init__.py
index 38736bb..5084af7 100644
--- a/datCrawl/__init__.py
+++ b/datCrawl/__init__.py
@@ -1,6 +1,6 @@
 from datCrawl.exceptions import CrawlerDontHaveUrlsToWatch, \
     CrawlerIsNotInstanceOfBaseCrawler, CrawlerForThisURLNotFound, \
-    NoCrawlerRegistered
+    NoCrawlerRegistered, CrawlerAlreadyRegistered
 from datCrawl.crawlers import Crawler
 import re
 
@@ -15,15 +15,18 @@ def __init__(self):
     def register_crawler(self, crawler):
         "Registers a crawler on the core to use in certain urls."
         class_name = crawler().__class__.__name__
-        if isinstance(crawler(), Crawler):
-            urls = crawler().urls
-            if len(urls) > 0:
-                [self.register_url(url, action, class_name) for action, url in urls]
-                self.crawlers[class_name] = crawler
+        if class_name not in self.crawlers:
+            if isinstance(crawler(), Crawler):
+                urls = crawler().urls
+                if len(urls) > 0:
+                    [self.register_url(url, action, class_name) for action, url in urls]
+                    self.crawlers[class_name] = crawler
+                else:
+                    raise CrawlerDontHaveUrlsToWatch('Crawler %s dont have URLs to watch for.' % class_name)
             else:
-                raise CrawlerDontHaveUrlsToWatch('Crawler %s dont have URLs to watch for.' % class_name)
+                raise CrawlerIsNotInstanceOfBaseCrawler('Crawler %s is not correctly created. (must be instance of base Crawler class)' % class_name)
         else:
-            raise CrawlerIsNotInstanceOfBaseCrawler('Crawler %s is not correctly created. (must be instance of base Crawler class)' % class_name)
+            raise CrawlerAlreadyRegistered("Crawler %s is already registered." % class_name)
 
     def register_url(self, url, action, crawler):
         "Registers a certain URL to work with a crawler"
diff --git a/datCrawl/exceptions.py b/datCrawl/exceptions.py
index d6b7ee2..1821fcc 100644
--- a/datCrawl/exceptions.py
+++ b/datCrawl/exceptions.py
@@ -21,3 +21,8 @@ class CrawlerActionDoesNotExist(Exception):
 class CrawlerForThisURLNotFound(Exception):
     "When there's no crawler found for a specific URL"
     pass
+
+
+class CrawlerAlreadyRegistered(Exception):
+    "When you try to register the same crawler."
+    pass
diff --git a/test/test_base.py b/test/test_base.py
index c607cdb..790ce62 100644
--- a/test/test_base.py
+++ b/test/test_base.py
@@ -35,6 +35,11 @@ def test_register_urls(self):
         core.register_url(data[0], data[1], data[2])
         self.assertEquals(core.urls[0], data)
 
+    def test_cant_register_crawler_twice(self):
+        core = datCrawl()
+        core.register_crawler(AwesomeGoogleCrawler)
+        self.assertRaises(CrawlerAlreadyRegistered, lambda: core.register_crawler(AwesomeGoogleCrawler))
+
     def test_register_crawler_with_urls(self):
         core = datCrawl()
         core.register_crawler(AwesomeGoogleCrawler)