Skip to content
This repository has been archived by the owner on Aug 9, 2019. It is now read-only.

Commit

Permalink
Added Exception if trying to register the same Crawler twice.
Browse files Browse the repository at this point in the history
  • Loading branch information
Felipe Martín committed Mar 15, 2013
1 parent 01b0053 commit 67c0feb
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 8 deletions.
19 changes: 11 additions & 8 deletions datCrawl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datCrawl.exceptions import CrawlerDontHaveUrlsToWatch, \
CrawlerIsNotInstanceOfBaseCrawler, CrawlerForThisURLNotFound, \
NoCrawlerRegistered
NoCrawlerRegistered, CrawlerAlreadyRegistered
from datCrawl.crawlers import Crawler
import re

Expand All @@ -15,15 +15,18 @@ def __init__(self):
def register_crawler(self, crawler):
"Registers a crawler on the core to use in certain urls."
class_name = crawler().__class__.__name__
if isinstance(crawler(), Crawler):
urls = crawler().urls
if len(urls) > 0:
[self.register_url(url, action, class_name) for action, url in urls]
self.crawlers[class_name] = crawler
if class_name not in self.crawlers:
if isinstance(crawler(), Crawler):
urls = crawler().urls
if len(urls) > 0:
[self.register_url(url, action, class_name) for action, url in urls]
self.crawlers[class_name] = crawler
else:
raise CrawlerDontHaveUrlsToWatch('Crawler %s dont have URLs to watch for.' % class_name)
else:
raise CrawlerDontHaveUrlsToWatch('Crawler %s dont have URLs to watch for.' % class_name)
raise CrawlerIsNotInstanceOfBaseCrawler('Crawler %s is not correctly created. (must be instance of base Crawler class)' % class_name)
else:
raise CrawlerIsNotInstanceOfBaseCrawler('Crawler %s is not correctly created. (must be instance of base Crawler class)' % class_name)
raise CrawlerAlreadyRegistered("Crawler %s is already registered." % class_name)

def register_url(self, url, action, crawler):
"Registers a certain URL to work with a crawler"
Expand Down
5 changes: 5 additions & 0 deletions datCrawl/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,8 @@ class CrawlerActionDoesNotExist(Exception):
class CrawlerForThisURLNotFound(Exception):
"When there's no crawler found for a specific URL"
pass


class CrawlerAlreadyRegistered(Exception):
    """Raised when you try to register the same crawler more than once."""
5 changes: 5 additions & 0 deletions test/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ def test_register_urls(self):
core.register_url(data[0], data[1], data[2])
self.assertEquals(core.urls[0], data)

def test_cant_register_crawler_twice(self):
    # The first registration is expected to succeed; registering the very
    # same crawler class again must raise CrawlerAlreadyRegistered.
    crawl_core = datCrawl()
    crawl_core.register_crawler(AwesomeGoogleCrawler)
    self.assertRaises(
        CrawlerAlreadyRegistered,
        crawl_core.register_crawler,
        AwesomeGoogleCrawler
    )

def test_register_crawler_with_urls(self):
core = datCrawl()
core.register_crawler(AwesomeGoogleCrawler)
Expand Down

0 comments on commit 67c0feb

Please sign in to comment.