diff --git a/datCrawl/__init__.py b/datCrawl/__init__.py
index d6d0fe3..63e9499 100644
--- a/datCrawl/__init__.py
+++ b/datCrawl/__init__.py
@@ -1,8 +1,11 @@
 from datCrawl.exceptions import *
+from datCrawl.crawlers import *
 import re
 
 
 class datCrawl(object):
+    "Main class."
+
     def __init__(self):
         self.crawlers = {}
         self.urls = []
@@ -20,14 +23,15 @@ def register_crawler(self, crawler):
         else:
             raise CrawlerIsNotInstanceOfBaseCrawler('Crawler %s is not correctly created. (must be instance of base Crawler class)' % class_name)
 
-    def autoregister():
-        "Register all crawelers automagically."
-        pass
-
     def register_url(self, url, action, crawler):
         "Registers a certain URL to work with a crawler"
         self.urls.append((url, action, crawler))
 
+    def autoregister_crawlers(self):
+        "Register all crawlers automagically."
+        # TODO
+        pass
+
     def run(self, url):
         if self.crawlers:
             for registered_url in self.urls:
@@ -40,16 +44,3 @@ def run(self, url):
             raise CrawlerForThisURLNotFound("No crawler registered a URL pattern for: %s" % url)
         else:
             raise NoCrawlerRegistered("You must register a Crawler in order to do something.")
-
-
-class Crawler(object):
-    "Base crawler class."
-    urls = []  # List of tuples with regular expression of URLs that the crawler handle
-
-    def do(self, action, url):
-        try:
-            method = getattr(self, 'action_%s' % action)
-            result = method(url)
-            return result
-        except AttributeError:
-            raise CrawlerActionDoesNotExist('%s: action (%s) does not exist' % (self.__class__.__name__, action))
diff --git a/datCrawl/crawlers.py b/datCrawl/crawlers.py
new file mode 100644
index 0000000..e16035a
--- /dev/null
+++ b/datCrawl/crawlers.py
@@ -0,0 +1,16 @@
+from datCrawl.exceptions import CrawlerActionDoesNotExist
+
+
+class Crawler(object):
+    "Base crawler class."
+    urls = []  # List of tuples with regular expression of URLs that the crawler handles
+
+    def do(self, action, url):
+        "Call this crawler's action_<action> method with url and return its result."
+        # Only the attribute lookup is guarded, so an AttributeError raised
+        # inside the action itself is not misreported as a missing action.
+        try:
+            method = getattr(self, 'action_%s' % action)
+        except AttributeError:
+            raise CrawlerActionDoesNotExist('%s: action (%s) does not exist' % (self.__class__.__name__, action))
+        return method(url)