Skip to content
This repository has been archived by the owner on Aug 9, 2019. It is now read-only.

Commit

Permalink
Using a separate module for the crawlers
Browse files Browse the repository at this point in the history
  • Loading branch information
Felipe Martín committed Mar 15, 2013
1 parent 3051009 commit c7998d7
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 17 deletions.
25 changes: 8 additions & 17 deletions datCrawl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from datCrawl.exceptions import *
from datCrawl.crawlers import *
import re


class datCrawl(object):
"Main class."

def __init__(self):
self.crawlers = {}
self.urls = []
Expand All @@ -20,14 +23,15 @@ def register_crawler(self, crawler):
else:
raise CrawlerIsNotInstanceOfBaseCrawler('Crawler %s is not correctly created. (must be instance of base Crawler class)' % class_name)

def autoregister(self):
    """Register all crawlers automatically.

    Placeholder: nothing is implemented yet, so calling this does nothing
    and returns None.
    """
    # ``self`` is required because this is defined inside a class; without
    # it, calling the method on an instance raises TypeError.
    pass

def register_url(self, url, action, crawler):
    """Record that ``url`` is to be handled by ``crawler`` through ``action``.

    The three values are stored together as a single tuple appended to
    ``self.urls``.
    """
    entry = (url, action, crawler)
    self.urls.append(entry)

def autoregister_crawlers(self):
    """Register all crawlers automatically.

    Placeholder: nothing is implemented yet, so calling this does nothing
    and returns None.
    """
    # ``self`` is required because this is defined inside a class; without
    # it, calling the method on an instance raises TypeError.
    # TODO
    pass

def run(self, url):
if self.crawlers:
for registered_url in self.urls:
Expand All @@ -40,16 +44,3 @@ def run(self, url):
raise CrawlerForThisURLNotFound("No crawler registered a URL pattern for: %s" % url)
else:
raise NoCrawlerRegistered("You must register a Crawler in order to do something.")


class Crawler(object):
    """Base crawler class.

    Subclasses provide actions as methods named ``action_<name>``; ``do``
    looks one up by name and calls it.
    """
    # List of tuples with regular expressions of the URLs that the crawler
    # handles. Class-level list: shared by subclasses unless they override it.
    urls = []

    def do(self, action, url):
        """Call the ``action_<action>`` method with ``url`` and return its result.

        Raises CrawlerActionDoesNotExist when no such method is defined on
        this crawler. Any exception raised by the action itself (including
        AttributeError) propagates unchanged.
        """
        # Guard only the lookup. Wrapping the call as well would turn an
        # AttributeError raised inside the action into a misleading
        # "action does not exist" error and hide the real bug.
        try:
            method = getattr(self, 'action_%s' % action)
        except AttributeError:
            raise CrawlerActionDoesNotExist('%s: action (%s) does not exist' % (self.__class__.__name__, action))
        return method(url)
11 changes: 11 additions & 0 deletions datCrawl/crawlers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
class Crawler(object):
"Base crawler class."
urls = [] # List of tuples with regular expression of URLs that the crawler handle

def do(self, action, url):
try:
method = getattr(self, 'action_%s' % action)
result = method(url)
return result
except AttributeError:

This comment has been minimized.

Copy link
@fmartingr

fmartingr Mar 18, 2013

Owner

This except is hiding genuine exceptions raised inside the action method, so they are never shown. Needs fix.

raise CrawlerActionDoesNotExist('%s: action (%s) does not exist' % (self.__class__.__name__, action))

0 comments on commit c7998d7

Please sign in to comment.