Skip to content
This repository has been archived by the owner on Aug 9, 2019. It is now read-only.

Commit

Permalink
Test by module/file. More organization.
Browse files Browse the repository at this point in the history
  • Loading branch information
Felipe Martín committed Mar 15, 2013
1 parent 3fbed9f commit 388909c
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 61 deletions.
22 changes: 22 additions & 0 deletions test/requirements.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from datCrawl.crawlers import Crawler


class AwesomeGoogleCrawler(Crawler):
    """Test fixture: crawler watching Google's Spanish and German domains.

    Each ``urls`` entry is an ``(action, pattern)`` tuple, as consumed by
    ``datCrawl.register_crawler`` (see test_register_crawler_with_urls).
    """

    # Raw strings keep the regex bytes identical while preventing Python
    # from treating "\:" and "\/" as (invalid) string escape sequences,
    # which emit DeprecationWarning/SyntaxWarning on modern interpreters.
    urls = [
        ('es', r'http\:\/\/(www\.)?google\.es', ),
        ('de', r'http\:\/\/(www\.)?google\.de', ),
    ]


class AwesomeEmptyCrawler(Crawler):
    # Intentionally defines no ``urls`` attribute: used by the test suite to
    # verify that registering a crawler without URLs raises
    # CrawlerDontHaveUrlsToWatch (see test_register_crawler_without_urls).
    pass


class AwesomeWikipediaTitleCrawler(Crawler):
    """Test fixture: crawler exposing a single ``title`` action for Wikipedia.

    The action name in the url tuple ('title') maps to the ``action_title``
    method by naming convention.
    """

    # Raw string avoids invalid string-escape warnings; regex bytes unchanged.
    # NOTE(review): the dots in "en.wikipedia.org" are unescaped, so they match
    # any character — harmless for a test fixture, but not a strict pattern.
    urls = [
        ('title', r'http\:\/\/en.wikipedia.org\/wiki\/(.*)', ),
    ]

    def action_title(self, url):
        """Stubbed action: return a canned result instead of fetching *url*."""
        # LOOK, IM CRAWLING THE INTERNETS!
        return {'title': 'Python'}
63 changes: 2 additions & 61 deletions test/test_base.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,9 @@
import unittest
from datCrawl import *
from requirements import *


class AwesomeGoogleCrawler(Crawler):
    # Duplicate of the definition moved to test/requirements.py in this
    # commit (now imported via ``from requirements import *``); these lines
    # are the deleted side of the diff.
    urls = [
        ('es', 'http\:\/\/(www\.)?google\.es', ),
        ('de', 'http\:\/\/(www\.)?google\.de', )
    ]


class AwesomeEmptyCrawler(Crawler):
    # Deliberately has no ``urls``: exercises the CrawlerDontHaveUrlsToWatch
    # path. Moved to test/requirements.py in this commit.
    pass


class AwesomeWikipediaTitleCrawler(Crawler):
    # Duplicate of the definition moved to test/requirements.py in this commit.
    urls = [
        ('title', 'http\:\/\/en.wikipedia.org\/wiki\/(.*)', )
    ]

    def action_title(self, url):
        # Stubbed 'title' action: returns a fixed payload, no network access.
        # LOOK, IM CRAWLING THE INTERNETS!
        return {'title': 'Python'}


class datCrawlTests(unittest.TestCase):
class datCrawlBaseTests(unittest.TestCase):

def test_instance_check(self):
core = datCrawl()
Expand All @@ -35,45 +15,6 @@ def test_register_urls(self):
core.register_url(data[0], data[1], data[2])
self.assertEquals(core.urls[0], data)

def test_cant_register_crawler_twice(self):
    # Registering the same crawler class a second time must raise
    # CrawlerAlreadyRegistered. (Moved to test_crawlers.py in this commit.)
    core = datCrawl()
    core.register_crawler(AwesomeGoogleCrawler)
    self.assertRaises(CrawlerAlreadyRegistered, lambda: core.register_crawler(AwesomeGoogleCrawler))

def test_register_crawler_with_urls(self):
    # A crawler with ``urls`` registers under its class name, and each
    # (action, pattern) tuple is stored in ``core.urls`` re-ordered as
    # (pattern, action, crawler_class_name).
    core = datCrawl()
    core.register_crawler(AwesomeGoogleCrawler)
    self.assertEqual(core.crawlers['AwesomeGoogleCrawler'], AwesomeGoogleCrawler)
    # Check the tuples are parsed correctly, in order: action, url, crawler name
    self.assertEqual(core.urls[0][0], AwesomeGoogleCrawler().urls[0][1])
    self.assertEqual(core.urls[0][1], AwesomeGoogleCrawler().urls[0][0])
    self.assertEqual(core.urls[0][2], AwesomeGoogleCrawler().__class__.__name__)
    self.assertEqual(core.urls[1][0], AwesomeGoogleCrawler().urls[1][1])
    self.assertEqual(core.urls[1][1], AwesomeGoogleCrawler().urls[1][0])
    self.assertEqual(core.urls[1][2], AwesomeGoogleCrawler().__class__.__name__)

def test_no_crawler_registered_for_url(self):
    # Running a URL no registered crawler's pattern matches must raise
    # CrawlerForThisURLNotFound.
    core = datCrawl()
    core.register_crawler(AwesomeGoogleCrawler)
    self.assertEqual(core.crawlers['AwesomeGoogleCrawler'], AwesomeGoogleCrawler)
    self.assertRaises(CrawlerForThisURLNotFound, lambda: core.run('http://www.github.com'))

def test_register_crawler_without_urls(self):
    # A crawler that declares no ``urls`` cannot be registered.
    core = datCrawl()
    self.assertRaises(CrawlerDontHaveUrlsToWatch, lambda: core.register_crawler(AwesomeEmptyCrawler))

def test_register_incorrect_crawler(self):
    # Only subclasses of the Crawler base may be registered; plain
    # ``object`` must be rejected.
    core = datCrawl()
    self.assertRaises(CrawlerIsNotInstanceOfBase, lambda: core.register_crawler(object))

def test_running_without_registered_crawlers(self):
    # Running with an empty crawler registry must raise NoCrawlerRegistered.
    core = datCrawl()
    self.assertRaises(NoCrawlerRegistered, lambda: core.run('www.google.es'))

def test_running_without_url_parameters(self):
    # ``run()`` requires a URL argument; calling it with none is a TypeError.
    core = datCrawl()
    self.assertRaises(TypeError, lambda: core.run())

def test_running_full_crawler(self):
core = datCrawl()
core.register_crawler(AwesomeWikipediaTitleCrawler)
Expand Down
47 changes: 47 additions & 0 deletions test/test_crawlers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import unittest
from datCrawl import *
from requirements import *


class datCrawlCrawlerTests(unittest.TestCase):
    """Tests for crawler registration and URL dispatch on the datCrawl core."""

    def test_register_crawler_with_urls(self):
        """Registered url tuples are stored as (pattern, action, class name)."""
        core = datCrawl()
        core.register_crawler(AwesomeGoogleCrawler)
        self.assertEqual(core.crawlers['AwesomeGoogleCrawler'], AwesomeGoogleCrawler)
        crawler = AwesomeGoogleCrawler()
        name = crawler.__class__.__name__
        # The crawler declares (action, pattern); the core re-orders each
        # entry into (pattern, action, crawler name).
        for position, (action, pattern) in enumerate(crawler.urls):
            stored = core.urls[position]
            self.assertEqual(stored[0], pattern)
            self.assertEqual(stored[1], action)
            self.assertEqual(stored[2], name)

    def test_register_incorrect_crawler(self):
        """Objects that are not Crawler subclasses are rejected."""
        core = datCrawl()
        self.assertRaises(CrawlerIsNotInstanceOfBase,
                          core.register_crawler, object)

    def test_cant_register_crawler_twice(self):
        """Registering the same crawler class twice raises an error."""
        core = datCrawl()
        core.register_crawler(AwesomeGoogleCrawler)
        self.assertRaises(CrawlerAlreadyRegistered,
                          core.register_crawler, AwesomeGoogleCrawler)

    def test_no_crawler_registered_for_url(self):
        """A URL matching no registered pattern raises an error on run()."""
        core = datCrawl()
        core.register_crawler(AwesomeGoogleCrawler)
        self.assertEqual(core.crawlers['AwesomeGoogleCrawler'], AwesomeGoogleCrawler)
        self.assertRaises(CrawlerForThisURLNotFound,
                          core.run, 'http://www.github.com')

    def test_register_crawler_without_urls(self):
        """A crawler with no ``urls`` attribute cannot be registered."""
        core = datCrawl()
        self.assertRaises(CrawlerDontHaveUrlsToWatch,
                          core.register_crawler, AwesomeEmptyCrawler)

    def test_running_without_registered_crawlers(self):
        """run() with an empty registry raises NoCrawlerRegistered."""
        core = datCrawl()
        self.assertRaises(NoCrawlerRegistered, core.run, 'www.google.es')

    def test_running_without_url_parameters(self):
        """run() requires a URL argument."""
        core = datCrawl()
        self.assertRaises(TypeError, core.run)

# Allow running this test module directly: ``python test_crawlers.py``.
if __name__ == '__main__':
    unittest.main()
23 changes: 23 additions & 0 deletions test/test_downloaders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import unittest
from datCrawl import datCrawl, downloaders
from datCrawl.exceptions import *
from requirements import *


class datCrawlDownloaderTests(unittest.TestCase):
    """Tests for downloader registration on the datCrawl core."""

    def test_register_downloader(self):
        """A downloader registers under its class name."""
        core = datCrawl()
        core.register_downloader(downloaders.DefaultDownloader)
        stored = core.downloaders['DefaultDownloader']
        self.assertEqual(stored, downloaders.DefaultDownloader)

    def test_register_incorrect_downloader(self):
        """Objects that are not downloader subclasses are rejected."""
        core = datCrawl()
        self.assertRaises(DownloaderIsNotInstanceOfBase,
                          core.register_downloader, object)

    def test_cant_register_downloader_twice(self):
        """Registering the same downloader class twice raises an error."""
        core = datCrawl()
        core.register_downloader(downloaders.DefaultDownloader)
        self.assertRaises(DownloaderAlreadyRegistered,
                          core.register_downloader,
                          downloaders.DefaultDownloader)

# Allow running this test module directly: ``python test_downloaders.py``.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 388909c

Please sign in to comment.