diff --git a/test/requirements.py b/test/requirements.py new file mode 100644 index 0000000..7bad819 --- /dev/null +++ b/test/requirements.py @@ -0,0 +1,22 @@ +from datCrawl.crawlers import Crawler + + +class AwesomeGoogleCrawler(Crawler): + urls = [ + ('es', r'http\:\/\/(www\.)?google\.es', ), + ('de', r'http\:\/\/(www\.)?google\.de', ) + ] + + +class AwesomeEmptyCrawler(Crawler): + pass + + +class AwesomeWikipediaTitleCrawler(Crawler): + urls = [ + ('title', r'http\:\/\/en.wikipedia.org\/wiki\/(.*)', ) + ] + + def action_title(self, url): + # Stub action: returns a canned title instead of fetching the URL. + return {'title': 'Python'} diff --git a/test/test_base.py b/test/test_base.py index 4e6bbcc..b8a447b 100644 --- a/test/test_base.py +++ b/test/test_base.py @@ -1,29 +1,9 @@ import unittest from datCrawl import * +from requirements import * -class AwesomeGoogleCrawler(Crawler): - urls = [ - ('es', 'http\:\/\/(www\.)?google\.es', ), - ('de', 'http\:\/\/(www\.)?google\.de', ) - ] - - -class AwesomeEmptyCrawler(Crawler): - pass - - -class AwesomeWikipediaTitleCrawler(Crawler): - urls = [ - ('title', 'http\:\/\/en.wikipedia.org\/wiki\/(.*)', ) - ] - - def action_title(self, url): - # LOOK, IM CRAWLING THE INTERNETS! 
- return {'title': 'Python'} - - -class datCrawlTests(unittest.TestCase): +class datCrawlBaseTests(unittest.TestCase): def test_instance_check(self): core = datCrawl() @@ -35,45 +15,6 @@ def test_register_urls(self): core.register_url(data[0], data[1], data[2]) self.assertEquals(core.urls[0], data) - def test_cant_register_crawler_twice(self): - core = datCrawl() - core.register_crawler(AwesomeGoogleCrawler) - self.assertRaises(CrawlerAlreadyRegistered, lambda: core.register_crawler(AwesomeGoogleCrawler)) - - def test_register_crawler_with_urls(self): - core = datCrawl() - core.register_crawler(AwesomeGoogleCrawler) - self.assertEqual(core.crawlers['AwesomeGoogleCrawler'], AwesomeGoogleCrawler) - # Some other checks if the tuple are well parsed, in order: action, url, crawler name - self.assertEqual(core.urls[0][0], AwesomeGoogleCrawler().urls[0][1]) - self.assertEqual(core.urls[0][1], AwesomeGoogleCrawler().urls[0][0]) - self.assertEqual(core.urls[0][2], AwesomeGoogleCrawler().__class__.__name__) - self.assertEqual(core.urls[1][0], AwesomeGoogleCrawler().urls[1][1]) - self.assertEqual(core.urls[1][1], AwesomeGoogleCrawler().urls[1][0]) - self.assertEqual(core.urls[1][2], AwesomeGoogleCrawler().__class__.__name__) - - def test_no_crawler_registered_for_url(self): - core = datCrawl() - core.register_crawler(AwesomeGoogleCrawler) - self.assertEqual(core.crawlers['AwesomeGoogleCrawler'], AwesomeGoogleCrawler) - self.assertRaises(CrawlerForThisURLNotFound, lambda: core.run('http://www.github.com')) - - def test_register_crawler_without_urls(self): - core = datCrawl() - self.assertRaises(CrawlerDontHaveUrlsToWatch, lambda: core.register_crawler(AwesomeEmptyCrawler)) - - def test_register_incorrect_crawler(self): - core = datCrawl() - self.assertRaises(CrawlerIsNotInstanceOfBase, lambda: core.register_crawler(object)) - - def test_running_without_registered_crawlers(self): - core = datCrawl() - self.assertRaises(NoCrawlerRegistered, lambda: core.run('www.google.es')) - - 
def test_running_without_url_parameters(self): - core = datCrawl() - self.assertRaises(TypeError, lambda: core.run()) - def test_running_full_crawler(self): core = datCrawl() core.register_crawler(AwesomeWikipediaTitleCrawler) diff --git a/test/test_crawlers.py b/test/test_crawlers.py new file mode 100644 index 0000000..b5256d6 --- /dev/null +++ b/test/test_crawlers.py @@ -0,0 +1,47 @@ +import unittest +from datCrawl import * +from requirements import * + + +class datCrawlCrawlerTests(unittest.TestCase): + def test_register_crawler_with_urls(self): + core = datCrawl() + core.register_crawler(AwesomeGoogleCrawler) + self.assertEqual(core.crawlers['AwesomeGoogleCrawler'], AwesomeGoogleCrawler) + # Check each registered tuple is stored in the order: url pattern, action, crawler class name + self.assertEqual(core.urls[0][0], AwesomeGoogleCrawler().urls[0][1]) + self.assertEqual(core.urls[0][1], AwesomeGoogleCrawler().urls[0][0]) + self.assertEqual(core.urls[0][2], AwesomeGoogleCrawler().__class__.__name__) + self.assertEqual(core.urls[1][0], AwesomeGoogleCrawler().urls[1][1]) + self.assertEqual(core.urls[1][1], AwesomeGoogleCrawler().urls[1][0]) + self.assertEqual(core.urls[1][2], AwesomeGoogleCrawler().__class__.__name__) + + def test_register_incorrect_crawler(self): + core = datCrawl() + self.assertRaises(CrawlerIsNotInstanceOfBase, lambda: core.register_crawler(object)) + + def test_cant_register_crawler_twice(self): + core = datCrawl() + core.register_crawler(AwesomeGoogleCrawler) + self.assertRaises(CrawlerAlreadyRegistered, lambda: core.register_crawler(AwesomeGoogleCrawler)) + + def test_no_crawler_registered_for_url(self): + core = datCrawl() + core.register_crawler(AwesomeGoogleCrawler) + self.assertEqual(core.crawlers['AwesomeGoogleCrawler'], AwesomeGoogleCrawler) + self.assertRaises(CrawlerForThisURLNotFound, lambda: core.run('http://www.github.com')) + + def test_register_crawler_without_urls(self): + core = datCrawl() + 
self.assertRaises(CrawlerDontHaveUrlsToWatch, lambda: core.register_crawler(AwesomeEmptyCrawler)) + + def test_running_without_registered_crawlers(self): + core = datCrawl() + self.assertRaises(NoCrawlerRegistered, lambda: core.run('www.google.es')) + + def test_running_without_url_parameters(self): + core = datCrawl() + self.assertRaises(TypeError, lambda: core.run()) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_downloaders.py b/test/test_downloaders.py new file mode 100644 index 0000000..2fe9b44 --- /dev/null +++ b/test/test_downloaders.py @@ -0,0 +1,23 @@ +import unittest +from datCrawl import datCrawl, downloaders +from datCrawl.exceptions import * +from requirements import * + + +class datCrawlDownloaderTests(unittest.TestCase): + def test_register_downloader(self): + core = datCrawl() + core.register_downloader(downloaders.DefaultDownloader) + self.assertEqual(core.downloaders['DefaultDownloader'], downloaders.DefaultDownloader) + + def test_register_incorrect_downloader(self): + core = datCrawl() + self.assertRaises(DownloaderIsNotInstanceOfBase, lambda: core.register_downloader(object)) + + def test_cant_register_downloader_twice(self): + core = datCrawl() + core.register_downloader(downloaders.DefaultDownloader) + self.assertRaises(DownloaderAlreadyRegistered, lambda: core.register_downloader(downloaders.DefaultDownloader)) + +if __name__ == '__main__': + unittest.main()