From c5b3bcc5daca5ed23a1f93ee83cdd3ac44e5d517 Mon Sep 17 00:00:00 2001 From: Ryan Noelk Date: Sat, 25 Mar 2017 21:05:55 -0400 Subject: [PATCH 01/10] Feature/readme cleanup (#1) * cleaning up readme * removing coverage --- .coveragerc | 17 ----------------- .coveralls.yml | 1 - .travis.yml | 5 +---- README.md | 1 - setup.py | 2 +- 5 files changed, 2 insertions(+), 24 deletions(-) delete mode 100644 .coveragerc delete mode 100644 .coveralls.yml diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 947b2fe1d..000000000 --- a/.coveragerc +++ /dev/null @@ -1,17 +0,0 @@ -[run] -branch = True -source = recipe_scrapers - -omit = recipe_scrapers/_abstract.py - recipe_scrapers/__init__.py - recipe_scrapers/tests/* - -[report] -exclude_lines = - pragma: no cover - - # Don't complain if tests don't hit defensive assertion code: - raise AttributeError - raise NotImplementedError - -ignore_errors = True diff --git a/.coveralls.yml b/.coveralls.yml deleted file mode 100644 index 91600595a..000000000 --- a/.coveralls.yml +++ /dev/null @@ -1 +0,0 @@ -service_name: travis-ci diff --git a/.travis.yml b/.travis.yml index a526725c0..2f055891d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ python: - "3.5" install: - - pip install beautifulsoup4==4.4.0 coverage==3.7.1 coveralls + - pip install beautifulsoup4==4.4.0 before_script: - curl -L -o test_data.zip https://www.dropbox.com/sh/wkxm933pae6q0e6/AAAXSinoeSn1-fz5Fz_LHhM6a?dl=1 @@ -15,6 +15,3 @@ before_script: script: - coverage run tests.py - -after_success: - - coveralls diff --git a/README.md b/README.md index c8c727d2c..307e85719 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ ## Recipe scrapers [![Build Status](https://travis-ci.org/hhursev/recipe-scraper.svg?branch=master)](https://travis-ci.org/hhursev/recipe-scraper) -[![Coverage Status](https://coveralls.io/repos/hhursev/recipe-scraper/badge.svg?branch=master&service=github)](https://coveralls.io/github/hhursev/recipe-scraper?branch=master) A simple web scraping tool for recipe sites I use in a project of mine that makes sense to live as a separate package. 
**No Python 2 support.** diff --git a/setup.py b/setup.py index ca18b5ece..6f2f51b9c 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='recipe-scraper', - url='https://github.com/hhursev/recipe-scraper/', + url='https://github.com/RyanNoelk/recipe-scraper/', version='1.0.1', description='Python package, scraping recipes from all over the internet', keywords='python recipes scraper harvest', From 25519d7c4d5904d76188e231d3b5bc0e17901839 Mon Sep 17 00:00:00 2001 From: Ryan Noelk Date: Sat, 25 Mar 2017 22:00:27 -0400 Subject: [PATCH 02/10] adding support for python 2.7 (#2) --- .travis.yml | 1 + MANIFEST.in | 1 + recipe_scrapers/_abstract.py | 16 ++++++++++++---- recipe_scrapers/_utils.py | 6 +++--- recipe_scrapers/allrecipes.py | 2 +- run_tester.py | 12 ++++++++++++ setup.py | 1 + 7 files changed, 31 insertions(+), 8 deletions(-) create mode 100755 run_tester.py diff --git a/.travis.yml b/.travis.yml index 2f055891d..23b666c83 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ language: python python: + - "2.7" - "3.2" - "3.3" - "3.4" diff --git a/MANIFEST.in b/MANIFEST.in index 07de9b432..6f664391b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,4 +2,5 @@ include LICENSE include README.md exclude tests.py +exclude run_tester.py recursive-exclude recipe_scrapers/tests * diff --git a/recipe_scrapers/_abstract.py b/recipe_scrapers/_abstract.py index 8abd85ec5..62d3d0213 100644 --- a/recipe_scrapers/_abstract.py +++ b/recipe_scrapers/_abstract.py @@ -1,4 +1,8 @@ -from urllib import request +try: + from urllib import request +except: + from urllib2 import urlopen as request + from urllib2 import Request from bs4 import BeautifulSoup @@ -9,15 +13,19 @@ } -class AbstractScraper(): +class AbstractScraper(object): def __init__(self, url, test=False): if test: # when testing, we load a file with url: self.soup = BeautifulSoup(url.read(), "html.parser") else: - self.soup = BeautifulSoup(request.urlopen( - request.Request(url, headers=HEADERS)).read(), "html.parser") + try: + self.soup = BeautifulSoup(request.urlopen( + request.Request(url, headers=HEADERS)).read(), "html.parser") + except: + self.soup = BeautifulSoup(request( + Request(url, headers=HEADERS)).read(), "html.parser") def host(self): """ get the host of the url, so we can use the correct scraper (check __init__.py) """ diff --git a/recipe_scrapers/_utils.py b/recipe_scrapers/_utils.py index 50c81d2c0..3fbeb50bf 100644 --- a/recipe_scrapers/_utils.py +++ b/recipe_scrapers/_utils.py @@ -23,7 +23,7 @@ def normalize_string(string): return re.sub( r'\s+', ' ', string.replace( - '\xa0', ' ').replace( #   - '\n', ' ').replace( - '\t', ' ').strip() + u'\xa0', u' ').replace( #   + u'\n', u' ').replace( + u'\t', u' ').strip() ) diff --git a/recipe_scrapers/allrecipes.py b/recipe_scrapers/allrecipes.py index 014de8149..dc70e6dca 100644 --- a/recipe_scrapers/allrecipes.py +++ b/recipe_scrapers/allrecipes.py @@ -15,7 +15,7 @@ def total_time(self): return get_minutes(self.soup.find('span', {'class': 'ready-in-time'})) def ingredients(self): - ingredients_html = self.soup.findAll('li', {'class': "checkList__line"}) + ingredients_html = self.soup.findAll('span', {'class': "recipe-ingred_txt added"}) return [ normalize_string(ingredient.get_text()) diff --git a/run_tester.py b/run_tester.py new file mode 100755 index 000000000..b60926878 --- /dev/null +++ b/run_tester.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python +# encoding: utf-8 + +from recipe_scrapers import scrap_me + +# give the url as a string, it can be url from any site 
listed below +scrap_me = scrap_me('http://allrecipes.com/Recipe/Apple-Cake-Iv/Detail.aspx') + +print(scrap_me.title()) +print(scrap_me.total_time()) +print(scrap_me.ingredients()) +print(scrap_me.instructions()) diff --git a/setup.py b/setup.py index 6f2f51b9c..6343c7465 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,7 @@ 'Environment :: Python 3+ module', 'Intended Audience :: Developers', 'Operating System :: OS Independent', + 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', From d4cc6735e7944f816aff133b3b1faa25cd886f31 Mon Sep 17 00:00:00 2001 From: "ryan.noelk" Date: Sat, 25 Mar 2017 22:07:15 -0400 Subject: [PATCH 03/10] adding run_tester to ingnore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 666506b17..f3864b5f6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,6 @@ *.egg-info .coverage +run_tester.py + recipe_scrapers/tests/test_data/*.html From 86e736f2ab79247c99fa41405b064e60d821ec86 Mon Sep 17 00:00:00 2001 From: "ryan.noelk" Date: Sat, 25 Mar 2017 22:08:09 -0400 Subject: [PATCH 04/10] rm --- run_tester.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 run_tester.py diff --git a/run_tester.py b/run_tester.py old mode 100755 new mode 100644 From 5da40d78a898f2e6597274bc6d384402178f5ab9 Mon Sep 17 00:00:00 2001 From: "ryan.noelk" Date: Sat, 25 Mar 2017 22:09:24 -0400 Subject: [PATCH 05/10] adding back test file --- .gitignore | 2 -- run_tester.py | 0 2 files changed, 2 deletions(-) mode change 100644 => 100755 run_tester.py diff --git a/.gitignore b/.gitignore index f3864b5f6..666506b17 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,4 @@ *.egg-info .coverage -run_tester.py - recipe_scrapers/tests/test_data/*.html diff --git a/run_tester.py b/run_tester.py old mode 100644 new mode 100755 From 32fa7bcafcc86e7ad77c12b975c0fe0b073b3a69 Mon Sep 17 00:00:00 2001 From: Ryan Noelk Date: Mon, 3 Apr 2017 22:48:20 -0400 Subject: [PATCH 06/10] Feature/budget bytes (#3) * adding budgetbytes class * getting the basics working * updating ignore * some updates * better support for recipe miner * cleaing up setup * upadting git ingore * upadting git ingore * updating version --- .gitignore | 3 ++ recipe_scrapers/__init__.py | 2 ++ recipe_scrapers/_abstract.py | 3 ++ recipe_scrapers/budgetbytes.py | 64 ++++++++++++++++++++++++++++++++++ run_tester.py | 11 +++--- setup.py | 5 ++- 6 files changed, 81 insertions(+), 7 deletions(-) create mode 100644 recipe_scrapers/budgetbytes.py diff --git a/.gitignore b/.gitignore index 666506b17..b11558f35 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ *.pyc *.egg-info .coverage +/.idea recipe_scrapers/tests/test_data/*.html +build/* +dist/* diff --git a/recipe_scrapers/__init__.py b/recipe_scrapers/__init__.py index c8efbdd90..f512ea81d 100644 --- a/recipe_scrapers/__init__.py +++ b/recipe_scrapers/__init__.py @@ -4,6 +4,7 @@ from .bbcfood import BBCFood from .bbcgoodfood import BBCGoodFood from .bonappetit import BonAppetit +from .budgetbytes import BudgetBytes from .closetcooking import ClosetCooking from .cookstr import Cookstr from .epicurious import Epicurious @@ -28,6 +29,7 @@ BBCFood.host(): BBCFood, BBCGoodFood.host(): BBCGoodFood, BonAppetit.host(): BonAppetit, + BudgetBytes.host(): BudgetBytes, ClosetCooking.host(): ClosetCooking, Cookstr.host(): Cookstr, Epicurious.host(): Epicurious, diff --git a/recipe_scrapers/_abstract.py 
b/recipe_scrapers/_abstract.py index 62d3d0213..82a804ce1 100644 --- a/recipe_scrapers/_abstract.py +++ b/recipe_scrapers/_abstract.py @@ -34,6 +34,9 @@ def host(self): def title(self): raise NotImplementedError("This should be implemented.") + def servings(self): + raise NotImplementedError("This should be implemented.") + def total_time(self): """ total time it takes to preparate the recipe in minutes """ raise NotImplementedError("This should be implemented.") diff --git a/recipe_scrapers/budgetbytes.py b/recipe_scrapers/budgetbytes.py new file mode 100644 index 000000000..e8eb5c990 --- /dev/null +++ b/recipe_scrapers/budgetbytes.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# encoding: utf-8 + +import unicodedata +from ._abstract import AbstractScraper +from ._utils import get_minutes, normalize_string + + +class BudgetBytes(AbstractScraper): + + @classmethod + def host(self): + return 'budgetbytes.com' + + def title(self): + return self.soup.find('h1').get_text() + + def total_time(self): + return { + 'prep-time': get_minutes(self.soup.find('time', {'itemprop': 'prepTime'})), + 'cook-time': get_minutes(self.soup.find('time', {'itemprop': 'cookTime'})) + } + + def servings(self): + return self.soup.find('span', {'itemprop': 'recipeYield'}).get_text() + + def ingredients(self): + ingredients_html = self.soup.findAll('li', {'class': 'ingredient'}) + ingredients = [] + + for ingredient in ingredients_html: + ingredient = normalize_string(ingredient.get_text()) + ingredient = ingredient.split(' $', 1)[0] + + try: + array = ingredient.split(' ', 2) + ingredient_dict = { + 'amount': round(unicodedata.numeric(array[0]), 3), + 'type': array[1], + 'title': array[2] + } + except: + ingredient_dict = { + 'title': ingredient + } + + ingredients.append(ingredient_dict) + + return ingredients + + def instructions(self): + instructions_html = self.soup.findAll('li', {'class': 'instruction'}) + + return [ + normalize_string(instruction.get_text()) + for instruction in instructions_html + ] + + def description(self): + li = self.soup.find('div', {'class': 'entry-content'}).findAll('p') + return li[0].get_text() + + def image(self): + return self.soup.find('img', {'itemprop': 'image'})["src"] diff --git a/run_tester.py b/run_tester.py index b60926878..78983d8ac 100755 --- a/run_tester.py +++ b/run_tester.py @@ -4,9 +4,12 @@ from recipe_scrapers import scrap_me # give the url as a string, it can be url from any site listed below -scrap_me = scrap_me('http://allrecipes.com/Recipe/Apple-Cake-Iv/Detail.aspx') +scrap_me = scrap_me('https://www.budgetbytes.com/2017/03/lemon-garlic-roasted-chicken') -print(scrap_me.title()) -print(scrap_me.total_time()) +# print(scrap_me.title()) +# print(scrap_me.servings()) +# print(scrap_me.total_time()) print(scrap_me.ingredients()) -print(scrap_me.instructions()) +# print(scrap_me.instructions()) +# print(scrap_me.description()) +# print(scrap_me.image()) diff --git a/setup.py b/setup.py index 6343c7465..66c7da47d 100644 --- a/setup.py +++ b/setup.py @@ -6,9 +6,9 @@ os.chdir(os.path.normpath(os.path.join(os.path.abspath(__file__), os.pardir))) setup( - name='recipe-scraper', + name='openeats-recipe-scraper', url='https://github.com/RyanNoelk/recipe-scraper/', - version='1.0.1', + version='1.0.2', description='Python package, scraping recipes from all over the internet', keywords='python recipes scraper harvest', long_description=README, @@ -19,7 +19,6 @@ package_data={'': ['LICENSE']}, include_package_data=True, classifiers=[ - 'Environment :: Python 3+ module', 'Intended 
Audience :: Developers', 'Operating System :: OS Independent', 'Programming Language :: Python :: 2.7', From 356d79e1e6074bf879e984f8473b46e5e7476934 Mon Sep 17 00:00:00 2001 From: Ryan Noelk Date: Tue, 4 Apr 2017 21:29:30 -0400 Subject: [PATCH 07/10] Feature/doc clean up (#4) * adding utf-8 * adding data fetcher * updating abstract --- .travis.yml | 6 +----- README.md | 8 ++++++-- recipe_scrapers/__init__.py | 2 ++ recipe_scrapers/_abstract.py | 19 +++++++++++++++++++ recipe_scrapers/_utils.py | 2 ++ recipe_scrapers/allrecipes.py | 2 ++ recipe_scrapers/bbcfood.py | 2 ++ recipe_scrapers/bbcgoodfood.py | 2 ++ recipe_scrapers/bonappetit.py | 2 ++ recipe_scrapers/budgetbytes.py | 2 ++ recipe_scrapers/closetcooking.py | 2 ++ recipe_scrapers/cookstr.py | 2 ++ recipe_scrapers/epicurious.py | 2 ++ recipe_scrapers/finedininglovers.py | 2 ++ recipe_scrapers/foodrepublic.py | 2 ++ recipe_scrapers/hundredandonecookbooks.py | 2 ++ recipe_scrapers/jamieoliver.py | 2 ++ recipe_scrapers/mybakingaddiction.py | 2 ++ recipe_scrapers/paninihappy.py | 2 ++ recipe_scrapers/realsimple.py | 4 +++- recipe_scrapers/simplyrecipes.py | 2 ++ recipe_scrapers/steamykitchen.py | 2 ++ recipe_scrapers/tastykitchen.py | 2 ++ recipe_scrapers/tests/__init__.py | 2 ++ recipe_scrapers/tests/test_allrecipes.py | 2 ++ recipe_scrapers/tests/test_bbcfood.py | 2 ++ recipe_scrapers/tests/test_bbcgoodfood.py | 2 ++ recipe_scrapers/tests/test_bonappetit.py | 2 ++ recipe_scrapers/tests/test_closetcooking.py | 2 ++ recipe_scrapers/tests/test_cookstr.py | 2 ++ recipe_scrapers/tests/test_data/.gitkeep | 2 ++ recipe_scrapers/tests/test_epicurious.py | 2 ++ .../tests/test_finedininglovers.py | 2 ++ recipe_scrapers/tests/test_foodrepublic.py | 2 ++ .../tests/test_hundredandonecookbooks.py | 2 ++ recipe_scrapers/tests/test_jamieoliver.py | 2 ++ .../tests/test_mybakingaddiction.py | 2 ++ recipe_scrapers/tests/test_paninihappy.py | 2 ++ recipe_scrapers/tests/test_realsimple.py | 2 ++ recipe_scrapers/tests/test_simplyrecipes.py | 2 ++ recipe_scrapers/tests/test_steamykitchen.py | 2 ++ recipe_scrapers/tests/test_tastykitchen.py | 2 ++ recipe_scrapers/tests/test_thepioneerwoman.py | 2 ++ recipe_scrapers/tests/test_thevintagemixer.py | 2 ++ .../tests/test_twopeasandtheirpod.py | 2 ++ .../tests/test_whatsgabycooking.py | 2 ++ recipe_scrapers/thepioneerwoman.py | 2 ++ recipe_scrapers/thevintagemixer.py | 2 ++ recipe_scrapers/twopeasandtheirpod.py | 2 ++ recipe_scrapers/whatsgabycooking.py | 2 ++ run_tester.py | 8 +------- setup.py | 2 ++ tests.py | 3 +++ 53 files changed, 127 insertions(+), 15 deletions(-) diff --git a/.travis.yml b/.travis.yml index 23b666c83..f51c3ef24 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,9 +10,5 @@ python: install: - pip install beautifulsoup4==4.4.0 -before_script: - - curl -L -o test_data.zip https://www.dropbox.com/sh/wkxm933pae6q0e6/AAAXSinoeSn1-fz5Fz_LHhM6a?dl=1 - - unzip test_data.zip -d recipe_scrapers/tests/test_data || true - script: - - coverage run tests.py + - python tests.py diff --git a/README.md b/README.md index 307e85719..19ac35b93 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,9 @@ [![Build Status](https://travis-ci.org/hhursev/recipe-scraper.svg?branch=master)](https://travis-ci.org/hhursev/recipe-scraper) A simple web scraping tool for recipe sites I use in a project of mine that makes sense to live as -a separate package. **No Python 2 support.** +a separate package. 
- pip install git+git://github.com/hhursev/recipe-scraper.git + pip install git+git://github.com/RyanNoelk/recipe-scraper.git then: @@ -18,6 +18,10 @@ then: scrap_me.total_time() scrap_me.ingredients() scrap_me.instructions() + + or + + scrap_me.data() ### Contribute diff --git a/recipe_scrapers/__init__.py b/recipe_scrapers/__init__.py index f512ea81d..68f885707 100644 --- a/recipe_scrapers/__init__.py +++ b/recipe_scrapers/__init__.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import re from .allrecipes import AllRecipes diff --git a/recipe_scrapers/_abstract.py b/recipe_scrapers/_abstract.py index 82a804ce1..e9006e1ca 100644 --- a/recipe_scrapers/_abstract.py +++ b/recipe_scrapers/_abstract.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 try: from urllib import request except: @@ -46,3 +48,20 @@ def ingredients(self): def instructions(self): raise NotImplementedError("This should be implemented.") + + def description(self): + return NotImplementedError("This should be implemented.") + + def image(self): + return NotImplementedError("This should be implemented.") + + def data(self): + return { + 'title': self.title(), + 'servings': self.servings(), + 'total_time': self.total_time(), + 'ingredients': self.ingredients(), + 'instructions': self.instructions(), + 'description': self.description(), + 'image': self.image(), + } diff --git a/recipe_scrapers/_utils.py b/recipe_scrapers/_utils.py index 3fbeb50bf..78dcb567c 100644 --- a/recipe_scrapers/_utils.py +++ b/recipe_scrapers/_utils.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import re diff --git a/recipe_scrapers/allrecipes.py b/recipe_scrapers/allrecipes.py index dc70e6dca..3fad766ed 100644 --- a/recipe_scrapers/allrecipes.py +++ b/recipe_scrapers/allrecipes.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/bbcfood.py b/recipe_scrapers/bbcfood.py index bd9a8d87b..24234270a 100644 --- a/recipe_scrapers/bbcfood.py +++ b/recipe_scrapers/bbcfood.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/bbcgoodfood.py b/recipe_scrapers/bbcgoodfood.py index bb6eab06c..b5c001f3e 100644 --- a/recipe_scrapers/bbcgoodfood.py +++ b/recipe_scrapers/bbcgoodfood.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/bonappetit.py b/recipe_scrapers/bonappetit.py index da7b912fb..ff872860d 100644 --- a/recipe_scrapers/bonappetit.py +++ b/recipe_scrapers/bonappetit.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/budgetbytes.py b/recipe_scrapers/budgetbytes.py index e8eb5c990..43bffe691 100644 --- a/recipe_scrapers/budgetbytes.py +++ b/recipe_scrapers/budgetbytes.py @@ -1,5 +1,7 @@ #!/usr/bin/env python # encoding: utf-8 +#!/usr/bin/env python +# encoding: utf-8 import unicodedata from ._abstract import AbstractScraper diff --git a/recipe_scrapers/closetcooking.py b/recipe_scrapers/closetcooking.py index aaacd1939..907c3e822 100644 --- a/recipe_scrapers/closetcooking.py +++ b/recipe_scrapers/closetcooking.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils 
import get_minutes, normalize_string diff --git a/recipe_scrapers/cookstr.py b/recipe_scrapers/cookstr.py index a3fd73884..27f2c8f6a 100644 --- a/recipe_scrapers/cookstr.py +++ b/recipe_scrapers/cookstr.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/epicurious.py b/recipe_scrapers/epicurious.py index ac9470a65..0b910ca33 100644 --- a/recipe_scrapers/epicurious.py +++ b/recipe_scrapers/epicurious.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/finedininglovers.py b/recipe_scrapers/finedininglovers.py index 92d8fbbb8..9d3a187dd 100644 --- a/recipe_scrapers/finedininglovers.py +++ b/recipe_scrapers/finedininglovers.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/foodrepublic.py b/recipe_scrapers/foodrepublic.py index 6fb2783b2..c84db3ff4 100644 --- a/recipe_scrapers/foodrepublic.py +++ b/recipe_scrapers/foodrepublic.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/hundredandonecookbooks.py b/recipe_scrapers/hundredandonecookbooks.py index 1622f6f69..11e59ad87 100644 --- a/recipe_scrapers/hundredandonecookbooks.py +++ b/recipe_scrapers/hundredandonecookbooks.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/jamieoliver.py b/recipe_scrapers/jamieoliver.py index cb5480e4c..8fbc8dfe6 100644 --- a/recipe_scrapers/jamieoliver.py +++ b/recipe_scrapers/jamieoliver.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/mybakingaddiction.py b/recipe_scrapers/mybakingaddiction.py index 0b9479d8d..cfb27585f 100644 --- a/recipe_scrapers/mybakingaddiction.py +++ b/recipe_scrapers/mybakingaddiction.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/paninihappy.py b/recipe_scrapers/paninihappy.py index 9e15548d7..8ecdb1eb9 100644 --- a/recipe_scrapers/paninihappy.py +++ b/recipe_scrapers/paninihappy.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/realsimple.py b/recipe_scrapers/realsimple.py index 800b4cd8c..30293eb0c 100644 --- a/recipe_scrapers/realsimple.py +++ b/recipe_scrapers/realsimple.py @@ -5,7 +5,9 @@ class RealSimple(AbstractScraper): @classmethod - def host(self): + def host(self):#!/usr/bin/env python +# encoding: utf-8 + return 'realsimple.com' def title(self): diff --git a/recipe_scrapers/simplyrecipes.py b/recipe_scrapers/simplyrecipes.py index 8133ea8e9..4bd45a303 100644 --- a/recipe_scrapers/simplyrecipes.py +++ b/recipe_scrapers/simplyrecipes.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/steamykitchen.py b/recipe_scrapers/steamykitchen.py 
index 16f9b1791..c37a3472a 100644 --- a/recipe_scrapers/steamykitchen.py +++ b/recipe_scrapers/steamykitchen.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/tastykitchen.py b/recipe_scrapers/tastykitchen.py index d3c8e62ec..5e8c6a099 100644 --- a/recipe_scrapers/tastykitchen.py +++ b/recipe_scrapers/tastykitchen.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/tests/__init__.py b/recipe_scrapers/tests/__init__.py index e69de29bb..6f804bce3 100644 --- a/recipe_scrapers/tests/__init__.py +++ b/recipe_scrapers/tests/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python +# encoding: utf-8 diff --git a/recipe_scrapers/tests/test_allrecipes.py b/recipe_scrapers/tests/test_allrecipes.py index fd658baa4..1f7e64db8 100644 --- a/recipe_scrapers/tests/test_allrecipes.py +++ b/recipe_scrapers/tests/test_allrecipes.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_bbcfood.py b/recipe_scrapers/tests/test_bbcfood.py index b8c682058..37a5b7ef1 100644 --- a/recipe_scrapers/tests/test_bbcfood.py +++ b/recipe_scrapers/tests/test_bbcfood.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_bbcgoodfood.py b/recipe_scrapers/tests/test_bbcgoodfood.py index d0f9abe04..5e86aa90c 100644 --- a/recipe_scrapers/tests/test_bbcgoodfood.py +++ b/recipe_scrapers/tests/test_bbcgoodfood.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_bonappetit.py b/recipe_scrapers/tests/test_bonappetit.py index 8dcccab79..85a22ea4f 100644 --- a/recipe_scrapers/tests/test_bonappetit.py +++ b/recipe_scrapers/tests/test_bonappetit.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_closetcooking.py b/recipe_scrapers/tests/test_closetcooking.py index 8a70966b2..3c9266f36 100644 --- a/recipe_scrapers/tests/test_closetcooking.py +++ b/recipe_scrapers/tests/test_closetcooking.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_cookstr.py b/recipe_scrapers/tests/test_cookstr.py index 043e1d591..233a882b0 100644 --- a/recipe_scrapers/tests/test_cookstr.py +++ b/recipe_scrapers/tests/test_cookstr.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_data/.gitkeep b/recipe_scrapers/tests/test_data/.gitkeep index e69de29bb..10bda5184 100644 --- a/recipe_scrapers/tests/test_data/.gitkeep +++ b/recipe_scrapers/tests/test_data/.gitkeep @@ -0,0 +1,2 @@ +hello world +hello world diff --git a/recipe_scrapers/tests/test_epicurious.py b/recipe_scrapers/tests/test_epicurious.py index 30bf49b59..5669fdffe 100644 --- a/recipe_scrapers/tests/test_epicurious.py +++ b/recipe_scrapers/tests/test_epicurious.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_finedininglovers.py b/recipe_scrapers/tests/test_finedininglovers.py index 1bf6c23d8..c6cdd0393 100644 --- a/recipe_scrapers/tests/test_finedininglovers.py +++ b/recipe_scrapers/tests/test_finedininglovers.py @@ -1,3 +1,5 @@ +#!/usr/bin/env 
python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_foodrepublic.py b/recipe_scrapers/tests/test_foodrepublic.py index 3ed86e0b3..f13178eb9 100644 --- a/recipe_scrapers/tests/test_foodrepublic.py +++ b/recipe_scrapers/tests/test_foodrepublic.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_hundredandonecookbooks.py b/recipe_scrapers/tests/test_hundredandonecookbooks.py index 3273d1cdb..0f4d1df96 100644 --- a/recipe_scrapers/tests/test_hundredandonecookbooks.py +++ b/recipe_scrapers/tests/test_hundredandonecookbooks.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_jamieoliver.py b/recipe_scrapers/tests/test_jamieoliver.py index d43a3cd71..75dbb0694 100644 --- a/recipe_scrapers/tests/test_jamieoliver.py +++ b/recipe_scrapers/tests/test_jamieoliver.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_mybakingaddiction.py b/recipe_scrapers/tests/test_mybakingaddiction.py index ed6f385ee..0b718ea9f 100644 --- a/recipe_scrapers/tests/test_mybakingaddiction.py +++ b/recipe_scrapers/tests/test_mybakingaddiction.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_paninihappy.py b/recipe_scrapers/tests/test_paninihappy.py index ab3c6cbb9..49e09260a 100644 --- a/recipe_scrapers/tests/test_paninihappy.py +++ b/recipe_scrapers/tests/test_paninihappy.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_realsimple.py b/recipe_scrapers/tests/test_realsimple.py index 7bc1f8f5c..77ab09318 100644 --- a/recipe_scrapers/tests/test_realsimple.py +++ b/recipe_scrapers/tests/test_realsimple.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_simplyrecipes.py b/recipe_scrapers/tests/test_simplyrecipes.py index a00b33162..73b5c26e1 100644 --- a/recipe_scrapers/tests/test_simplyrecipes.py +++ b/recipe_scrapers/tests/test_simplyrecipes.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_steamykitchen.py b/recipe_scrapers/tests/test_steamykitchen.py index 163ac8173..433607de0 100644 --- a/recipe_scrapers/tests/test_steamykitchen.py +++ b/recipe_scrapers/tests/test_steamykitchen.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_tastykitchen.py b/recipe_scrapers/tests/test_tastykitchen.py index f59880764..cb109c923 100644 --- a/recipe_scrapers/tests/test_tastykitchen.py +++ b/recipe_scrapers/tests/test_tastykitchen.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_thepioneerwoman.py b/recipe_scrapers/tests/test_thepioneerwoman.py index 8aa865aa5..c211aa84a 100644 --- a/recipe_scrapers/tests/test_thepioneerwoman.py +++ b/recipe_scrapers/tests/test_thepioneerwoman.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_thevintagemixer.py b/recipe_scrapers/tests/test_thevintagemixer.py index de8a3e894..ffca3a5aa 100644 --- a/recipe_scrapers/tests/test_thevintagemixer.py +++ b/recipe_scrapers/tests/test_thevintagemixer.py @@ -1,3 +1,5 @@ 
+#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_twopeasandtheirpod.py b/recipe_scrapers/tests/test_twopeasandtheirpod.py index 6a066b5b2..9c1618ee9 100644 --- a/recipe_scrapers/tests/test_twopeasandtheirpod.py +++ b/recipe_scrapers/tests/test_twopeasandtheirpod.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/tests/test_whatsgabycooking.py b/recipe_scrapers/tests/test_whatsgabycooking.py index 46b7c7964..d9a9dac69 100644 --- a/recipe_scrapers/tests/test_whatsgabycooking.py +++ b/recipe_scrapers/tests/test_whatsgabycooking.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os import unittest diff --git a/recipe_scrapers/thepioneerwoman.py b/recipe_scrapers/thepioneerwoman.py index 2fc1627cc..0b8577e1e 100644 --- a/recipe_scrapers/thepioneerwoman.py +++ b/recipe_scrapers/thepioneerwoman.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/thevintagemixer.py b/recipe_scrapers/thevintagemixer.py index f6a052537..b2284a671 100644 --- a/recipe_scrapers/thevintagemixer.py +++ b/recipe_scrapers/thevintagemixer.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/twopeasandtheirpod.py b/recipe_scrapers/twopeasandtheirpod.py index d68046bcf..6c6c56063 100644 --- a/recipe_scrapers/twopeasandtheirpod.py +++ b/recipe_scrapers/twopeasandtheirpod.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/recipe_scrapers/whatsgabycooking.py b/recipe_scrapers/whatsgabycooking.py index 170981abb..188cc4840 100644 --- a/recipe_scrapers/whatsgabycooking.py +++ b/recipe_scrapers/whatsgabycooking.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string diff --git a/run_tester.py b/run_tester.py index 78983d8ac..154af2842 100755 --- a/run_tester.py +++ b/run_tester.py @@ -6,10 +6,4 @@ # give the url as a string, it can be url from any site listed below scrap_me = scrap_me('https://www.budgetbytes.com/2017/03/lemon-garlic-roasted-chicken') -# print(scrap_me.title()) -# print(scrap_me.servings()) -# print(scrap_me.total_time()) -print(scrap_me.ingredients()) -# print(scrap_me.instructions()) -# print(scrap_me.description()) -# print(scrap_me.image()) +print(scrap_me.data()) diff --git a/setup.py b/setup.py index 66c7da47d..4fc8995b0 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# encoding: utf-8 import os from setuptools import setup, find_packages diff --git a/tests.py b/tests.py index 7b1ef38d0..daeab6c02 100644 --- a/tests.py +++ b/tests.py @@ -1,3 +1,6 @@ +#!/usr/bin/env python +# encoding: utf-8 + from recipe_scrapers.tests.test_allrecipes import * from recipe_scrapers.tests.test_bbcfood import * from recipe_scrapers.tests.test_bbcgoodfood import * From 7c625ae06a702ef9c3cf14d6935161408746d0e3 Mon Sep 17 00:00:00 2001 From: Ryan Noelk Date: Tue, 4 Apr 2017 22:09:04 -0400 Subject: [PATCH 08/10] Feature/doc clean up (#5) * adding utf-8 * adding data fetcher * updating abstract * removing some sites I don't use --- README.md | 7 --- recipe_scrapers/__init__.py | 14 ----- recipe_scrapers/bbcfood.py | 
34 ----------- recipe_scrapers/bbcgoodfood.py | 46 -------------- recipe_scrapers/closetcooking.py | 33 ---------- recipe_scrapers/hundredandonecookbooks.py | 29 --------- recipe_scrapers/paninihappy.py | 33 ---------- recipe_scrapers/realsimple.py | 33 ---------- recipe_scrapers/tests/test_bbcfood.py | 59 ------------------ recipe_scrapers/tests/test_bbcgoodfood.py | 58 ------------------ recipe_scrapers/tests/test_closetcooking.py | 56 ----------------- .../tests/test_hundredandonecookbooks.py | 60 ------------------- recipe_scrapers/tests/test_paninihappy.py | 58 ------------------ recipe_scrapers/tests/test_realsimple.py | 58 ------------------ recipe_scrapers/tests/test_thepioneerwoman.py | 57 ------------------ recipe_scrapers/thepioneerwoman.py | 33 ---------- tests.py | 7 --- 17 files changed, 675 deletions(-) delete mode 100644 recipe_scrapers/bbcfood.py delete mode 100644 recipe_scrapers/bbcgoodfood.py delete mode 100644 recipe_scrapers/closetcooking.py delete mode 100644 recipe_scrapers/hundredandonecookbooks.py delete mode 100644 recipe_scrapers/paninihappy.py delete mode 100644 recipe_scrapers/realsimple.py delete mode 100644 recipe_scrapers/tests/test_bbcfood.py delete mode 100644 recipe_scrapers/tests/test_bbcgoodfood.py delete mode 100644 recipe_scrapers/tests/test_closetcooking.py delete mode 100644 recipe_scrapers/tests/test_hundredandonecookbooks.py delete mode 100644 recipe_scrapers/tests/test_paninihappy.py delete mode 100644 recipe_scrapers/tests/test_realsimple.py delete mode 100644 recipe_scrapers/tests/test_thepioneerwoman.py delete mode 100644 recipe_scrapers/thepioneerwoman.py diff --git a/README.md b/README.md index 19ac35b93..d81511761 100644 --- a/README.md +++ b/README.md @@ -38,24 +38,17 @@ If you are programmer PRs with fixes are warmly welcomed and acknowledged with a ### Scrapers available for: -- [http://101cookbooks.com/](http://101cookbooks.com/) - [http://allrecipes.com/](http://allrecipes.com/) -- [http://bbc.co.uk/](http://bbc.co.uk/food/recipes/) -- [http://bbcgoodfood.com/](http://bbcgoodfood.com/) - [http://bonappetit.com/](http://bonappetit.com/) -- [http://closetcooking.com/](http://closetcooking.com/) - [http://cookstr.com/](http://cookstr.com/) - [http://epicurious.com/](http://epicurious.com/) - [http://finedininglovers.com/](https://www.finedininglovers.com/) - [http://foodrepublic.com/](http://foodrepublic.com) - [http://jamieoliver.com/](http://www.jamieoliver.com/) - [http://mybakingaddiction.com/](http://mybakingaddiction.com/) -- [http://paninihappy.com/](http://paninihappy.com/) -- [http://realsimple.com/](http://www.realsimple.com/) - [http://simplyrecipes.com/](http://www.simplyrecipes.com) - [http://steamykitchen.com/](http://steamykitchen.com/) - [http://tastykitchen.com/](http://tastykitchen.com/) -- [http://thepioneerwoman.com/](http://thepioneerwoman.com/) - [http://thevintagemixer.com/](http://www.thevintagemixer.com/) - [http://twopeasandtheirpod.com/](http://twopeasandtheirpod.com/) - [http://whatsgabycooking.com/](http://whatsgabycooking.com/) diff --git a/recipe_scrapers/__init__.py b/recipe_scrapers/__init__.py index 68f885707..1b0dc809d 100644 --- a/recipe_scrapers/__init__.py +++ b/recipe_scrapers/__init__.py @@ -3,24 +3,17 @@ import re from .allrecipes import AllRecipes -from .bbcfood import BBCFood -from .bbcgoodfood import BBCGoodFood from .bonappetit import BonAppetit from .budgetbytes import BudgetBytes -from .closetcooking import ClosetCooking from .cookstr import Cookstr from .epicurious import Epicurious from 
.finedininglovers import FineDiningLovers from .foodrepublic import FoodRepublic -from .hundredandonecookbooks import HundredAndOneCookbooks from .jamieoliver import JamieOliver from .mybakingaddiction import MyBakingAddiction -from .paninihappy import PaniniHappy -from .realsimple import RealSimple from .simplyrecipes import SimplyRecipes from .steamykitchen import SteamyKitchen from .tastykitchen import TastyKitchen -from .thepioneerwoman import ThePioneerWoman from .thevintagemixer import TheVintageMixer from .twopeasandtheirpod import TwoPeasAndTheirPod from .whatsgabycooking import WhatsGabyCooking @@ -28,24 +21,17 @@ SCRAPERS = { AllRecipes.host(): AllRecipes, - BBCFood.host(): BBCFood, - BBCGoodFood.host(): BBCGoodFood, BonAppetit.host(): BonAppetit, BudgetBytes.host(): BudgetBytes, - ClosetCooking.host(): ClosetCooking, Cookstr.host(): Cookstr, Epicurious.host(): Epicurious, FineDiningLovers.host(): FineDiningLovers, FoodRepublic.host(): FoodRepublic, - HundredAndOneCookbooks.host(): HundredAndOneCookbooks, JamieOliver.host(): JamieOliver, MyBakingAddiction.host(): MyBakingAddiction, - PaniniHappy.host(): PaniniHappy, - RealSimple.host(): RealSimple, SimplyRecipes.host(): SimplyRecipes, SteamyKitchen.host(): SteamyKitchen, TastyKitchen.host(): TastyKitchen, - ThePioneerWoman.host(): ThePioneerWoman, TheVintageMixer.host(): TheVintageMixer, TwoPeasAndTheirPod.host(): TwoPeasAndTheirPod, WhatsGabyCooking.host(): WhatsGabyCooking, diff --git a/recipe_scrapers/bbcfood.py b/recipe_scrapers/bbcfood.py deleted file mode 100644 index 24234270a..000000000 --- a/recipe_scrapers/bbcfood.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -from ._abstract import AbstractScraper -from ._utils import get_minutes, normalize_string - - -class BBCFood(AbstractScraper): - - @classmethod - def host(self): - return 'bbc.co.uk' - - def title(self): - return self.soup.find('div', {'class': 'article-title'}).find('h1').get_text() - - def total_time(self): - return get_minutes(self.soup.find('span', {'class': 'prepTime'})) +\ - get_minutes(self.soup.find('span', {'class': 'cookTime'})) - - def ingredients(self): - ingredients_html = self.soup.findAll('p', {'class': "ingredient"}) - - return [ - normalize_string(ingredient.get_text()) - for ingredient in ingredients_html - ] - - def instructions(self): - instructions_html = self.soup.findAll('li', {'class': 'instruction'}) - - return '\n'.join([ - normalize_string(instruction.get_text()) - for instruction in instructions_html - ]) diff --git a/recipe_scrapers/bbcgoodfood.py b/recipe_scrapers/bbcgoodfood.py deleted file mode 100644 index b5c001f3e..000000000 --- a/recipe_scrapers/bbcgoodfood.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -from ._abstract import AbstractScraper -from ._utils import get_minutes, normalize_string - - -class BBCGoodFood(AbstractScraper): - - @classmethod - def host(self): - return 'bbcgoodfood.com' - - def title(self): - return self.soup.find('h1', {'itemprop': 'name'}).get_text() - - def total_time(self): - time_full = get_minutes(self.soup.find('span', {'class': 'cooking-time-full'})) - time_prep = get_minutes(self.soup.find('span', {'class': 'cooking-time-prep'})) - time_cook = get_minutes(self.soup.find('span', {'class': 'cooking-time-cook'})) - - if time_full == 0: - return time_prep + time_cook - return time_full - - def ingredients(self): - ingredients_html = self.soup.find('section', {'id': "recipe-ingredients"}).findAll('li') - - return [ - 
normalize_string(ingredient.get_text()) - for ingredient in ingredients_html - ] - - def instructions(self): - instructions_html = self.soup.find('section', {'id': 'recipe-method'}).findAll('li') - - instructions_string = '\n'.join([ - normalize_string(instruction.get_text()) - for instruction in instructions_html - ]) - - if len(instructions_string) == 0: - instructions_string = normalize_string( - self.soup.find('section', {'id': 'recipe-method'}).get_text() - ) - - return instructions_string diff --git a/recipe_scrapers/closetcooking.py b/recipe_scrapers/closetcooking.py deleted file mode 100644 index 907c3e822..000000000 --- a/recipe_scrapers/closetcooking.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -from ._abstract import AbstractScraper -from ._utils import get_minutes, normalize_string - - -class ClosetCooking(AbstractScraper): - - @classmethod - def host(self): - return 'closetcooking.com' - - def title(self): - return normalize_string(self.soup.find('h2', {'class': 'post-title'}).get_text()) - - def total_time(self): - return get_minutes(self.soup.find('meta', {'itemprop': 'totalTime'})) - - def ingredients(self): - ingredients_html = self.soup.findAll('li', {'itemprop': "ingredients"}) - - return [ - normalize_string(ingredient.get_text()) - for ingredient in ingredients_html - ] - - def instructions(self): - instructions_html = self.soup.findAll('li', {'itemprop': 'recipeInstructions'}) - - return '\n'.join([ - normalize_string(instruction.get_text()) - for instruction in instructions_html - ]) diff --git a/recipe_scrapers/hundredandonecookbooks.py b/recipe_scrapers/hundredandonecookbooks.py deleted file mode 100644 index 11e59ad87..000000000 --- a/recipe_scrapers/hundredandonecookbooks.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -from ._abstract import AbstractScraper -from ._utils import get_minutes, normalize_string - - -class HundredAndOneCookbooks(AbstractScraper): - - @classmethod - def host(self): - return '101cookbooks.com' - - def title(self): - return self.soup.find('h1').get_text() - - def total_time(self): - return get_minutes(self.soup.find('span', {'class': 'preptime'})) - - def ingredients(self): - ingredients_html = self.soup.find('div', {'id': 'recipe'}).find('blockquote').find('p') - return ingredients_html.get_text().split('\n') - - def instructions(self): - instructions_html = self.soup.find('div', {'id': 'recipe'}).find('blockquote').find_next_siblings() - - return '\n'.join([ - normalize_string(instruction.get_text()) - for instruction in instructions_html - ]) diff --git a/recipe_scrapers/paninihappy.py b/recipe_scrapers/paninihappy.py deleted file mode 100644 index 8ecdb1eb9..000000000 --- a/recipe_scrapers/paninihappy.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -from ._abstract import AbstractScraper -from ._utils import get_minutes, normalize_string - - -class PaniniHappy(AbstractScraper): - - @classmethod - def host(self): - return 'paninihappy.com' - - def title(self): - return self.soup.find('h1', {'class': 'entry-title'}).get_text() - - def total_time(self): - return get_minutes(self.soup.find('span', {'class': 'duration'})) - - def ingredients(self): - ingredients_html = self.soup.findAll('li', {'class': "ingredient"}) - - return [ - normalize_string(ingredient.get_text()) - for ingredient in ingredients_html - ] - - def instructions(self): - instructions_html = self.soup.findAll('li', {'class': 'instruction'}) - - return '\n'.join([ - 
normalize_string(instruction.get_text()) - for instruction in instructions_html - ]) diff --git a/recipe_scrapers/realsimple.py b/recipe_scrapers/realsimple.py deleted file mode 100644 index 30293eb0c..000000000 --- a/recipe_scrapers/realsimple.py +++ /dev/null @@ -1,33 +0,0 @@ -from ._abstract import AbstractScraper -from ._utils import get_minutes, normalize_string - - -class RealSimple(AbstractScraper): - - @classmethod - def host(self):#!/usr/bin/env python -# encoding: utf-8 - - return 'realsimple.com' - - def title(self): - return self.soup.find('h1').get_text(strip=True) - - def total_time(self): - return get_minutes(self.soup.find('time', {'itemprop': 'totalTime'})) - - def ingredients(self): - ingredients_html = self.soup.findAll('ol', {'class': "ingredient-list"})[0] - - return [ - normalize_string(ingredient.get_text()) - for ingredient in ingredients_html.findAll('span', {'itemprop': 'ingredients'}) - ] - - def instructions(self): - instructions_html = self.soup.find('section', {'itemprop': 'recipeInstructions'}) - - return '\n'.join([ - normalize_string(instruction.get_text()) - for instruction in instructions_html.findAll('li') - ]) diff --git a/recipe_scrapers/tests/test_bbcfood.py b/recipe_scrapers/tests/test_bbcfood.py deleted file mode 100644 index 37a5b7ef1..000000000 --- a/recipe_scrapers/tests/test_bbcfood.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -import os -import unittest - -from recipe_scrapers.bbcfood import BBCFood - - -class TestBBCFoodScraper(unittest.TestCase): - def setUp(self): - # tests are run from tests.py - with open(os.path.join( - os.getcwd(), - 'recipe_scrapers', - 'tests', - 'test_data', - 'bbc_food.html' - )) as file_opened: - self.harvester_class = BBCFood(file_opened, test=True) - - def test_host(self): - self.assertEqual( - 'bbc.co.uk', - self.harvester_class.host() - ) - - def test_title(self): - self.assertEqual( - self.harvester_class.title(), - 'Irish cream and chocolate cheesecake' - ) - - def test_total_time(self): - self.assertEqual( - 130, - self.harvester_class.total_time() - ) - - def test_ingredients(self): - self.assertListEqual( - [ - '100g/3½oz butter', - '250g/8¾oz digestive biscuits, crushed', - '600g/1lb 5oz cream cheese', - '25ml/1fl oz Baileys or other Irish cream liqueur', - '100ml/3½oz icing sugar', - '300ml/10½oz double cream, whipped', - '100g/3½oz grated chocolate', - '200ml/7¼oz double cream, whipped', - 'cocoa powder, to dust' - ], - self.harvester_class.ingredients() - ) - - def test_instructions(self): - return self.assertEqual( - 'Melt the butter in a pan and add the crushed digestive biscuits. Mix well until the biscuits have absorbed all the butter.\nRemove from the heat and press into the bottom of a lined 18cm/7in springform tin. Place in the refrigerator and allow to set for one hour.\nMeanwhile, prepare the filling. Lightly whip the cream cheese then beat in the Irish cream and icing sugar. Fold in the whipped cream and grated chocolate. When smooth, spoon evenly onto the biscuits.\nRefrigerate and allow to set for a further two hours. Once set, remove and decorate with whipped cream and cocoa powder dusted over the top. 
Serve.', - self.harvester_class.instructions() - ) diff --git a/recipe_scrapers/tests/test_bbcgoodfood.py b/recipe_scrapers/tests/test_bbcgoodfood.py deleted file mode 100644 index 5e86aa90c..000000000 --- a/recipe_scrapers/tests/test_bbcgoodfood.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -import os -import unittest - -from recipe_scrapers.bbcgoodfood import BBCGoodFood - - -class TestBBCGoodFoodScraper(unittest.TestCase): - def setUp(self): - # tests are run from tests.py - with open(os.path.join( - os.getcwd(), - 'recipe_scrapers', - 'tests', - 'test_data', - 'bbc_good_food.html' - )) as file_opened: - self.harvester_class = BBCGoodFood(file_opened, test=True) - - def test_host(self): - self.assertEqual( - 'bbcgoodfood.com', - self.harvester_class.host() - ) - - def test_title(self): - self.assertEqual( - self.harvester_class.title(), - 'Cookie Monster cupcakes' - ) - - def test_total_time(self): - self.assertEqual( - 0, - self.harvester_class.total_time() - ) - - def test_ingredients(self): - self.assertListEqual( - - [ - '12 Cupcakes (I used the Lemon and Poppyseed recipe on this website and substituted vanilla for the lemon and poppyseeds)', - 'Frosting', - 'The frosting I made using a vegetable shortening (in Aus its called So Lite, I think in the US Crisco is similar) and icing sugar mixture. You could use butter cream but the frosting is pure white and colours beautifully. You would need to double the buttercream you usually use for 12 cupcakes', - 'Blue colouring - use a good one for the best colour', - 'Coconut - dyed blue to match the frosting', - '12 Choc chip cookies', - 'Melted white and dark chocolate to make the eyes.' - ], - self.harvester_class.ingredients() - ) - - def test_instructions(self): - return self.assertEqual( - "I used an icecream scoop full of frosting to get the right shape and then dipped in it coconut. Press the coconut on and tidy up the shape.\nWhen the frosting has firmed a little cut a slice out near the bottom and push a cookie in (you may have to trim the cookie little depending on the size of your cookies). Or make his mouth further up and put cookie pieces around it.\nThe eyes I made by using melted white to make a circle and then dark chocolate in the middle. 
Don't make them all exactly the same - it adds a bit of character to the finished cakes!\nSimple - very messy and lots of fun!", - self.harvester_class.instructions() - ) diff --git a/recipe_scrapers/tests/test_closetcooking.py b/recipe_scrapers/tests/test_closetcooking.py deleted file mode 100644 index 3c9266f36..000000000 --- a/recipe_scrapers/tests/test_closetcooking.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -import os -import unittest - -from recipe_scrapers.closetcooking import ClosetCooking - - -class TestClosetCooking(unittest.TestCase): - def setUp(self): - # tests are run from tests.py - with open(os.path.join( - os.getcwd(), - 'recipe_scrapers', - 'tests', - 'test_data', - 'closetcooking.html' - )) as file_opened: - self.harvester_class = ClosetCooking(file_opened, test=True) - - def test_host(self): - self.assertEqual( - 'closetcooking.com', - self.harvester_class.host() - ) - - def test_title(self): - self.assertEqual( - self.harvester_class.title(), - 'Bacon Wrapped Jalapeno Popper Stuffed Chicken' - ) - - def test_total_time(self): - self.assertEqual( - 40, - self.harvester_class.total_time() - ) - - def test_ingredients(self): - self.assertListEqual( - [ - '4 (6 ounce) chicken breasts, pounded thin', - 'salt and pepper to taste', - '4 jalapenos, diced', - '4 ounces cream cheese, room temperature', - '1 cup cheddar cheese, shredded', - '8 slices bacon' - ], - self.harvester_class.ingredients() - ) - - def test_instructions(self): - return self.assertEqual( - 'Lay the chicken flat, season both sides with salt and pepper, place 1/4 of the mixture of the jalapenos, cream cheese and cheddar on the chicken and roll them up.\nWrap each chicken breast up in 2 slices of bacon and place them in a baking dish on a wire rack.\nBake in a pre-heated 400F/200C oven until cooked, about 25-35 minutes.', - self.harvester_class.instructions() - ) diff --git a/recipe_scrapers/tests/test_hundredandonecookbooks.py b/recipe_scrapers/tests/test_hundredandonecookbooks.py deleted file mode 100644 index 0f4d1df96..000000000 --- a/recipe_scrapers/tests/test_hundredandonecookbooks.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -import os -import unittest - -from recipe_scrapers.hundredandonecookbooks import HundredAndOneCookbooks - - -class TestHundredAndOneCookbooksScraper(unittest.TestCase): - def setUp(self): - # tests are run from tests.py - with open(os.path.join( - os.getcwd(), - 'recipe_scrapers', - 'tests', - 'test_data', - '101cookbooks.html' - )) as file_opened: - self.harvester_class = HundredAndOneCookbooks(file_opened, test=True) - - def test_host(self): - self.assertEqual( - '101cookbooks.com', - self.harvester_class.host() - ) - - def test_title(self): - self.assertEqual( - self.harvester_class.title(), - "Nikki's Healthy Cookies Recipe" - ) - - def test_total_time(self): - self.assertEqual( - 0, - self.harvester_class.total_time() - ) - - def test_ingredients(self): - self.assertListEqual( - [ - '3 large, ripe bananas, well mashed (about 1 1/2 cups)', - '1 teaspoon vanilla extract', - "1/4 cup coconut oil, barely warm - so it isn't solid (or alternately, olive oil)", - '2 cups rolled oats', - '2/3 cup almond meal', - '1/3 cup coconut, finely shredded & unsweetened', - '1/2 teaspoon cinnamon', - '1/2 teaspoon fine grain sea salt', - '1 teaspoon baking powder', - '6 - 7 ounces chocolate chips or dark chocolate bar chopped' - ], - self.harvester_class.ingredients() - ) - - def test_instructions(self): - return self.assertEqual( 
- "Preheat oven to 350 degrees, racks in the top third.\nIn a large bowl combine the bananas, vanilla extract, and coconut oil. Set aside. In another bowl whisk together the oats, almond meal, shredded coconut, cinnamon, salt, and baking powder. Add the dry ingredients to the wet ingredients and stir until combined. Fold in the chocolate chunks/chips.The dough is a bit looser than a standard cookie dough, don't worry about it. Drop dollops of the dough, each about 2 teaspoons in size, an inch apart, onto a parchment (or Silpat) lined baking sheet. Bake for 12 - 14 minutes. I baked these as long as possible without burning the bottoms and they were perfect - just shy of 15 minutes seems to be about right in my oven.\nMakes about 3 dozen bite-sized cookies.\nPrint Recipe", - self.harvester_class.instructions() - ) diff --git a/recipe_scrapers/tests/test_paninihappy.py b/recipe_scrapers/tests/test_paninihappy.py deleted file mode 100644 index 49e09260a..000000000 --- a/recipe_scrapers/tests/test_paninihappy.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -import os -import unittest - -from recipe_scrapers.paninihappy import PaniniHappy - - -class TestPaniniHappyScraper(unittest.TestCase): - def setUp(self): - # tests are run from tests.py - with open(os.path.join( - os.getcwd(), - 'recipe_scrapers', - 'tests', - 'test_data', - 'paninihappy.html' - )) as file_opened: - self.harvester_class = PaniniHappy(file_opened, test=True) - - def test_host(self): - self.assertEqual( - 'paninihappy.com', - self.harvester_class.host() - ) - - def test_title(self): - self.assertEqual( - self.harvester_class.title(), - 'Grilled Mac & Cheese with BBQ Pulled Pork' - ) - - def test_total_time(self): - self.assertEqual( - 30, - self.harvester_class.total_time() - ) - - def test_ingredients(self): - self.assertListEqual( - [ - '4 tablespoons unsalted butter, divided', - '4 cups prepared macaroni and cheese, warmed', - '2 onions, thinly sliced', - 'Kosher salt and freshly ground pepper', - '1 cup barbecue sauce', - '2 cups prepared pulled pork', - '8 slices sourdough bread', - '12 slices sharp cheddar cheese (about 6 ounces)' - ], - self.harvester_class.ingredients() - ) - - def test_instructions(self): - return self.assertEqual( - 'Spread the macaroni and cheese in an 8-inch-square baking dish to about 3/4 inch thick. Cover with plastic wrap and chill until firm, about 45 minutes. Cut the macaroni and cheese into squares that are slightly smaller than the bread slices.\nMeanwhile, melt 2 tablespoons butter in a skillet over medium heat. Add the onions and cook, stirring, until caramelized, about 20 minutes. Season with salt and pepper.\nCombine the barbecue sauce and pulled pork in a saucepan over low heat and cook until warmed through, about 5 minutes.\nPreheat the panini grill to medium-high heat.\nMelt the remaining 2 tablespoons butter and brush on one side of each bread slice. Flip over half of the bread slices; layer 1 slice of cheddar, 1 macaroni-and-cheese square and another slice of cheddar on each. Top each with one-quarter of the pulled pork and caramelized onions and another slice of cheddar. 
Top with the remaining bread slices, buttered-side up.\nWorking in batches, cook the sandwiches until the cheese melts and the bread is golden, about 5 minutes.', - self.harvester_class.instructions() - ) diff --git a/recipe_scrapers/tests/test_realsimple.py b/recipe_scrapers/tests/test_realsimple.py deleted file mode 100644 index 77ab09318..000000000 --- a/recipe_scrapers/tests/test_realsimple.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -import os -import unittest - -from recipe_scrapers.realsimple import RealSimple - - -class TestRealSimpleScraper(unittest.TestCase): - def setUp(self): - # tests are run from tests.py - with open(os.path.join( - os.getcwd(), - 'recipe_scrapers', - 'tests', - 'test_data', - 'realsimple.html' - )) as file_opened: - self.harvester_class = RealSimple(file_opened, test=True) - - def test_host(self): - self.assertEqual( - 'realsimple.com', - self.harvester_class.host() - ) - - def test_title(self): - self.assertEqual( - self.harvester_class.title(), - 'Pan-Roasted Chicken With Lemon-Garlic Green Beans' - ) - - def test_total_time(self): - self.assertEqual( - 75, - self.harvester_class.total_time() - ) - - def test_ingredients(self): - self.assertListEqual( - [ - 'tablespoons olive oil', - '2 lemons, 1 thinly sliced, 1 juiced', - '4 cloves garlic, minced', - 'teaspoon kosher salt', - 'teaspoon freshly ground black pepper', - 'pound trimmed green beans', - '8 small red potatoes, quartered', - '4 chicken breasts (bones left in, with skin, about 3 1/4 pounds)' - ], - self.harvester_class.ingredients() - ) - - def test_instructions(self): - self.assertEqual( - 'Preheat oven to 450°F. Coat a large baking dish or cast-iron skillet with 1 tablespoon of the olive oil. Arrange the lemon slices in a single layer in the bottom of the dish or skillet.\nIn a large bowl, combine the remaining oil, lemon juice, garlic, salt, and pepper; add the green beans and toss to coat. Using a slotted spoon or tongs, remove the green beans and arrange them on top of the lemon slices. Add the potatoes to the same olive-oil mixture and toss to coat. Using a slotted spoon or tongs, arrange the potatoes along the inside edge of the dish or skillet on top of the green beans. Place the chicken in the same bowl with the olive-oil mixture and coat thoroughly. Place the chicken, skin-side up, in the dish or skillet. Pour any of the remaining olive-oil mixture over the chicken.\nRoast for 50 minutes. Remove the chicken from the dish or skillet. Place the beans and potatoes back in oven for 10 minutes more or until the potatoes are tender. Place a chicken breast on each of 4 serving plates; divide the green beans and potatoes equally. 
Serve warm.', - self.harvester_class.instructions() - ) diff --git a/recipe_scrapers/tests/test_thepioneerwoman.py b/recipe_scrapers/tests/test_thepioneerwoman.py deleted file mode 100644 index c211aa84a..000000000 --- a/recipe_scrapers/tests/test_thepioneerwoman.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -import os -import unittest - -from recipe_scrapers.thepioneerwoman import ThePioneerWoman - - -class TestThePioneerWomanScraper(unittest.TestCase): - def setUp(self): - # tests are run from tests.py - with open(os.path.join( - os.getcwd(), - 'recipe_scrapers', - 'tests', - 'test_data', - 'thepioneerwoman.html' - )) as file_opened: - self.harvester_class = ThePioneerWoman(file_opened, test=True) - - def test_host(self): - self.assertEqual( - 'thepioneerwoman.com', - self.harvester_class.host() - ) - - def test_title(self): - self.assertEqual( - self.harvester_class.title(), - 'Patty Melts' - ) - - def test_total_time(self): - self.assertEqual( - 35, - self.harvester_class.total_time() - ) - - def test_ingredients(self): - self.assertListEqual( - [ - '1 stick Butter', - '1 whole Large Onion, Halved And Sliced', - '1-1/2 pound Ground Beef', - 'Salt And Pepper, to taste', - '5 dashes Worcestershire Sauce', - '8 slices Swiss Cheese', - '8 slices Rye Bread' - ], - self.harvester_class.ingredients() - ) - - def test_instructions(self): - return self.assertEqual( - 'In a medium skillet, melt 2 tablespoons of butter over medium-low heat.\n Throw in the sliced onions and cook slowly for 20 to 25 minutes, stirring occasionally, until the onions are golden brown and soft.\n In a medium bowl, mix together the ground beef, salt & pepper, and Worcestershire.\n Form into 4 patties.\nMelt 2 tablespoons butter in a separate skillet over medium heat.\n Cook the patties on both sides until totally done in the middle.\n Assemble patty melts this way: Slice of bread, slice of cheese, hamburger patty, 1/4 of the cooked onions, another slice of cheese, and another slice of bread.\n On a clean griddle or in a skillet, melt 2 tablespoons butter and grill the sandwiches over medium heat until golden brown.\n Remove the sandwiches and add the remaining 2 tablespoons of butter to the skillet.\n Turn the sandwiches to the skillet, flipping them to the other side.\n Cook until golden brown and crisp, and until cheese is melted.\n Slice in half and serve immediately!', - self.harvester_class.instructions() - ) diff --git a/recipe_scrapers/thepioneerwoman.py b/recipe_scrapers/thepioneerwoman.py deleted file mode 100644 index 0b8577e1e..000000000 --- a/recipe_scrapers/thepioneerwoman.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -from ._abstract import AbstractScraper -from ._utils import get_minutes, normalize_string - - -class ThePioneerWoman(AbstractScraper): - - @classmethod - def host(self): - return 'thepioneerwoman.com' - - def title(self): - return self.soup.find('h3', {'class': 'recipe-title'}).get_text() - - def total_time(self): - return sum([ - get_minutes(dd) - for dd in self.soup.find('div', {'class': 'recipe-summary-time'}).findAll('dd') - ]) - - def ingredients(self): - ingredients_html = self.soup.find('ul', {'class': "list-ingredients"}).findAll('li') - - return [ - normalize_string(ingredient.get_text()) - for ingredient in ingredients_html - ] - - def instructions(self): - instructions_html = self.soup.findAll('div', {'class': 'panel-body'})[-1] - - return normalize_string(instructions_html.get_text()).replace('.', '.\n') diff --git 
a/tests.py b/tests.py index daeab6c02..9f378f7fd 100644 --- a/tests.py +++ b/tests.py @@ -2,23 +2,16 @@ # encoding: utf-8 from recipe_scrapers.tests.test_allrecipes import * -from recipe_scrapers.tests.test_bbcfood import * -from recipe_scrapers.tests.test_bbcgoodfood import * from recipe_scrapers.tests.test_bonappetit import * -from recipe_scrapers.tests.test_closetcooking import * from recipe_scrapers.tests.test_cookstr import * from recipe_scrapers.tests.test_epicurious import * from recipe_scrapers.tests.test_finedininglovers import * from recipe_scrapers.tests.test_foodrepublic import * -from recipe_scrapers.tests.test_hundredandonecookbooks import * from recipe_scrapers.tests.test_jamieoliver import * from recipe_scrapers.tests.test_mybakingaddiction import * -from recipe_scrapers.tests.test_paninihappy import * -from recipe_scrapers.tests.test_realsimple import * from recipe_scrapers.tests.test_simplyrecipes import * from recipe_scrapers.tests.test_steamykitchen import * from recipe_scrapers.tests.test_tastykitchen import * -from recipe_scrapers.tests.test_thepioneerwoman import * from recipe_scrapers.tests.test_thevintagemixer import * from recipe_scrapers.tests.test_twopeasandtheirpod import * from recipe_scrapers.tests.test_whatsgabycooking import * From e100d75f837b82c90cb0183f60296714cf7ef5ae Mon Sep 17 00:00:00 2001 From: "ryan.noelk" Date: Wed, 5 Apr 2017 22:15:37 -0400 Subject: [PATCH 09/10] updating comment and scripts --- README.md | 17 ++++++----------- example.py | 10 ++++++++++ run_tester.py | 9 --------- setup.py | 2 +- 4 files changed, 17 insertions(+), 21 deletions(-) create mode 100755 example.py delete mode 100755 run_tester.py diff --git a/README.md b/README.md index d81511761..5c3f7ead3 100644 --- a/README.md +++ b/README.md @@ -5,23 +5,18 @@ A simple web scraping tool for recipe sites I use in a project of mine that makes sense to live as a separate package. - pip install git+git://github.com/RyanNoelk/recipe-scraper.git + pip install git+git://github.com/RyanNoelk/recipe-scraper.git@1.0.3 then: from recipe_scrapers import scrap_me # give the url as a string, it can be url from any site listed below - scrap_me = scrap_me('http://allrecipes.com/Recipe/Apple-Cake-Iv/Detail.aspx') - - scrap_me.title() - scrap_me.total_time() - scrap_me.ingredients() - scrap_me.instructions() - - or - - scrap_me.data() + try: + scrap_me = scrap_me('https://www.budgetbytes.com/2017/03/lemon-garlic-roasted-chicken') + print(scrap_me.data()) + except KeyError: + print 'Website is not supported.' ### Contribute diff --git a/example.py b/example.py new file mode 100755 index 000000000..e7562bf25 --- /dev/null +++ b/example.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python +# encoding: utf-8 + +from recipe_scrapers import scrap_me + +try: + scrap_me = scrap_me('https://www.budgetbytes.com/2017/03/lemon-garlic-roasted-chicken') + print(scrap_me.data()) +except KeyError: + print "Website is not supported." 
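A portability note on the new example.py and the README snippet above: `print "Website is not supported."` is Python 2 statement syntax and will not parse under Python 3, even though the package now targets both 2.7 and 3. The following is only a minimal sketch of a 2/3-safe variant of the same flow (it also binds the result to a separate name instead of rebinding `scrap_me`, which is an editorial choice, not part of the patch):

    #!/usr/bin/env python
    # encoding: utf-8
    # Sketch only: same behaviour as example.py above, but the print call
    # parses on both Python 2.7 and Python 3.

    from __future__ import print_function

    from recipe_scrapers import scrap_me

    try:
        scraper = scrap_me('https://www.budgetbytes.com/2017/03/lemon-garlic-roasted-chicken')
        print(scraper.data())
    except KeyError:
        print('Website is not supported.')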
diff --git a/run_tester.py b/run_tester.py
deleted file mode 100755
index 154af2842..000000000
--- a/run_tester.py
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-from recipe_scrapers import scrap_me
-
-# give the url as a string, it can be url from any site listed below
-scrap_me = scrap_me('https://www.budgetbytes.com/2017/03/lemon-garlic-roasted-chicken')
-
-print(scrap_me.data())
diff --git a/setup.py b/setup.py
index 4fc8995b0..8e63eae11 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ setup(
     name='openeats-recipe-scraper',
     url='https://github.com/RyanNoelk/recipe-scraper/',
-    version='1.0.2',
+    version='1.0.3',
     description='Python package, scraping recipes from all over the internet',
     keywords='python recipes scraper harvest',
     long_description=README,

From feabea3260b11a9570be7da614105d2bc6b6c031 Mon Sep 17 00:00:00 2001
From: "ryan.noelk"
Date: Wed, 5 Apr 2017 23:06:41 -0400
Subject: [PATCH 10/10] adding allrecipes

---
 example.py                     |  5 ++--
 recipe_scrapers/__init__.py    | 26 +++++++++----------
 recipe_scrapers/allrecipes.py  | 46 ++++++++++++++++++++++++++++------
 recipe_scrapers/budgetbytes.py |  2 --
 4 files changed, 54 insertions(+), 25 deletions(-)

diff --git a/example.py b/example.py
index e7562bf25..8aec580e6 100755
--- a/example.py
+++ b/example.py
@@ -1,10 +1,11 @@
 #!/usr/bin/env python
 # encoding: utf-8

+from pprint import pprint
 from recipe_scrapers import scrap_me

 try:
-    scrap_me = scrap_me('https://www.budgetbytes.com/2017/03/lemon-garlic-roasted-chicken')
-    print(scrap_me.data())
+    scrap_me = scrap_me('http://allrecipes.com/recipe/213742/meatball-nirvana/?internalSource=staff%20pick&referringId=80&referringContentType=recipe%20hub&clickId=cardslot%205')
+    pprint(scrap_me.data())
 except KeyError:
     print "Website is not supported."
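For orientation, the `except KeyError` in example.py is meaningful because `scrap_me()` presumably resolves the URL's host against the SCRAPERS registry changed in the next hunk. The dispatch itself is not part of this patch, so the code below is an assumed sketch of how such a lookup could raise KeyError, not the actual implementation; the helper name `pick_scraper` and the `www.` stripping are hypothetical.

    # Assumed sketch of the host-based dispatch behind scrap_me(); the real
    # function in recipe_scrapers/__init__.py is not shown in this patch.
    try:
        from urllib.parse import urlparse   # Python 3
    except ImportError:
        from urlparse import urlparse       # Python 2

    from recipe_scrapers import SCRAPERS    # host -> scraper class (see hunk below)

    def pick_scraper(url):
        host = urlparse(url).netloc
        if host.startswith('www.'):
            host = host[len('www.'):]
        return SCRAPERS[host](url)          # unsupported host raises KeyError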
diff --git a/recipe_scrapers/__init__.py b/recipe_scrapers/__init__.py
index 1b0dc809d..ef856b9d8 100644
--- a/recipe_scrapers/__init__.py
+++ b/recipe_scrapers/__init__.py
@@ -21,20 +21,20 @@ SCRAPERS = {
     AllRecipes.host(): AllRecipes,
-    BonAppetit.host(): BonAppetit,
+    # BonAppetit.host(): BonAppetit,
     BudgetBytes.host(): BudgetBytes,
-    Cookstr.host(): Cookstr,
-    Epicurious.host(): Epicurious,
-    FineDiningLovers.host(): FineDiningLovers,
-    FoodRepublic.host(): FoodRepublic,
-    JamieOliver.host(): JamieOliver,
-    MyBakingAddiction.host(): MyBakingAddiction,
-    SimplyRecipes.host(): SimplyRecipes,
-    SteamyKitchen.host(): SteamyKitchen,
-    TastyKitchen.host(): TastyKitchen,
-    TheVintageMixer.host(): TheVintageMixer,
-    TwoPeasAndTheirPod.host(): TwoPeasAndTheirPod,
-    WhatsGabyCooking.host(): WhatsGabyCooking,
+    # Cookstr.host(): Cookstr,
+    # Epicurious.host(): Epicurious,
+    # FineDiningLovers.host(): FineDiningLovers,
+    # FoodRepublic.host(): FoodRepublic,
+    # JamieOliver.host(): JamieOliver,
+    # MyBakingAddiction.host(): MyBakingAddiction,
+    # SimplyRecipes.host(): SimplyRecipes,
+    # SteamyKitchen.host(): SteamyKitchen,
+    # TastyKitchen.host(): TastyKitchen,
+    # TheVintageMixer.host(): TheVintageMixer,
+    # TwoPeasAndTheirPod.host(): TwoPeasAndTheirPod,
+    # WhatsGabyCooking.host(): WhatsGabyCooking,
 }
diff --git a/recipe_scrapers/allrecipes.py b/recipe_scrapers/allrecipes.py
index 3fad766ed..7786472cb 100644
--- a/recipe_scrapers/allrecipes.py
+++ b/recipe_scrapers/allrecipes.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 # encoding: utf-8
+
+from fractions import Fraction

 from ._abstract import AbstractScraper
 from ._utils import get_minutes, normalize_string
@@ -14,21 +16,49 @@ def title(self):
         return self.soup.find('h1').get_text()

     def total_time(self):
-        return get_minutes(self.soup.find('span', {'class': 'ready-in-time'}))
+        return {
+            'prep-time': get_minutes(self.soup.find('time', {'itemprop': 'prepTime'})),
+            'cook-time': get_minutes(self.soup.find('time', {'itemprop': 'totalTime'}))
+        }
+
+    def servings(self):
+        return self.soup.find('span', {'ng-bind': 'adjustedServings'}).get_text()

     def ingredients(self):
         ingredients_html = self.soup.findAll('span', {'class': "recipe-ingred_txt added"})
+        ingredients = []

-        return [
-            normalize_string(ingredient.get_text())
-            for ingredient in ingredients_html
-            if ingredient.get_text(strip=True) not in ('Add all ingredients to list', '')
-        ]
+        for ingredient in ingredients_html:
+            ingredient = normalize_string(ingredient.get_text())
+
+            try:
+                array = ingredient.split(' ', 2)
+                ingredient_dict = {
+                    'amount': round(float(sum(Fraction(s) for s in array[0].split())), 3),
+                    'type': array[1],
+                    'title': array[2]
+                }
+            except:
+                ingredient_dict = {
+                    'title': ingredient
+                }
+
+            ingredients.append(ingredient_dict)
+        return ingredients

     def instructions(self):
         instructions_html = self.soup.findAll('span', {'class': 'recipe-directions__list--item'})

-        return '\n'.join([
+        return [
             normalize_string(instruction.get_text())
             for instruction in instructions_html
-        ])
+        ]
+
+    def description(self):
+        return normalize_string(
+            self.soup.find('div', {'class': 'submitter__description'}).get_text()
+        )
+
+    def image(self):
+        return self.soup.find('img', {'class': 'rec-photo'})["src"]
diff --git a/recipe_scrapers/budgetbytes.py b/recipe_scrapers/budgetbytes.py
index 43bffe691..e8eb5c990 100644
--- a/recipe_scrapers/budgetbytes.py
+++ b/recipe_scrapers/budgetbytes.py
@@ -1,7 +1,5 @@
 #!/usr/bin/env python
 # encoding: utf-8
-#!/usr/bin/env python
-# encoding: utf-8
 import unicodedata

 from ._abstract import AbstractScraper
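The most intricate part of the new AllRecipes code is the ingredient split above: `array[0]` is treated as a quantity (possibly a bare fraction such as `3/4`), `array[1]` as a unit, and the remainder as the title, with a broad `except` falling back to a title-only dict. Below is a standalone sketch of that same parsing rule, useful for eyeballing its behaviour outside the scraper; the function name `parse_ingredient` is made up for illustration, and the named exceptions stand in for the bare `except` used in the patch.

    from fractions import Fraction

    def parse_ingredient(text):
        # Mirrors the split used in AllRecipes.ingredients(): quantity, unit, rest.
        try:
            array = text.split(' ', 2)
            return {
                'amount': round(float(sum(Fraction(s) for s in array[0].split())), 3),
                'type': array[1],
                'title': array[2],
            }
        except (ValueError, IndexError, ZeroDivisionError):
            # Anything that does not parse becomes a title-only entry.
            return {'title': text}

    # parse_ingredient('3/4 cup white sugar')
    #   -> {'amount': 0.75, 'type': 'cup', 'title': 'white sugar'}
    # parse_ingredient('salt and pepper to taste')
    #   -> {'title': 'salt and pepper to taste'}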