diff --git a/README.md b/README.md index d81511761..5c3f7ead3 100644 --- a/README.md +++ b/README.md @@ -5,23 +5,18 @@ A simple web scraping tool for recipe sites I use in a project of mine that makes sense to live as a separate package. - pip install git+git://github.com/RyanNoelk/recipe-scraper.git + pip install git+https://github.com/RyanNoelk/recipe-scraper.git@1.0.3 then: from recipe_scrapers import scrap_me # give the url as a string, it can be url from any site listed below - scrap_me = scrap_me('http://allrecipes.com/Recipe/Apple-Cake-Iv/Detail.aspx') - - scrap_me.title() - scrap_me.total_time() - scrap_me.ingredients() - scrap_me.instructions() - - or - - scrap_me.data() + try: + scrap_me = scrap_me('https://www.budgetbytes.com/2017/03/lemon-garlic-roasted-chicken') + print(scrap_me.data()) + except KeyError: + print('Website is not supported.') ### Contribute diff --git a/example.py b/example.py new file mode 100755 index 000000000..8aec580e6 --- /dev/null +++ b/example.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# encoding: utf-8 + +from pprint import pprint +from recipe_scrapers import scrap_me + +try: + scrap_me = scrap_me('http://allrecipes.com/recipe/213742/meatball-nirvana/?internalSource=staff%20pick&referringId=80&referringContentType=recipe%20hub&clickId=cardslot%205') + pprint(scrap_me.data()) +except KeyError: + print("Website is not supported.") 
diff --git a/recipe_scrapers/__init__.py b/recipe_scrapers/__init__.py index 1b0dc809d..ef856b9d8 100644 --- a/recipe_scrapers/__init__.py +++ b/recipe_scrapers/__init__.py @@ -21,20 +21,20 @@ SCRAPERS = { AllRecipes.host(): AllRecipes, - BonAppetit.host(): BonAppetit, + # BonAppetit.host(): BonAppetit, BudgetBytes.host(): BudgetBytes, - Cookstr.host(): Cookstr, - Epicurious.host(): Epicurious, - FineDiningLovers.host(): FineDiningLovers, - FoodRepublic.host(): FoodRepublic, - JamieOliver.host(): JamieOliver, - MyBakingAddiction.host(): MyBakingAddiction, - SimplyRecipes.host(): SimplyRecipes, - SteamyKitchen.host(): SteamyKitchen, - TastyKitchen.host(): TastyKitchen, - TheVintageMixer.host(): TheVintageMixer, - TwoPeasAndTheirPod.host(): TwoPeasAndTheirPod, - WhatsGabyCooking.host(): WhatsGabyCooking, + # Cookstr.host(): Cookstr, + # Epicurious.host(): Epicurious, + # FineDiningLovers.host(): FineDiningLovers, + # FoodRepublic.host(): FoodRepublic, + # JamieOliver.host(): JamieOliver, + # MyBakingAddiction.host(): MyBakingAddiction, + # SimplyRecipes.host(): SimplyRecipes, + # SteamyKitchen.host(): SteamyKitchen, + # TastyKitchen.host(): TastyKitchen, + # TheVintageMixer.host(): TheVintageMixer, + # TwoPeasAndTheirPod.host(): TwoPeasAndTheirPod, + # WhatsGabyCooking.host(): WhatsGabyCooking, } diff --git a/recipe_scrapers/allrecipes.py b/recipe_scrapers/allrecipes.py index 3fad766ed..7786472cb 100644 --- a/recipe_scrapers/allrecipes.py +++ b/recipe_scrapers/allrecipes.py @@ -1,5 +1,7 @@ #!/usr/bin/env python # encoding: utf-8 + +from fractions import Fraction from ._abstract import AbstractScraper from ._utils import get_minutes, normalize_string @@ -14,21 +16,49 @@ def title(self): return self.soup.find('h1').get_text() def total_time(self): - return get_minutes(self.soup.find('span', {'class': 'ready-in-time'})) + return { + 'prep-time': get_minutes(self.soup.find('time', {'itemprop': 'prepTime'})), + 'cook-time': get_minutes(self.soup.find('time', {'itemprop': 
'totalTime'})) + } + + def servings(self): + return self.soup.find('span', {'ng-bind': 'adjustedServings'}).get_text() def ingredients(self): + ingredients_html = self.soup.findAll('span', {'class': "recipe-ingred_txt added"}) + ingredients = [] - return [ - normalize_string(ingredient.get_text()) - for ingredient in ingredients_html - if ingredient.get_text(strip=True) not in ('Add all ingredients to list', '') - ] + for ingredient in ingredients_html: + ingredient = normalize_string(ingredient.get_text()) + + try: + array = ingredient.split(' ', 2) + ingredient_dict = { + 'amount': round(float(sum(Fraction(s) for s in array[0].split())), 3), + 'type': array[1], + 'title': array[2] + } + except (ValueError, ZeroDivisionError, IndexError): + ingredient_dict = { + 'title': ingredient + } + + ingredients.append(ingredient_dict) + return ingredients def instructions(self): instructions_html = self.soup.findAll('span', {'class': 'recipe-directions__list--item'}) - return '\n'.join([ + return [ normalize_string(instruction.get_text()) for instruction in instructions_html - ]) + ] + + def description(self): + return normalize_string( + self.soup.find('div', {'class': 'submitter__description'}).get_text() + ) + + def image(self): + return self.soup.find('img', {'class': 'rec-photo'})["src"] diff --git a/recipe_scrapers/budgetbytes.py b/recipe_scrapers/budgetbytes.py index 43bffe691..e8eb5c990 100644 --- a/recipe_scrapers/budgetbytes.py +++ b/recipe_scrapers/budgetbytes.py @@ -1,7 +1,5 @@ #!/usr/bin/env python # encoding: utf-8 -#!/usr/bin/env python -# encoding: utf-8 import unicodedata from ._abstract import AbstractScraper diff --git a/setup.py b/setup.py index 4fc8995b0..8e63eae11 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name='openeats-recipe-scraper', url='https://github.com/RyanNoelk/recipe-scraper/', - version='1.0.2', + version='1.0.3', description='Python package, scraping recipes from all over the internet', keywords='python recipes scraper harvest', long_description=README,