From 5a8786fca225c98e88a46fa4e196a6bea14da124 Mon Sep 17 00:00:00 2001 From: Ryan Noelk Date: Sat, 22 Jul 2017 11:17:20 -0400 Subject: [PATCH] updating scrapers --- example.py | 2 +- recipe_scrapers/budgetbytes.py | 14 ++++++++---- recipe_scrapers/budgetbytesv2.py | 38 ++++++++++++++++++++++---------- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/example.py b/example.py index 8aec580e6..64d062d80 100755 --- a/example.py +++ b/example.py @@ -5,7 +5,7 @@ from recipe_scrapers import scrap_me try: - scrap_me = scrap_me('http://allrecipes.com/recipe/213742/meatball-nirvana/?internalSource=staff%20pick&referringId=80&referringContentType=recipe%20hub&clickId=cardslot%205') + scrap_me = scrap_me('https://www.budgetbytes.com/2017/07/slow-cooker-sesame-beef/') pprint(scrap_me.data()) except KeyError: print "Website is not supported." diff --git a/recipe_scrapers/budgetbytes.py b/recipe_scrapers/budgetbytes.py index d862c132e..11c84a047 100644 --- a/recipe_scrapers/budgetbytes.py +++ b/recipe_scrapers/budgetbytes.py @@ -22,10 +22,13 @@ def total_time(self): } def servings(self): - return self.soup.find('span', {'itemprop': 'recipeYield'}).get_text() + try: + return self.soup.find('span', {'itemprop': 'recipeYield'}).get_text() + except: + return '' def ingredients(self): - ingredients_html = self.soup.findAll('li', {'class': 'ingredient'}) + ingredients_html = self.soup.findAll('li', {'class': 'wprm-recipe-ingredient'}) ingredients = [] for ingredient in ingredients_html: @@ -49,7 +52,7 @@ def ingredients(self): return ingredients def instructions(self): - instructions_html = self.soup.findAll('li', {'class': 'instruction'}) + instructions_html = self.soup.findAll('li', {'class': 'wprm-recipe-instruction'}) return [ normalize_string(instruction.get_text()) @@ -61,4 +64,7 @@ def description(self): return li[0].get_text() def image(self): - return self.soup.find('img', {'itemprop': 'image'})["src"] + try: + return self.soup.find('img', {'itemprop': 'image'})["src"] + except: + return '' diff --git a/recipe_scrapers/budgetbytesv2.py b/recipe_scrapers/budgetbytesv2.py index 1cc6ee406..f3cdcdfe4 100644 --- a/recipe_scrapers/budgetbytesv2.py +++ b/recipe_scrapers/budgetbytesv2.py @@ -16,19 +16,27 @@ def title(self): return self.soup.find('h1').get_text() def total_time(self): - return { - 'prep-time': self.soup.find( + try: + prep = self.soup.find( 'span', {'class': 'wprm-recipe-prep_time-minutes'} - ).get_text(), - 'cook-time': self.soup.find( + ).get_text() + except AttributeError: + prep = '0' + try: + cook = self.soup.find( 'span', {'class': 'wprm-recipe-cook_time-minutes'} ).get_text() - } + except AttributeError: + cook = '0' + return {prep, cook} def servings(self): - return self.soup.find('span', {'itemprop': 'recipeYield'}).get_text().split(' ', 1)[0] + try: + return self.soup.find('span', {'itemprop': 'recipeYield'}).get_text().split(' ', 1)[0] + except: + return '' def ingredients(self): ingredients_html = self.soup.findAll('li', {'class': 'wprm-recipe-ingredient'}) @@ -52,14 +60,14 @@ def ingredients(self): } except AttributeError: ingredient_dict = { - 'title': ingredient.find( + 'title': normalize_string(ingredient.find( 'span', {'class': 'wprm-recipe-ingredient-name'} - ).get_text() + ).get_text()) } except: ingredient_dict = { - 'title': ingredient + 'title': normalize_string(ingredient.get_text()) } ingredients.append(ingredient_dict) @@ -75,8 +83,14 @@ def instructions(self): ] def description(self): - li = self.soup.find('article', {'class': 'post'}).findAll('p') - return li[0].get_text() + try: + li = self.soup.find('article', {'class': 'post'}).findAll('p') + return li[0].get_text() + except: + return '' def image(self): - return self.soup.find('img', {'class': 'alignnone'})["src"] + try: + return self.soup.find('img', {'class': 'alignnone'})["src"] + except: + return ''