Skip to content

Commit

Permalink
updating scrapers
Browse files (browse the repository at this point in the history)
  • Loading branch information
RyanNoelk committed Jul 22, 2017
1 parent 01fff9c commit 5a8786f
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 17 deletions.
2 changes: 1 addition & 1 deletion example.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,7 +5,7 @@
from recipe_scrapers import scrap_me

try:
scrap_me = scrap_me('http://allrecipes.com/recipe/213742/meatball-nirvana/?internalSource=staff%20pick&referringId=80&referringContentType=recipe%20hub&clickId=cardslot%205')
scrap_me = scrap_me('https://www.budgetbytes.com/2017/07/slow-cooker-sesame-beef/')
pprint(scrap_me.data())
except KeyError:
print "Website is not supported."
14 changes: 10 additions & 4 deletions recipe_scrapers/budgetbytes.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,10 +22,13 @@ def total_time(self):
}

def servings(self):
return self.soup.find('span', {'itemprop': 'recipeYield'}).get_text()
try:
return self.soup.find('span', {'itemprop': 'recipeYield'}).get_text()
except:
return ''

def ingredients(self):
ingredients_html = self.soup.findAll('li', {'class': 'ingredient'})
ingredients_html = self.soup.findAll('li', {'class': 'wprm-recipe-ingredient'})
ingredients = []

for ingredient in ingredients_html:
Expand All @@ -49,7 +52,7 @@ def ingredients(self):
return ingredients

def instructions(self):
instructions_html = self.soup.findAll('li', {'class': 'instruction'})
instructions_html = self.soup.findAll('li', {'class': 'wprm-recipe-instruction'})

return [
normalize_string(instruction.get_text())
Expand All @@ -61,4 +64,7 @@ def description(self):
return li[0].get_text()

def image(self):
return self.soup.find('img', {'itemprop': 'image'})["src"]
try:
return self.soup.find('img', {'itemprop': 'image'})["src"]
except:
return ''
38 changes: 26 additions & 12 deletions recipe_scrapers/budgetbytesv2.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,19 +16,27 @@ def title(self):
return self.soup.find('h1').get_text()

def total_time(self):
return {
'prep-time': self.soup.find(
try:
prep = self.soup.find(
'span',
{'class': 'wprm-recipe-prep_time-minutes'}
).get_text(),
'cook-time': self.soup.find(
).get_text()
except AttributeError:
prep = '0'
try:
cook = self.soup.find(
'span',
{'class': 'wprm-recipe-cook_time-minutes'}
).get_text()
}
except AttributeError:
cook = '0'
return {prep, cook}

def servings(self):
return self.soup.find('span', {'itemprop': 'recipeYield'}).get_text().split(' ', 1)[0]
try:
return self.soup.find('span', {'itemprop': 'recipeYield'}).get_text().split(' ', 1)[0]
except:
return ''

def ingredients(self):
ingredients_html = self.soup.findAll('li', {'class': 'wprm-recipe-ingredient'})
Expand All @@ -52,14 +60,14 @@ def ingredients(self):
}
except AttributeError:
ingredient_dict = {
'title': ingredient.find(
'title': normalize_string(ingredient.find(
'span',
{'class': 'wprm-recipe-ingredient-name'}
).get_text()
).get_text())
}
except:
ingredient_dict = {
'title': ingredient
'title': normalize_string(ingredient.get_text())
}

ingredients.append(ingredient_dict)
Expand All @@ -75,8 +83,14 @@ def instructions(self):
]

def description(self):
li = self.soup.find('article', {'class': 'post'}).findAll('p')
return li[0].get_text()
try:
li = self.soup.find('article', {'class': 'post'}).findAll('p')
return li[0].get_text()
except:
return ''

def image(self):
return self.soup.find('img', {'class': 'alignnone'})["src"]
try:
return self.soup.find('img', {'class': 'alignnone'})["src"]
except:
return ''

0 comments on commit 5a8786f

Please sign in to comment.