Commit
Merge pull request #7 from RyanNoelk/dev
Dev
RyanNoelk authored Apr 6, 2017
2 parents 231e2bf + 588e68d commit 24a92b5
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 35 deletions.
17 changes: 6 additions & 11 deletions README.md
@@ -5,23 +5,18 @@
A simple web scraping tool for recipe sites. I use it in a project of mine, and it makes sense for it to live as a separate package.

-pip install git+git://github.com/RyanNoelk/recipe-scraper.git
+pip install git+git://github.com/RyanNoelk/recipe-scraper.git@1.0.3

then:

from recipe_scrapers import scrap_me

-# give the url as a string, it can be a url from any site listed below
-scrap_me = scrap_me('http://allrecipes.com/Recipe/Apple-Cake-Iv/Detail.aspx')

-scrap_me.title()
-scrap_me.total_time()
-scrap_me.ingredients()
-scrap_me.instructions()

-or

-scrap_me.data()
+try:
+    scrap_me = scrap_me('https://www.budgetbytes.com/2017/03/lemon-garlic-roasted-chicken')
+    print(scrap_me.data())
+except KeyError:
+    print('Website is not supported.')


### Contribute
11 changes: 11 additions & 0 deletions example.py
@@ -0,0 +1,11 @@
#!/usr/bin/env python
# encoding: utf-8

from pprint import pprint
from recipe_scrapers import scrap_me

try:
    scrap_me = scrap_me('http://allrecipes.com/recipe/213742/meatball-nirvana/?internalSource=staff%20pick&referringId=80&referringContentType=recipe%20hub&clickId=cardslot%205')
    pprint(scrap_me.data())
except KeyError:
    print("Website is not supported.")
26 changes: 13 additions & 13 deletions recipe_scrapers/__init__.py
@@ -21,20 +21,20 @@

SCRAPERS = {
    AllRecipes.host(): AllRecipes,
-    BonAppetit.host(): BonAppetit,
+    # BonAppetit.host(): BonAppetit,
    BudgetBytes.host(): BudgetBytes,
-    Cookstr.host(): Cookstr,
-    Epicurious.host(): Epicurious,
-    FineDiningLovers.host(): FineDiningLovers,
-    FoodRepublic.host(): FoodRepublic,
-    JamieOliver.host(): JamieOliver,
-    MyBakingAddiction.host(): MyBakingAddiction,
-    SimplyRecipes.host(): SimplyRecipes,
-    SteamyKitchen.host(): SteamyKitchen,
-    TastyKitchen.host(): TastyKitchen,
-    TheVintageMixer.host(): TheVintageMixer,
-    TwoPeasAndTheirPod.host(): TwoPeasAndTheirPod,
-    WhatsGabyCooking.host(): WhatsGabyCooking,
+    # Cookstr.host(): Cookstr,
+    # Epicurious.host(): Epicurious,
+    # FineDiningLovers.host(): FineDiningLovers,
+    # FoodRepublic.host(): FoodRepublic,
+    # JamieOliver.host(): JamieOliver,
+    # MyBakingAddiction.host(): MyBakingAddiction,
+    # SimplyRecipes.host(): SimplyRecipes,
+    # SteamyKitchen.host(): SteamyKitchen,
+    # TastyKitchen.host(): TastyKitchen,
+    # TheVintageMixer.host(): TheVintageMixer,
+    # TwoPeasAndTheirPod.host(): TwoPeasAndTheirPod,
+    # WhatsGabyCooking.host(): WhatsGabyCooking,
}


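Commenting a host out of `SCRAPERS` is what makes the `KeyError` handling in the README and `example.py` meaningful: an unsupported site simply has no entry. A minimal sketch of how `scrap_me` presumably dispatches on the URL's host (the function body and `urlparse` details are assumptions, not this package's actual code):

```python
# Sketch only: assumes scrap_me resolves a scraper class by hostname.
from urllib.parse import urlparse  # on Python 2: from urlparse import urlparse

def scrap_me_sketch(url):
    host = urlparse(url).netloc
    if host.startswith('www.'):
        host = host[len('www.'):]  # normalize, e.g. www.budgetbytes.com
    scraper_class = SCRAPERS[host]  # unsupported host -> KeyError
    return scraper_class(url)
```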
46 changes: 38 additions & 8 deletions recipe_scrapers/allrecipes.py
@@ -1,5 +1,7 @@
#!/usr/bin/env python
# encoding: utf-8

+from fractions import Fraction
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string

@@ -14,21 +16,49 @@ def title(self):
        return self.soup.find('h1').get_text()

    def total_time(self):
-        return get_minutes(self.soup.find('span', {'class': 'ready-in-time'}))
+        return {
+            'prep-time': get_minutes(self.soup.find('time', {'itemprop': 'prepTime'})),
+            'cook-time': get_minutes(self.soup.find('time', {'itemprop': 'totalTime'}))
+        }

    def servings(self):
        return self.soup.find('span', {'ng-bind': 'adjustedServings'}).get_text()

    def ingredients(self):

        ingredients_html = self.soup.findAll('span', {'class': "recipe-ingred_txt added"})
+        ingredients = []

-        return [
-            normalize_string(ingredient.get_text())
-            for ingredient in ingredients_html
-            if ingredient.get_text(strip=True) not in ('Add all ingredients to list', '')
-        ]
+        for ingredient in ingredients_html:
+            ingredient = normalize_string(ingredient.get_text())
+
+            try:
+                array = ingredient.split(' ', 2)
+                ingredient_dict = {
+                    'amount': round(float(sum(Fraction(s) for s in array[0].split())), 3),
+                    'type': array[1],
+                    'title': array[2]
+                }
+            except (IndexError, ValueError):  # non-numeric or short lines fall back to title-only
+                ingredient_dict = {
+                    'title': ingredient
+                }
+
+            ingredients.append(ingredient_dict)
+        return ingredients

    def instructions(self):
        instructions_html = self.soup.findAll('span', {'class': 'recipe-directions__list--item'})

-        return '\n'.join([
+        return [
            normalize_string(instruction.get_text())
            for instruction in instructions_html
-        ])
+        ]

+    def description(self):
+        return normalize_string(
+            self.soup.find('div', {'class': 'submitter__description'}).get_text()
+        )
+
+    def image(self):
+        return self.soup.find('img', {'class': 'rec-photo'})["src"]
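
The `Fraction`-based amount parsing is easiest to verify on a couple of sample strings. A standalone rendition of the same logic (the helper name and sample inputs are illustrative):

```python
from fractions import Fraction

def parse_ingredient(text):
    # Same parsing as ingredients() above, pulled out for a quick check.
    try:
        array = text.split(' ', 2)
        return {
            'amount': round(float(sum(Fraction(s) for s in array[0].split())), 3),
            'type': array[1],
            'title': array[2],
        }
    except (IndexError, ValueError):
        return {'title': text}

print(parse_ingredient('1/2 cup brown sugar'))
# -> {'amount': 0.5, 'type': 'cup', 'title': 'brown sugar'}
print(parse_ingredient('salt and pepper to taste'))
# Fraction('salt') raises ValueError -> {'title': 'salt and pepper to taste'}
```

One quirk worth noting: `split(' ', 2)` cuts a mixed number like `1 1/2 cups flour` into `'1'`, `'1/2'`, `'cups flour'`, so the amount comes out as 1.0 with type `'1/2'` rather than 1.5 cups.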
2 changes: 0 additions & 2 deletions recipe_scrapers/budgetbytes.py
@@ -1,7 +1,5 @@
#!/usr/bin/env python
# encoding: utf-8
-#!/usr/bin/env python
-# encoding: utf-8

import unicodedata
from ._abstract import AbstractScraper
2 changes: 1 addition & 1 deletion setup.py
@@ -10,7 +10,7 @@
setup(
    name='openeats-recipe-scraper',
    url='https://github.com/RyanNoelk/recipe-scraper/',
-    version='1.0.2',
+    version='1.0.3',
    description='Python package, scraping recipes from all over the internet',
    keywords='python recipes scraper harvest',
    long_description=README,
