Commit
Merge pull request #7 from RyanNoelk/dev
Dev
RyanNoelk authored Apr 6, 2017
2 parents 231e2bf + 588e68d commit 24a92b5
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 35 deletions.
17 changes: 6 additions & 11 deletions README.md
@@ -5,23 +5,18 @@
A simple web scraping tool for recipe sites. I use it in a project of mine, and it makes sense for it to live as a separate package.

-pip install git+git://github.com/RyanNoelk/recipe-scraper.git
+pip install git+git://github.com/RyanNoelk/recipe-scraper.git@1.0.3

then:

from recipe_scrapers import scrap_me

-# give the url as a string, it can be a url from any site listed below
-scrap_me = scrap_me('http://allrecipes.com/Recipe/Apple-Cake-Iv/Detail.aspx')

-scrap_me.title()
-scrap_me.total_time()
-scrap_me.ingredients()
-scrap_me.instructions()

-or

-scrap_me.data()
+try:
+    scrap_me = scrap_me('https://www.budgetbytes.com/2017/03/lemon-garlic-roasted-chicken')
+    print(scrap_me.data())
+except KeyError:
+    print('Website is not supported.')


### Contribute
11 changes: 11 additions & 0 deletions example.py
@@ -0,0 +1,11 @@
#!/usr/bin/env python
# encoding: utf-8

from pprint import pprint
from recipe_scrapers import scrap_me

try:
    scrap_me = scrap_me('http://allrecipes.com/recipe/213742/meatball-nirvana/?internalSource=staff%20pick&referringId=80&referringContentType=recipe%20hub&clickId=cardslot%205')
    pprint(scrap_me.data())
except KeyError:
    print("Website is not supported.")
26 changes: 13 additions & 13 deletions recipe_scrapers/__init__.py
@@ -21,20 +21,20 @@

SCRAPERS = {
    AllRecipes.host(): AllRecipes,
-    BonAppetit.host(): BonAppetit,
+    # BonAppetit.host(): BonAppetit,
    BudgetBytes.host(): BudgetBytes,
-    Cookstr.host(): Cookstr,
-    Epicurious.host(): Epicurious,
-    FineDiningLovers.host(): FineDiningLovers,
-    FoodRepublic.host(): FoodRepublic,
-    JamieOliver.host(): JamieOliver,
-    MyBakingAddiction.host(): MyBakingAddiction,
-    SimplyRecipes.host(): SimplyRecipes,
-    SteamyKitchen.host(): SteamyKitchen,
-    TastyKitchen.host(): TastyKitchen,
-    TheVintageMixer.host(): TheVintageMixer,
-    TwoPeasAndTheirPod.host(): TwoPeasAndTheirPod,
-    WhatsGabyCooking.host(): WhatsGabyCooking,
+    # Cookstr.host(): Cookstr,
+    # Epicurious.host(): Epicurious,
+    # FineDiningLovers.host(): FineDiningLovers,
+    # FoodRepublic.host(): FoodRepublic,
+    # JamieOliver.host(): JamieOliver,
+    # MyBakingAddiction.host(): MyBakingAddiction,
+    # SimplyRecipes.host(): SimplyRecipes,
+    # SteamyKitchen.host(): SteamyKitchen,
+    # TastyKitchen.host(): TastyKitchen,
+    # TheVintageMixer.host(): TheVintageMixer,
+    # TwoPeasAndTheirPod.host(): TwoPeasAndTheirPod,
+    # WhatsGabyCooking.host(): WhatsGabyCooking,
}


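Commenting a host out of `SCRAPERS` is what makes the `KeyError` handling in the README and `example.py` meaningful: an unsupported site simply has no entry. A minimal sketch of how `scrap_me` presumably dispatches on the URL's host (the function body and `urlparse` details are assumptions, not this package's actual code):

```python
# Sketch only: assumes scrap_me resolves a scraper class by hostname.
from urllib.parse import urlparse  # on Python 2: from urlparse import urlparse

def scrap_me_sketch(url):
    host = urlparse(url).netloc
    if host.startswith('www.'):
        host = host[len('www.'):]  # normalize, e.g. www.budgetbytes.com
    scraper_class = SCRAPERS[host]  # unsupported host -> KeyError
    return scraper_class(url)
```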
46 changes: 38 additions & 8 deletions recipe_scrapers/allrecipes.py
@@ -1,5 +1,7 @@
#!/usr/bin/env python
# encoding: utf-8

+from fractions import Fraction
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string

@@ -14,21 +16,49 @@ def title(self):
        return self.soup.find('h1').get_text()

    def total_time(self):
-        return get_minutes(self.soup.find('span', {'class': 'ready-in-time'}))
+        return {
+            'prep-time': get_minutes(self.soup.find('time', {'itemprop': 'prepTime'})),
+            'cook-time': get_minutes(self.soup.find('time', {'itemprop': 'totalTime'}))
+        }

    def servings(self):
        return self.soup.find('span', {'ng-bind': 'adjustedServings'}).get_text()

    def ingredients(self):

        ingredients_html = self.soup.findAll('span', {'class': "recipe-ingred_txt added"})
+        ingredients = []

-        return [
-            normalize_string(ingredient.get_text())
-            for ingredient in ingredients_html
-            if ingredient.get_text(strip=True) not in ('Add all ingredients to list', '')
-        ]
+        for ingredient in ingredients_html:
+            ingredient = normalize_string(ingredient.get_text())
+
+            try:
+                array = ingredient.split(' ', 2)
+                ingredient_dict = {
+                    'amount': round(float(sum(Fraction(s) for s in array[0].split())), 3),
+                    'type': array[1],
+                    'title': array[2]
+                }
+            except (IndexError, ValueError):  # non-numeric or short lines fall back to title-only
+                ingredient_dict = {
+                    'title': ingredient
+                }
+
+            ingredients.append(ingredient_dict)
+        return ingredients

    def instructions(self):
        instructions_html = self.soup.findAll('span', {'class': 'recipe-directions__list--item'})

-        return '\n'.join([
+        return [
            normalize_string(instruction.get_text())
            for instruction in instructions_html
-        ])
+        ]

+    def description(self):
+        return normalize_string(
+            self.soup.find('div', {'class': 'submitter__description'}).get_text()
+        )
+
+    def image(self):
+        return self.soup.find('img', {'class': 'rec-photo'})["src"]
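
The `Fraction`-based amount parsing is easiest to verify on a couple of sample strings. A standalone rendition of the same logic (the helper name and sample inputs are illustrative):

```python
from fractions import Fraction

def parse_ingredient(text):
    # Same parsing as ingredients() above, pulled out for a quick check.
    try:
        array = text.split(' ', 2)
        return {
            'amount': round(float(sum(Fraction(s) for s in array[0].split())), 3),
            'type': array[1],
            'title': array[2],
        }
    except (IndexError, ValueError):
        return {'title': text}

print(parse_ingredient('1/2 cup brown sugar'))
# -> {'amount': 0.5, 'type': 'cup', 'title': 'brown sugar'}
print(parse_ingredient('salt and pepper to taste'))
# Fraction('salt') raises ValueError -> {'title': 'salt and pepper to taste'}
```

One quirk worth noting: `split(' ', 2)` cuts a mixed number like `1 1/2 cups flour` into `'1'`, `'1/2'`, `'cups flour'`, so the amount comes out as 1.0 with type `'1/2'` rather than 1.5 cups.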
2 changes: 0 additions & 2 deletions recipe_scrapers/budgetbytes.py
@@ -1,7 +1,5 @@
#!/usr/bin/env python
# encoding: utf-8
-#!/usr/bin/env python
-# encoding: utf-8

import unicodedata
from ._abstract import AbstractScraper
2 changes: 1 addition & 1 deletion setup.py
@@ -10,7 +10,7 @@
setup(
    name='openeats-recipe-scraper',
    url='https://github.com/RyanNoelk/recipe-scraper/',
-    version='1.0.2',
+    version='1.0.3',
    description='Python package, scraping recipes from all over the internet',
    keywords='python recipes scraper harvest',
    long_description=README,
