Scrapy price monitor update #14

Open
wants to merge 8 commits into base: master
Changes from 6 commits
57 changes: 48 additions & 9 deletions scrapy_price_monitor/.gitignore
@@ -8,7 +8,6 @@ __pycache__/

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
@@ -20,9 +19,13 @@ lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
@@ -37,13 +40,16 @@ pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
@@ -52,6 +58,8 @@ coverage.xml
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
@@ -66,27 +74,58 @@ docs/_build/
# PyBuilder
target/

# IPython Notebook
# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# dotenv
.env
# SageMath parsed files
*.sage.py

# virtualenv
.venv/
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

.scrapy
# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

.idea
52 changes: 19 additions & 33 deletions scrapy_price_monitor/README.md
@@ -2,7 +2,8 @@ Scrapy Price Monitor
====================

This is a simple price monitor built with [Scrapy](https://github.com/scrapy/scrapy)
and [Scrapy Cloud](https://scrapinghub.com/scrapy-cloud).
and [Scrapy Cloud](https://www.zyte.com/scrapy-cloud/). It is an updated version of
[this sample](https://github.com/scrapinghub/sample-projects/tree/master/scrapy_price_monitor/_scrapy_price_monitor_OLD).

It is basically a Scrapy project with one spider for each online retailer that
we want to monitor prices from. In addition to the spiders, there's a Python
@@ -19,11 +20,6 @@ the already supported retailers, just add a new key for that product and add
the URL list as its value, such as:

{
    "headsetlogitech": [
        "https://www.amazon.com/.../B005GTO07O/",
        "http://www.bestbuy.com/.../3436118.p",
        "http://www.ebay.com/.../110985874014"
    ],
    "NewProduct": [
        "http://url.for.retailer.x",
        "http://url.for.retailer.y",
@@ -34,36 +30,26 @@ the URL list as its value, such as:

## Supporting Further Retailers

This project currently only works with 3 online retailers, and you can list them
running:
To add a retailer, just create a spider to handle the product pages from it.
To include a spider for fake-website.com, you could run:

$ scrapy list
amazon.com
bestbuy.com
ebay.com
$ scrapy genspider fake-website.com fake-website.com

If the retailer that you want to monitor is not yet supported, just create a spider
to handle the product pages from it. To include a spider for samsclub.com, you
could run:
Then open the newly created `fake_website_com.py` file in your editor and add the extraction rules.

$ scrapy genspider samsclub.com samsclub.com

And then, open the spider and add the extraction rules:

$ scrapy edit samsclub.com

Have a look at the current spiders and implement the new ones using the same
Have a look at the sample books.toscrape.com spider and implement the new ones using the same
structure, subclassing `BaseSpider` instead of `scrapy.Spider`. This way, your
spiders will automatically read the URLs list from `resources/urls.json`.
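
For illustration, a new retailer spider might look like the sketch below. This is only a minimal example: the `BaseSpider` import path, the item fields, and the CSS selectors are assumptions, so mirror the existing books.toscrape.com spider for the real structure.

# Hypothetical spider for fake-website.com; the import path, field names and
# selectors below are placeholders, to be adapted to the retailer's page layout.
from .base_spider import BaseSpider


class FakeWebsiteSpider(BaseSpider):
    name = 'fake-website.com'

    def parse(self, response):
        # Extract the product name and price from a product page.
        yield {
            'title': response.css('h1.product-title::text').get(),
            'price': response.css('span.price::text').re_first(r'[\d.]+'),
            'url': response.url,
        }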


## Customizing the Price Monitor

The price monitor sends an email using Amazon SES service, so to run it you
have to set both `AWS_ACCESS_KEY` and `AWS_SECRET_KEY` variables in
`price_monitor/settings.py`. If you want to use another email service,
you have to rewrite the `send_email_alert` function in
`price_monitor/bin/monitor.py`.
The price monitor script uses a `send_alert` function in the `price_monitor/bin/alert.py`
file to send an alert. The current sample sends an email through the Amazon SES
service, so to run it you have to set both the `AWS_ACCESS_KEY` and `AWS_SECRET_KEY`
variables in that file, along with the email sender and intended recipient.
If you want to use another email service or another form of alert altogether,
you can rewrite this file and include an equivalent `send_alert` function.
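
For example, a minimal drop-in replacement that posts to a Slack incoming webhook instead of sending email could look like the sketch below; the webhook URL is a placeholder and the `title`/`price` item fields are assumptions.

# Hypothetical Slack-based replacement for the SES alert; the webhook URL is a
# placeholder and the 'title'/'price' item fields are assumptions.
import json
from urllib.request import Request, urlopen

SLACK_WEBHOOK_URL = 'https://hooks.slack.com/services/XXX/YYY/ZZZ'


def send_alert(items):
    lines = ['{}: {}'.format(item.get('title'), item.get('price')) for item in items]
    payload = json.dumps({'text': 'Price drop alert!\n' + '\n'.join(lines)}).encode('utf-8')
    request = Request(SLACK_WEBHOOK_URL, data=payload,
                      headers={'Content-Type': 'application/json'})
    urlopen(request)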

The price monitor can be further customized via parameters to the
`price_monitor/bin/monitor.py` script. We will dig into those parameters
@@ -74,17 +60,17 @@ later when showing how to schedule the project on Scrapy Cloud.

1. Clone this repo:

$ git clone git@github.com:stummjr/scrapy_price_monitor.git
$ git clone git@github.com:scrapinghub/sample-projects.git

2. Enter the folder and install the project dependencies:

$ cd scrapy_price_monitor
$ pip install -r requirements.txt

3. Create a free forever account on Scrapy Cloud:
https://app.scrapinghub.com/account/signup/.
3. Create an account on Zyte:
https://app.zyte.com/

4. Create a Scrapy project on Scrapy Cloud and copy the project id from the project URL.
4. Scroll to Scrapy Cloud Projects, select Create Project, and take note of the project ID in the new project's URL.

5. Install [Scrapinghub command line tool (shub)](https://github.com/scrapinghub/shub):

@@ -141,9 +127,9 @@ To do that, first add your Scrapy Cloud project id to [settings.py `SHUB_PROJ_ID

Then run the spiders via command line:

$ scrapy crawl bestbuy.com
$ scrapy crawl books.toscrape.com

This will run the spider named as `bestbuy.com` and store the scraped data into
This will run the spider named `books.toscrape.com` and store the scraped data into
a Scrapy Cloud collection, under the project you set in the last step.

You can also run the price monitor via command line:
Empty file.
30 changes: 30 additions & 0 deletions scrapy_price_monitor/bin/alert.py
@@ -0,0 +1,30 @@
# Below is sample code for sending alerts via the Amazon SES email service.
# If you wish to alert through another means such as Slack, SMS, etc., replace this section with the appropriate code.

import boto
from jinja2 import Environment, PackageLoader

from w3lib.html import remove_tags
import logging
logger = logging.getLogger(__name__)

jinja_env = Environment(loader=PackageLoader('price_monitor', 'alert_template'))

# settings for Amazon SES email service
AWS_ACCESS_KEY = 'AWS_ACCESS_KEY'
AWS_SECRET_KEY = 'AWS_SECRET_KEY'
EMAIL_ALERT_FROM = 'Price Monitor <[email protected]>'
EMAIL_ALERT_TO = ['[email protected]']


def send_alert(items):
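    # Assumes `items` is a list of scraped item dicts; each one is rendered into
    # the HTML body via the Jinja2 email template.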
    ses = boto.connect_ses(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    html_body = jinja_env.get_template('email.html').render(items=items)

    ses.send_email(
        EMAIL_ALERT_FROM,
        'Price drop alert',
        remove_tags(html_body),
        EMAIL_ALERT_TO,
        html_body=html_body
    )
34 changes: 11 additions & 23 deletions scrapy_price_monitor/bin/monitor.py
@@ -4,14 +4,10 @@
import os
from datetime import datetime, timedelta

import boto
from hubstorage import HubstorageClient
from jinja2 import Environment, PackageLoader
from price_monitor import settings
from price_monitor.utils import get_product_names, get_retailers_for_product
from w3lib.html import remove_tags

jinja_env = Environment(loader=PackageLoader('price_monitor', 'templates'))
from price_monitor.collection_helper import CollectionHelper
from bin.alert import send_alert


class DealsChecker(object):
@@ -42,8 +38,13 @@ class DealsFetcher(object):

    def __init__(self, product_name, apikey, project_id, hours):
        self.product_name = product_name
        project = HubstorageClient(apikey).get_project(project_id)
        self.item_store = project.collections.new_store(product_name)
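        # Items are stored in a Scrapy Cloud collection named after the product;
        # create=True asks the helper to create the collection if it doesn't exist yet.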
        collection = CollectionHelper(
            proj_id=project_id,
            collection_name=product_name,
            api_key=apikey,
            create=True,
        )
        self.item_store = collection.store
        self.load_items_from_last_n_hours(hours)

    def load_items_from_last_n_hours(self, n=24):
@@ -75,21 +76,8 @@ def get_deals(self):
        return latest_deals, previous_deals


def send_email_alert(items):
    ses = boto.connect_ses(settings.AWS_ACCESS_KEY, settings.AWS_SECRET_KEY)
    html_body = jinja_env.get_template('email.html').render(items=items)

    ses.send_email(
        settings.EMAIL_ALERT_FROM,
        'Price drop alert',
        remove_tags(html_body),
        settings.EMAIL_ALERT_TO,
        html_body=html_body
    )


def main(args):
    items = []
    for prod_name in get_product_names():
        fetcher = DealsFetcher(prod_name, args.apikey, args.project, args.days * 24)
        checker = DealsChecker(*fetcher.get_deals(), args.threshold)
@@ -98,7 +86,7 @@ def main(args):
            items.append(best_deal)

    if items:
        send_email_alert(items)
        send_alert(items)


def parse_args():
@@ -11,4 +11,3 @@ <h1>🎉 Hey, we found a good deal! 🎁</h1>
</td></tr>
{% endfor %}
</table>
