Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

consolidate scraping #12

Merged
merged 1 commit into from
Nov 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions src/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import formatter
from bs4 import BeautifulSoup
import re
import csv_writer
import csv


def httpsGet(URL):
Expand All @@ -31,7 +33,7 @@ def httpsGet(URL):
return BeautifulSoup(soup1.prettify(), "html.parser")


def searchAmazon(query, df_flag=0):
def searchAmazon(query, df_flag):
"""
The searchAmazon function scrapes amazon.com
"""
Expand All @@ -47,7 +49,7 @@ def searchAmazon(query, df_flag=0):
products.append(product)
return products

def searchWalmart(query, df_flag=0):
def searchWalmart(query, df_flag):
"""
The searchWalmart function scrapes walmart.com
"""
Expand All @@ -65,7 +67,7 @@ def searchWalmart(query, df_flag=0):
products.append(product)
return products

def searchEtsy(query, df_flag=0):
def searchEtsy(query, df_flag):
"""
The searchEtsy function scrapes Etsy.com
"""
Expand All @@ -82,3 +84,15 @@ def searchEtsy(query, df_flag=0):
product = formatter.formatResult("Etsy", titles, prices, links, ratings, df_flag)
products.append(product)
return products

def driver(product, num=None, df_flag=0, csv=False, cd=None):
    """Scrape Amazon, Walmart and Etsy for *product* and combine the results.

    Args:
        product: search query string forwarded to each site scraper.
        num: optional per-site cap on how many results are returned;
            None returns everything each scraper found.
        df_flag: flag forwarded to each scraper (passed through to the
            result formatter).
        csv: when True, also write the combined results to a CSV file.
            NOTE(review): this parameter shadows the module-level
            ``import csv``; the name is kept for backward compatibility.
        cd: directory in which to save the CSV file; defaults to the
            current working directory.

    Returns:
        A list with up to ``num`` results from each of the three sites,
        concatenated in Amazon, Walmart, Etsy order.
    """
    amazon_results = searchAmazon(product, df_flag)
    walmart_results = searchWalmart(product, df_flag)
    etsy_results = searchEtsy(product, df_flag)
    combined = amazon_results + walmart_results + etsy_results
    if csv:
        # Bug fix: the original body read an undefined global ``args``
        # (args.cd / args.search), raising NameError whenever csv=True.
        # Derive the same values from the function's own inputs instead.
        import os
        target_dir = cd if cd is not None else os.getcwd()
        print("CSV Saved at: ", target_dir)
        print("File Name:", csv_writer.write_csv(combined, product, target_dir))
    return amazon_results[:num] + walmart_results[:num] + etsy_results[:num]

17 changes: 5 additions & 12 deletions src/slash.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import os
import csv
import full_version
import csv_writer




Expand All @@ -27,32 +27,25 @@ def main():
parser.add_argument('--link', action='store_true', help="Show links in the table")
parser.add_argument('--des', action='store_true', help="Sort in descending (non-increasing) order")
parser.add_argument('--cd', type=str, help="Change directory to save CSV file with search results", default=os.getcwd())
parser.add_argument('--csv', action='store_false', help="Save results as CSV",)
args = parser.parse_args()
if args.full=='T':

full_version.full_version().driver()
return

products_1 = scraper.searchAmazon(args.search)
products_2 = scraper.searchWalmart(args.search)
products_3 = scraper.searchEtsy(args.search)
results = scraper.driver(args.search,args.num,args.csv)


for sortBy in args.sort:
products1 = formatter.sortList(products_1, sortBy, args.des)[:args.num]
products2 = formatter.sortList(products_2, sortBy, args.des)[:args.num]
products3 = formatter.sortList(products_3, sortBy, args.des)[:args.num]
results = products1 + products2 + products3
results_1 = products_1 + products_2 + products_3
results = formatter.sortList(results, "ra" , args.des)
results = formatter.sortList(results, sortBy , args.des)


print(args.des)
print()
print(tabulate(results, headers="keys", tablefmt="github"))
print()
print()
print("CSV Saved at: ",args.cd)
print("File Name:", csv_writer.write_csv(results_1, args.search, args.cd))

if __name__ == '__main__':
main()