Formatting updates

SEProjGrp5 · Nov 1, 2021 · 48fde02 · 48fde02
1 parent b05afe3
commit 48fde02
Show file tree

Hide file tree

Showing 4 changed files with 26 additions and 15 deletions.
diff --git a/src/formatter.py b/src/formatter.py
@@ -15,7 +15,7 @@
 from datetime import datetime
 import math
 
-def formatResult(website, titles, prices, links,ratings):
+def formatResult(website, titles, prices, links,ratings,df_flag):
     """
     The formatResult function takes the scraped HTML as input, and extracts the 
     necessary values from the HTML code. Ex. extracting a price '$19.99' from
@@ -24,18 +24,21 @@ def formatResult(website, titles, prices, links,ratings):
     title, price, link, rating = '', '', '', ''
     if titles: title = titles[0].get_text().strip()
     if prices: price = prices[0].get_text().strip()
-    #if links: link = links[0]['href']
+    if links: link = links[0]['href']
     if ratings: rating = ratings[0].get_text().strip().split()[0]
+    if df_flag==0: title=formatTitle(title)
+    if df_flag==0: link=formatTitle(link)
     product = {
         'timestamp': datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
-        "title": formatTitle(title),
+        "title": title,
         "price": price, 
-     #   "link":f'www.{website}.com{link}', 
+        "link":f'www.{website}.com{link}', 
         "website": website,
         "rating" : rating,
     }
     return product
 
+
 def sortList(arr, sortBy, reverse):
     """
     The sortList function is used to sort the products list based on the
@@ -64,6 +67,7 @@ def formatTitle(title):
         return title[:40] + "..."
     return title
 
+
 def getNumbers(st):
     """
     The getNumbers function extracts float values (price) from a string.

diff --git a/src/full_version.py b/src/full_version.py
@@ -1,5 +1,7 @@
 import json
 import os
+import pandas as pd
+import scraper
 
 class full_version:
 	def __init__(self):
@@ -13,6 +15,7 @@ def __init__(self):
 			"json", 
 			"user_data.json"
 			)
+		self.df=pd.DataFrame()
 
 
 	def login(self):
@@ -43,10 +46,13 @@ def extract_list(self):
 		pass
 
 	def scrape(self,prod):
-		products_1 = scraper.searchAmazon(prod)
-    	products_2 = scraper.searchWalmart(prod)
-    	products_3 = scraper.searchEtsy(prod)
-
+		products_1 = scraper.searchAmazon(prod,1)
+		products_2 = scraper.searchWalmart(prod,1)
+		products_3 = scraper.searchEtsy(prod,1)
+		results=products_1+products_2+products_3
+		#esults = formatter.sortList(results, "ra" , True)
+		self.df=pd.DataFrame.from_dict(results, orient='columns')
+		print(self.df)
 
 
 

diff --git a/src/scraper.py b/src/scraper.py
@@ -31,7 +31,7 @@ def httpsGet(URL):
     return BeautifulSoup(soup1.prettify(), "html.parser")
 
 
-def searchAmazon(query):
+def searchAmazon(query, df_flag=0):
     """
     The searchAmazon function scrapes amazon.com
     """
@@ -43,11 +43,11 @@ def searchAmazon(query):
     for res in results:
         titles, prices, links = res.select("h2 a span"), res.select("span.a-price span"), res.select("h2 a.a-link-normal")
         ratings = res.select("span.a-icon-alt")
-        product = formatter.formatResult("amazon",  titles, prices, links,ratings)
+        product = formatter.formatResult("amazon",  titles, prices, links,ratings, df_flag)
         products.append(product)
     return products
 
-def searchWalmart(query):
+def searchWalmart(query, df_flag=0):
     """
     The searchWalmart function scrapes walmart.com
     """
@@ -61,11 +61,11 @@ def searchWalmart(query):
     for res in results:
         titles, prices, links = res.select("span.lh-title"), res.select("div.lh-copy"), res.select("a")
         ratings = res.findAll("span",{"class":"w_Cj"},text=pattern)
-        product = formatter.formatResult("walmart", titles, prices, links,ratings)
+        product = formatter.formatResult("walmart", titles, prices, links,ratings, df_flag)
         products.append(product)
     return products
 
-def searchEtsy(query):
+def searchEtsy(query, df_flag=0):
     """
     The searchEtsy function scrapes Etsy.com
     """
@@ -79,6 +79,6 @@ def searchEtsy(query):
     for item in soup.select('.wt-grid__item-xs-6'):
         titles, prices, links = (item.select("h3")), (item.select(".currency-value")), (item.select('.width-full'))
         ratings = item.select('span.screen-reader-only')
-        product = formatter.formatResult("Etsy", titles, prices, links, ratings)
+        product = formatter.formatResult("Etsy", titles, prices, links, ratings, df_flag)
         products.append(product)
     return products
diff --git a/src/slash.py b/src/slash.py
@@ -14,6 +14,7 @@
 import os
 import csv
 import full_version
+import csv_writer
 
 
 
@@ -50,7 +51,7 @@ def main():
     print(tabulate(results, headers="keys", tablefmt="github"))
     print()
     print()
-    rint("CSV Saved at: ",args.cd)
+    print("CSV Saved at: ",args.cd)
     print("File Name:", csv_writer.write_csv(results_1, args.search, args.cd))
 
 if __name__ == '__main__':