diff --git a/.gitignore b/.gitignore index af235a83..c1ca7bca 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,7 @@ dmypy.json # Pyre type checker .pyre/ + + +*.csv +user_data.json \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 00000000..26d33521 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 00000000..5064a650 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,14 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 00000000..105ce2da --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 00000000..2305f400 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..7ad07a6b --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/slash_new.iml b/.idea/slash_new.iml new file mode 100644 index 00000000..8e5446ac --- /dev/null +++ b/.idea/slash_new.iml @@ -0,0 +1,14 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..94a25f7f --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/README.md b/README.md index 9785e94d..74781d3d 100644 --- a/README.md +++ b/README.md @@ -183,11 +183,11 @@ python slash.py --search "philips hue" --num 5 - - - - - + + + + +

Shubham Mankar

Pratik Devnani


Moksh Jain


Rahil Sarvaiya


Anushi Keswani


Anant Gadodia

Anmolika Goyal


Shubhangi Jain


Shreya Karra


Srujana Rao

diff --git a/requirements.txt b/requirements.txt index 76792b38..a8032e3a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,4 +34,6 @@ urllib3==1.26.6 Werkzeug==1.0.1 wheel==0.37.0 zipp==3.5.0 -DateTime==4.3 \ No newline at end of file +DateTime==4.3 +lxml==4.6.3 +requests-oauthlib==1.3.0 diff --git a/src/csv_writer.py b/src/csv_writer.py new file mode 100644 index 00000000..41b3e654 --- /dev/null +++ b/src/csv_writer.py @@ -0,0 +1,19 @@ +import csv +from datetime import datetime +import os + + +def write_csv(arr,product,file_path): + ''' Returns the CSV file with the naming nomenclature as 'ProductDate_Time' + Parameters- product: product entered by the user, file_path: path where the csv needs to be stored + Returns- file_name: CSV file ''' + os.chdir(file_path) + keys = arr[0].keys() + now=datetime.now() + file_name=product+now.strftime("%m%d%y_%H%M")+'.csv' + a_file = open(file_name, "w", newline='') + dict_writer = csv.DictWriter(a_file, keys) + dict_writer.writeheader() + dict_writer.writerows(arr) + a_file.close() + return file_name diff --git a/src/formatter.py b/src/formatter.py index 11412cac..b1ad8e8e 100644 --- a/src/formatter.py +++ b/src/formatter.py @@ -15,25 +15,36 @@ from datetime import datetime import math -def formatResult(website, titles, prices, links): +def formatResult(website, titles, prices, links,ratings,df_flag, currency): """ The formatResult function takes the scraped HTML as input, and extracts the necessary values from the HTML code. Ex. extracting a price '$19.99' from a paragraph tag. """ - title, price, link = '', '', '' + + title, price, link, rating, converted_cur = '', '', '', '', '' if titles: title = titles[0].get_text().strip() if prices: price = prices[0].get_text().strip() + if '$' not in price: + price='$'+price if links: link = links[0]['href'] + if ratings: rating = ratings[0].get_text().strip().split()[0] + if df_flag==0: title=formatTitle(title) + if df_flag==0: link=formatTitle(link) + if currency: converted_cur = getCurrency(currency, price) product = { 'timestamp': datetime.now().strftime("%d/%m/%Y %H:%M:%S"), - "title": formatTitle(title), + "title": title, "price": price, - # "link":f'www.{website}.com{link}', + "link":f'www.{website}.com{link}', "website": website, + "rating" : rating, + "converted price": converted_cur } + return product + def sortList(arr, sortBy, reverse): """ The sortList function is used to sort the products list based on the @@ -43,7 +54,7 @@ def sortList(arr, sortBy, reverse): return sorted(arr, key=lambda x: getNumbers(x["price"]), reverse=reverse) # To-do: sort by rating elif sortBy == "ra": - # return sorted(arr, key=lambda x: getNumbers(x.price), reverse=reverse) + return sorted(arr, key=lambda x: getNumbers(x["rating"]), reverse=reverse) pass return arr @@ -62,6 +73,7 @@ def formatTitle(title): return title[:40] + "..." return title + def getNumbers(st): """ The getNumbers function extracts float values (price) from a string. @@ -75,4 +87,15 @@ def getNumbers(st): ans = float(ans) except: ans = math.inf - return ans \ No newline at end of file + return ans + +def getCurrency(currency, price): + + converted_cur = 0.0 + if len(price)>1 : + if currency == "inr": + converted_cur = 75 * int(price[(price.index("$")+1):price.index(".")].replace(",","")) + elif currency == "euro": + converted_cur = 1.16 * int(price[(price.index("$")+1):price.index(".")].replace(",","")) + converted_cur=currency.upper()+' '+str(converted_cur) + return converted_cur diff --git a/src/full_version.py b/src/full_version.py new file mode 100644 index 00000000..efee97bc --- /dev/null +++ b/src/full_version.py @@ -0,0 +1,105 @@ +import json +import os +import pandas as pd +import scraper + +class full_version: + def __init__(self): + self.data={} + self.name="" + self.email="" + self.user_data = os.path.join( + os.path.dirname( + os.path.dirname( + os.path.abspath(__file__))), + "json", + "user_data.json" + ) + self.user_list = os.path.join( + os.path.dirname( + os.path.dirname( + os.path.abspath(__file__))), + "csvs", + "user_list.csv" + ) + self.df=pd.DataFrame() + pd.set_option('display.max_rows', None) + pd.set_option('display.max_columns', None) + pd.set_option('display.width', None) + pd.set_option('display.max_colwidth', 40) + + + def login(self): + if not os.path.exists(self.user_data): + print("Welcome to Slash!") + print("Please enter the following information: ") + name=input("Name: ") + email=input("Email: ") + self.data['name']=name + self.data['email']=email + with open(self.user_data, 'w') as outfile: + json.dump(self.data, outfile) + self.name=name + self.email=email + else: + with open(self.user_data) as json_file: + data = json.load(json_file) + self.name=data['name'] + self.email=data['email'] + return self.name, self.email + + def search_fn(self): + prod=input("Enter name of product to Search: ") + self.scrape(prod) + ch=int(input("\n\nEnter 1 to save product to list \nelse enter any other key to continue")) + if ch==1: + indx=int(input("Enter row number of product to save: ")) + if indx