-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathproducts_crawl.py
49 lines (40 loc) · 2.01 KB
/
products_crawl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
import json
# Module-level accumulator holding one dict per scraped product page.
# Kept at module scope (and under its original name) so any code that reads
# ``d`` after a crawl keeps working.
d = []


def _manufacturer_from(bold_texts):
    """Return the lower-cased manufacturer name, or None if it cannot be found.

    The second bold text of the description block is split on the standalone
    word "by" and everything after it is returned.
    NOTE(review): this assumes that text reads "<product> by <manufacturer>";
    confirm against the live page markup.

    Args:
        bold_texts: list of strings extracted from the description's <b> tags.

    Returns:
        The text following the first standalone "by", lower-cased and with
        whitespace normalised, or None when there is no second bold text, no
        "by" word, or nothing after it.
    """
    if len(bold_texts) < 2:
        return None
    # Lower-casing makes the "by" match case-insensitive; splitting into words
    # avoids matching "by" inside another word.
    words = bold_texts[1].lower().split()
    if 'by' not in words:
        return None
    manufacturer = ' '.join(words[words.index('by') + 1:])
    return manufacturer or None


class ProductsCrawlSpider(CrawlSpider):
    """Crawl the primers listing and record details of every linked item page.

    Each product page reached through an ``item/`` link is turned into a dict,
    appended to the module-level list ``d`` and yielded to Scrapy. When the
    spider closes, the whole list is written to ``scrapy.json``.
    """

    name = 'products_crawl'
    allowed_domains = ['www.midsouthshooterssupply.com']
    start_urls = ['https://www.midsouthshooterssupply.com/dept/reloading/primers?itemsperpage=90']
    rules = (
        # Follow only product links; do not crawl further from product pages.
        Rule(LinkExtractor(allow=r'item/'), callback='parse_item', follow=False),
    )

    def parse_item(self, response):
        """Extract one product record from a product page.

        Args:
            response: the Scrapy response for a product page.

        Yields:
            A dict with the keys Title, Price, full_Desc, Description, status,
            DeliveryInfo, manufacturer and review. ``status`` is a bool and
            ``manufacturer`` is a string or None; the other values are lists
            of extracted strings.
        """
        full_description = response.xpath(
            '//section[@class="page-content"]/div[@id="description"]/b/text()'
        ).extract()

        # A missing status element yields '' instead of raising IndexError.
        status_text = response.xpath(
            '//div[@class="product-info"]/span[@class="status"]/span/text()'
        ).extract_first(default='')
        # In stock unless the page explicitly says otherwise.
        in_stock = status_text.strip() != 'Out of Stock'

        item = {
            'Title': response.xpath(
                '/html/body/form/main/div/section/div[1]/div[1]/h1/text()'
            ).extract(),
            'Price': response.xpath(
                '//div[@class="product-info"]/div[@class="offer"]'
                '/span[@class="price"]/span/text()'
            ).extract(),
            'full_Desc': full_description,
            'Description': response.xpath(
                '/html/body/form/main/div/section/div[2]/text()'
            ).extract(),
            'status': in_stock,
            'DeliveryInfo': response.xpath(
                '//div[@id="delivery-info"]/ul/li/text()'
            ).extract(),
            'manufacturer': _manufacturer_from(full_description),
            # Extracts the element's markup, not just its text.
            'review': response.xpath(
                '/html/body/form/main/div/section/div[1]/div[3]/div[4]/div'
                '/section/div/div[1]/div/div[1]/div/div[2]'
            ).extract(),
        }
        d.append(item)
        # Yielding lets Scrapy pipelines and feed exports see the item too.
        yield item

    def closed(self, reason):
        """Write every collected product to ``scrapy.json`` once, at shutdown.

        Scrapy calls this hook when the spider closes; writing here avoids
        re-serialising and rewriting the whole file for every scraped page.

        Args:
            reason: the close reason string supplied by Scrapy (unused).
        """
        with open('scrapy.json', 'w', encoding='utf-8') as file:
            json.dump(d, file)
#'Price':response.xpath('/html/body/form/main/div/section/div[1]/div[3]/div[1]/span/span/text()').extract_first(),
# Price in dollars
# Description
# Review
# Delivery Info
# Title
# Stock status, i.e. in stock or out of stock: the value should be True when the product is in stock and False when it is out of stock.
# Manufacturer, e.g. Remington, Winchester, etc.