-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_book.py
63 lines (45 loc) · 1.65 KB
/
parse_book.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import json
import sys
import urllib
import urllib.error
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup
from google_currency import convert
# Fallback message printed by the script entry point when the title, author
# or price of the book could not be determined.
ERROR_REQUEST = "There was an error sending request."
#Creates the soup
def createSoup(url):
    """Download ``url`` and parse the response body with the lxml parser.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` object built from the response body, or ``None``
        when the request fails (the failure is reported on standard output).

    Raises:
        ValueError: If ``url`` is malformed (raised by ``urlopen`` itself).
    """
    try:
        # The context manager closes the connection even if reading fails.
        with urllib.request.urlopen(url) as response:
            the_page = response.read()
    except urllib.error.HTTPError as e:
        # Must come first: HTTPError is a subclass of URLError.
        print("HTTPError with: ", url, "\t", e)
        return None
    except urllib.error.URLError as e:
        # Covers failures that never produce an HTTP status, such as DNS
        # errors or refused connections.
        print("URLError with: ", url, "\t", e)
        return None
    return BeautifulSoup(the_page, 'lxml')
def getData(soup):
    """Extract the JSON-LD metadata embedded in a parsed page.

    Args:
        soup: Parsed document as returned by ``createSoup``; may be ``None``
            when the download failed.

    Returns:
        The decoded content of the first
        ``<script type="application/ld+json">`` element, or ``None`` when
        ``soup`` is ``None`` or contains no such element with content.

    Raises:
        json.JSONDecodeError: If the script content is not valid JSON.
    """
    if soup is None:
        return None
    json_data = soup.find('script', {'type': 'application/ld+json'})
    # Compare against None explicitly rather than relying on the tag's
    # truthiness; an empty tag is caught by the contents check.
    if json_data is None or not json_data.contents:
        return None
    return json.loads(json_data.contents[0])
def getBookTitle(data):
    """Return the book title stored under the ``name`` key of ``data``.

    Args:
        data: Decoded JSON-LD metadata; expected to be a dict.

    Returns:
        The value of ``data['name']``, or ``None`` when ``data`` is not a
        dict or has no ``name`` key. The caller treats a falsy result as
        "title unavailable".
    """
    if not isinstance(data, dict):
        return None
    return data.get('name')
def getBookAuthor(data):
    """Return the book's author names joined with ``', '``.

    Args:
        data: Decoded JSON-LD metadata; expected to be a dict whose
            ``author`` entry is a list of objects with a ``name`` key. A
            single object or a plain string is accepted as well.

    Returns:
        A comma-separated string of author names, or ``''`` when no author
        name can be found. The caller treats a falsy result as "author
        unavailable".
    """
    if not isinstance(data, dict):
        return ''
    authors = data.get('author') or []
    # Normalise a lone author to a one-element list; iterating a dict would
    # yield its keys and iterating a string its characters.
    if isinstance(authors, (dict, str)):
        authors = [authors]
    names = []
    for author in authors:
        name = author.get('name') if isinstance(author, dict) else author
        if isinstance(name, str) and name:
            names.append(name)
    return ', '.join(names)
def getBookPrice(data):
    """Return the price of the first purchasable offer found in ``data``.

    The offers are read from
    ``data['workExample']['potentialAction']['expectsAcceptanceOf']``; only
    entries whose ``@type`` is ``'Offer'`` are considered.

    Args:
        data: Decoded JSON-LD metadata; expected to be a dict.

    Returns:
        ``{'amount': <price>, 'currency': <priceCurrency>}`` for the first
        matching offer, or ``None`` when the expected structure is missing or
        no offer carries both fields. The caller treats a falsy result as
        "price unavailable".
    """
    node = data
    # Walk the nested path defensively so that a missing level yields None
    # instead of a KeyError/TypeError.
    for key in ('workExample', 'potentialAction', 'expectsAcceptanceOf'):
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    # Accept a single offer object as well as a list of them.
    if isinstance(node, dict):
        node = [node]
    if not isinstance(node, list):
        return None
    for item in node:
        if not isinstance(item, dict) or item.get('@type') != 'Offer':
            continue
        if 'price' in item and 'priceCurrency' in item:
            return {'amount': item['price'], 'currency': item['priceCurrency']}
    return None
def main(argv):
    """Look up a Google Play book and print its details as JSON.

    Args:
        argv: Command-line arguments without the program name; the first
            element is the book id.

    Returns:
        The process exit status: ``0`` on success, ``-1`` when no book id
        was supplied, ``1`` when the book details could not be obtained.
    """
    if not argv:
        # Usage errors belong on stderr so they never mix with JSON output.
        print('SYNTAX: parse_book.py [book-id]', file=sys.stderr)
        return -1

    # Percent-encode the id so characters such as '&' or '#' cannot alter
    # the query string.
    url = ('https://play.google.com/store/books/details/?id='
           + urllib.parse.quote(argv[0], safe=''))

    try:
        data = getData(createSoup(url))
        title = getBookTitle(data)
        price = getBookPrice(data)
        author = getBookAuthor(data)
    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
        # A failed download or an unexpected page layout surfaces as one of
        # these; report it like any other lookup failure instead of dumping
        # a traceback.
        print(ERROR_REQUEST)
        return 1

    if title and price and author:
        response = {'title': title, 'author': author, 'price': price}
        print(json.dumps(response))
        return 0

    print(ERROR_REQUEST)
    return 1


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))