-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmetadata.py
99 lines (81 loc) · 3.4 KB
/
metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
"""Metadata Parser document"""
import os
import sys
import json
import re
class metadata:
    """Container for a single product's metadata record.

    The constructor arguments are stored unchanged as attributes of the same
    name.  Three derived counts (``numAlsoBought``, ``numAlsoViewed`` and
    ``numAlsoBoughtTogether``) are computed once from the lengths of the
    related-product collections.
    """

    def __init__(self, productID, title, price, imURL, alsoBought, alsoViewed,
                 boughtTogether, salesRank, categories):
        """Store the product fields and compute the related-product counts.

        Args:
            productID: Identifier of the product.
            title: Title of the product.
            price: Price of the product.
            imURL: Image URL value for the product (stored as given).
            alsoBought: Sized collection of "also bought" entries.
            alsoViewed: Sized collection of "also viewed" entries.
            boughtTogether: Sized collection of "bought together" entries.
            salesRank: Sales-rank value for the product.
            categories: Category data for the product.

        Raises:
            TypeError: If any of the three related-product arguments does
                not support ``len()``.
        """
        self.productID = productID
        self.title = title
        self.price = price
        self.imURL = imURL
        self.alsoBought = alsoBought
        self.alsoViewed = alsoViewed
        self.boughtTogether = boughtTogether
        self.salesRank = salesRank
        self.categories = categories
        # metrics
        self.numAlsoBought = len(alsoBought)
        self.numAlsoViewed = len(alsoViewed)
        self.numAlsoBoughtTogether = len(boughtTogether)

    def to_dict(self):
        """Return every stored field and derived count as a plain dict.

        The keys match the attribute names.  The values are the stored
        objects themselves (not copies).
        """
        return {
            # These two previously read the non-existent attributes
            # ``self.x`` / ``self.y`` and raised AttributeError.
            'productID': self.productID,
            'title': self.title,
            'price': self.price,
            'imURL': self.imURL,
            'alsoBought': self.alsoBought,
            'alsoViewed': self.alsoViewed,
            'boughtTogether': self.boughtTogether,
            'salesRank': self.salesRank,
            'categories': self.categories,
            'numAlsoBought': self.numAlsoBought,
            'numAlsoViewed': self.numAlsoViewed,
            'numAlsoBoughtTogether': self.numAlsoBoughtTogether,
        }

    def getPrintString(self):
        """Return a multi-line, human-readable summary of the product.

        Each line has the form ``"<label>: <value>"`` and the string ends
        with a trailing newline.  The labels name the field actually being
        printed (the earlier labels were left over from a review record).
        """
        labelledValues = [
            ("Product ID", self.productID),
            ("Title", self.title),
            ("Price", self.price),
            ("Also Bought", self.alsoBought),
            ("Also Viewed", self.alsoViewed),
            ("Bought Together", self.boughtTogether),
        ]
        return "".join(
            label + ": " + str(value) + "\n" for label, value in labelledValues
        )
# General metadata intake function
def readInMetadataDecision(fileName):
    """Announce the load on stdout, then delegate to ``readInMetadata``.

    Args:
        fileName: Path of the metadata file, passed through unchanged.

    Returns:
        Whatever ``readInMetadata(fileName)`` returns.
    """
    print("Returning Metadata Structure")
    metadataStructure = readInMetadata(fileName)
    return metadataStructure
# get the metadata
def readInMetadata(fileName):
    """Parse a metadata file into ``metadata`` objects keyed by product ID.

    Every non-blank line of the file is evaluated as a Python expression
    that must yield a dict containing at least an ``'asin'`` key.  All
    other keys are optional and fall back to defaults: ``""`` for
    ``title``/``imUrl``, ``-1`` for ``price`` and ``salesRank``, and ``[]``
    for the related-product lists and ``categories``.

    Args:
        fileName: Path of the file to read, one record per line.

    Returns:
        dict mapping each record's ``'asin'`` value to its ``metadata``
        instance.  A later record with the same ``'asin'`` replaces an
        earlier one.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If a record has no ``'asin'`` key.
    """
    metadataOut = {}
    # The context manager guarantees the handle is closed, even when a
    # record fails to parse part-way through the file.
    with open(fileName, 'r') as metaFile:
        for line in metaFile:
            # Tolerate blank lines (e.g. a trailing newline at end of file),
            # which eval() would reject with a SyntaxError.
            if not line.strip():
                continue
            # SECURITY: eval() executes whatever code the line contains.
            # Only run this on trusted files; if every record is a pure
            # literal, ast.literal_eval would be a safer replacement.
            # The dumps/loads round trip turns the evaluated record into
            # plain JSON-compatible types.
            strictJSON = json.dumps(eval(line))
            data = json.loads(strictJSON)
            # parts of metadata
            productID = data['asin']
            title = data.get('title', "")
            price = float(data['price']) if 'price' in data else -1
            imURL = data.get('imUrl', "")
            relatedDict = data.get('related', {})
            alsoBought = relatedDict.get('also_bought', [])
            alsoViewed = relatedDict.get('also_viewed', [])
            boughtTogether = relatedDict.get('bought_together', [])
            # Only the first value found in the salesRank mapping is kept.
            salesRank = data.get('salesRank', {})
            salesRank = next(iter(salesRank.values())) if salesRank else -1
            # Optional like every other non-'asin' field; previously a
            # record without 'categories' raised KeyError.
            categories = data.get('categories', [])
            metadataOut[productID] = metadata(
                productID, title, price, imURL, alsoBought, alsoViewed,
                boughtTogether, salesRank, categories)
    print("Metadata structure written")
    return metadataOut
def main():
    """Load the metadata file named on the command line (or the default).

    When exactly one command-line argument is supplied it is used as the
    input path; otherwise the built-in default file name is used.

    Returns:
        The structure returned by ``readInMetadataDecision``.
    """
    print('___Start of Metadata Function___')
    # A single command-line argument overrides the default input file.
    cliArgs = sys.argv
    if len(cliArgs) == 2:
        fileName = cliArgs[1]
    else:
        fileName = 'meta_Cell_Phones_and_Accessories.json'
    print('running on file: ' + str(fileName) + '\n')
    parsedMetadata = readInMetadataDecision(fileName)
    return parsedMetadata
# Script entry point: runs only when the file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    print('Main Function Beginning')
    main()