-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path0612_Amazon.R
108 lines (42 loc) · 1.71 KB
/
0612_Amazon.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Packages: xml2 supplies read_html(); XML supplies htmlParse() and the
# XPath helpers used in the rest of the script.
library(xml2)
library(XML)

# Address of the review page to scrape (pageNumber=1 in the query string).
url <- "https://www.amazon.com/Who-Moved-My-Cheese-Amazing/product-reviews/0399144463/ref=cm_cr_getr_d_paging_btm_prev_1?ie=UTF8&reviewerType=all_reviews&pageNumber=1"

# Fetch the page, then hand the result to htmlParse() so that the
# xpathSApply() queries below have a parsed document to run against.
html <- read_html(x = url)
html.parsed <- htmlParse(file = html)
# product
# Text of the product link found inside the page's <h1> heading.
product <- xpathSApply(
  doc = html.parsed,
  path = "//h1/a[@data-hook='product-link']",
  fun = xmlValue
)
product
# author
# Exploratory query 1: every span carrying the profile-name class.
xpathSApply(
  doc = html.parsed,
  path = "//span[@class='a-profile-name']",
  fun = xmlValue
)
# Exploratory query 2: the same result with its first two entries dropped
# (presumably names that do not belong to the review list -- confirm on the
# live page).
xpathSApply(
  doc = html.parsed,
  path = "//span[@class='a-profile-name']",
  fun = xmlValue
)[-(1:2)]
# Exploratory query 3: profile names that sit directly under a
# profile-content div.
xpathSApply(
  doc = html.parsed,
  path = "//div[@class='a-profile-content']/span[@class='a-profile-name']",
  fun = xmlValue
)
# Final choice: profile names located anywhere inside a review section div.
author <- xpathSApply(
  doc = html.parsed,
  path = "//div[@class='a-section review aok-relative']//span[@class='a-profile-name']",
  fun = xmlValue
)
author
# date
# Raw text of every review-date element on the page.
date <- xpathSApply(html.parsed, "//span[@data-hook='review-date']", xmlValue)
date
# Drop the leading "Reviewed ... on " phrase so only the date text is left.
date <- gsub("Reviewed.*on ", "", date)
date
# transformation procedure
# %B matches full month names in the *current* LC_TIME locale (see the
# strptime help page), so parsing English month names needs an English-style
# locale. "C" is used instead of "English" because "English" is only a valid
# locale name on Windows; on other platforms that request is rejected with a
# warning and the locale stays unchanged, which makes as.Date() return NA in
# non-English sessions.
lc_time_before <- Sys.getlocale("LC_TIME")
Sys.setlocale("LC_TIME", "C")
date <- as.Date(date, format = "%B %d, %Y")
date
# Put back exactly the LC_TIME value that was active before, rather than
# resetting every locale category to the environment default.
Sys.setlocale("LC_TIME", lc_time_before)
# quote
# Text of the span nested inside each review-title link.
quote <- xpathSApply(
  doc = html.parsed,
  path = "//a[@data-hook='review-title']/span",
  fun = xmlValue
)
quote
# review
# Text of each review-body span.
review <- xpathSApply(
  doc = html.parsed,
  path = "//span[@data-hook='review-body']",
  fun = xmlValue
)
review
# rating
# First attempt: text of the span nested inside each star-rating icon.
rating <- xpathSApply(
  doc = html.parsed,
  path = "//i[@data-hook='review-star-rating']/span",
  fun = xmlValue
)
rating
# Second attempt: text of the star-rating icon element itself. This result
# replaces the one above and is the one converted below.
rating <- xpathSApply(
  doc = html.parsed,
  path = "//i[@data-hook='review-star-rating']",
  fun = xmlValue
)
rating
# extraction
# Keep the first three characters of each rating string and convert them to
# a number.
rating <- as.numeric(substr(x = rating, start = 1, stop = 3))
rating
# helpful
# Text of each helpful-vote statement; kept as raw strings, not parsed
# further.
helpful <- xpathSApply(
  doc = html.parsed,
  path = "//span[@data-hook='helpful-vote-statement']",
  fun = xmlValue
)
helpful