forked from 321k/Google-Trends
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Searches by state
64 lines (53 loc) · 2.56 KB
/
Searches by state
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Search terms that are passed to mostGoogled() as the `query` argument.
celebrities <- c(
  "Snooki",
  "Paris Hilton",
  "Nicole Polizzi",
  "Miley Cyrus",
  "Lindsay Lohan",
  "Kris Jenner",
  "Kourtney Kardashian",
  "Kim Kardashian",
  "Khloe Kardashian",
  "Justin Bieber",
  "Honey Boo Boo"
)

# Directory that mostGoogled() watches for newly downloaded CSV files.
downloadDir <- "C:/Users/erik.johansson/Downloads"
# Build a per-term score table from one parsed Trends CSV export.
#
# Arguments:
#   data  Data frame as produced by
#         read.csv(header = FALSE, blank.lines.skip = FALSE,
#                  stringsAsFactors = FALSE).
#         Column 1 holds row labels, with "" marking blank separator lines;
#         every further column belongs to one search term.
#         (Assumes the export layout where the term names sit two rows below
#         the first blank line and the scored period sits two rows above the
#         second blank line -- confirm against a current export.)
#
# Returns a data frame with one row per term and columns V1..V5:
#   V1  the label found in row 2, column 1 of `data` (same for every row)
#   V2  the term name
#   V3  the term's numeric score
#   V4  the score relative to the highest score in this table
#   V5  "Round winner" for the term(s) with the highest score, otherwise NA
#
# Raises an error when `data` has no term columns or does not contain the two
# blank separator rows the row lookup relies on.
scoringTable <- function(data = data.frame()) {
  termCount <- ncol(data) - 1
  if (termCount < 1) {
    stop(
      "scoringTable: 'data' needs a label column plus at least one term column",
      call. = FALSE
    )
  }

  blankRows <- which(data[, 1] == "")
  nameRow <- blankRows[1] + 2
  scoreRow <- blankRows[2] - 2
  if (length(blankRows) < 2 || scoreRow <= nameRow) {
    stop(
      "scoringTable: 'data' does not contain the expected blank separator rows",
      call. = FALSE
    )
  }

  # Pull one row across all term columns as plain character values so that
  # mixed or factor columns cannot change the result type.
  termCols <- seq_len(termCount) + 1
  rowText <- function(row) {
    vapply(
      termCols,
      function(col) as.character(data[row, col]),
      character(1)
    )
  }

  terms <- rowText(nameRow)
  scores <- as.numeric(rowText(scoreRow))

  # Ignore unparsable scores when picking the winner; a single NA must not
  # wipe out the whole round.
  highScore <- if (all(is.na(scores))) NA_real_ else max(scores, na.rm = TRUE)

  verdict <- rep(NA_character_, termCount)
  verdict[which(scores == highScore)] <- "Round winner"

  data.frame(
    V1 = data[2, 1],
    V2 = terms,
    V3 = scores,
    V4 = scores / highScore,
    V5 = verdict,
    stringsAsFactors = FALSE
  )
}
# Wait until a new, completely downloaded file appears in `downloadDir`.
# `knownFiles` is the directory listing taken before the download started.
# Returns the file name (not the full path) of the first new file; stops with
# an error once `timeout` seconds have passed without one.
.awaitDownload <- function(downloadDir, knownFiles, timeout, pollInterval = 3) {
  # Browsers write to a temporary name while a download is in progress;
  # those files must not be mistaken for the finished export.
  partialPattern <- "\\.(crdownload|part|download|tmp)$"
  deadline <- Sys.time() + timeout
  repeat {
    fresh <- setdiff(list.files(downloadDir), knownFiles)
    fresh <- fresh[!grepl(partialPattern, fresh, ignore.case = TRUE)]
    if (length(fresh) > 0) {
      return(fresh[1])
    }
    if (Sys.time() >= deadline) {
      stop(
        "mostGoogled: no new file appeared in '", downloadDir,
        "' within ", timeout, " seconds",
        call. = FALSE
      )
    }
    Sys.sleep(pollInterval)
  }
}

# Read a downloaded CSV, pausing and retrying if a read attempt fails
# (the file may still be locked or incomplete right after it appears).
.readTrendsCsv <- function(path, attempts = 2, retryDelay = 8) {
  for (attempt in seq_len(attempts)) {
    result <- tryCatch(
      read.csv(
        path,
        header = FALSE,
        blank.lines.skip = FALSE,
        stringsAsFactors = FALSE
      ),
      error = function(e) e
    )
    if (!inherits(result, "error")) {
      return(result)
    }
    if (attempt < attempts) {
      Sys.sleep(retryDelay)
    }
  }
  stop(
    "mostGoogled: could not read '", path, "': ", conditionMessage(result),
    call. = FALSE
  )
}

# Run a knock-out comparison of search terms per US state.
#
# For every state the terms are compared in rounds of at most five: the
# current round winner plus the next four terms that have not been compared
# yet. Each round opens a report-export URL in the browser, waits for the
# resulting file to show up in `downloadDir`, scores it with scoringTable()
# and carries the winner into the next round.
#
# Arguments:
#   query        Character vector of search terms; NA entries are ignored.
#   downloadDir  Directory the browser saves downloads into.
#   states       Two-letter state codes to query (defaults to eleven states,
#                AR through IN).
#   timeout      Seconds to wait for each download before giving up.
#
# Returns a data frame with the rows of every round's scoringTable() result
# (columns V1..V5) plus V6, the name of the downloaded file the round was
# scored from.
mostGoogled <- function(query, downloadDir,
                        states = c("AR", "CA", "CO", "CT", "DE", "FL",
                                   "GA", "HI", "ID", "IL", "IN"),
                        timeout = 300) {
  query <- query[!is.na(query)]
  if (length(query) == 0) {
    stop("mostGoogled: 'query' contains no search terms", call. = FALSE)
  }
  if (!dir.exists(downloadDir)) {
    stop(
      "mostGoogled: download directory '", downloadDir, "' does not exist",
      call. = FALSE
    )
  }

  country <- "US"

  # Everything after the first term challenges the running winner, four at a
  # time, so that no term is left out of the comparison.
  challengers <- query[-1]
  challengerGroups <- if (length(challengers) > 0) {
    unname(split(challengers, ceiling(seq_along(challengers) / 4)))
  } else {
    list(character(0))
  }

  rounds <- list()
  for (state in states) {
    # Each state starts over with the first term as the initial winner.
    roundWinner <- query[1]
    for (group in challengerGroups) {
      terms <- c(roundWinner, group)
      # Percent-encode every term and join them with an encoded comma so the
      # URL contains no raw spaces or reserved characters.
      queries <- paste(
        vapply(
          terms,
          utils::URLencode,
          character(1),
          reserved = TRUE,
          USE.NAMES = FALSE
        ),
        collapse = "%2C"
      )
      URL <- paste0(
        "http://www.google.com/trends/trendsReport?hl=en-US",
        "&geo=", country, "-", state,
        "&q=", queries,
        "&cmpt=q&content=1&export=1"
      )

      # Snapshot the directory first so the new download can be identified.
      startingFiles <- list.files(downloadDir)
      utils::browseURL(URL)
      filePath <- .awaitDownload(downloadDir, startingFiles, timeout)

      trendsData <- .readTrendsCsv(file.path(downloadDir, filePath))
      score <- scoringTable(trendsData)
      score[["V6"]] <- filePath

      # Keep the previous winner if this round produced none.
      winner <- score[which(score[, 5] == "Round winner"), 2][1]
      if (!is.na(winner)) {
        roundWinner <- winner
      }
      rounds[[length(rounds) + 1]] <- score
    }
  }

  if (length(rounds) == 0) {
    return(as.data.frame(matrix(NA, 0, 6)))
  }
  do.call(rbind, rounds)
}
# Run the comparison for the configured terms and download directory.
mostGoogled(query = celebrities, downloadDir = downloadDir)