-
Notifications
You must be signed in to change notification settings - Fork 0
/
proportionDistribution.R
40 lines (34 loc) · 1.03 KB
/
proportionDistribution.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Explore the distribution of topic proportions in a topic-composition CSV.
#
# The file is read without a header; its first column becomes the row names
# (later split on "/" and ".", so they are presumably file paths -- confirm).
# The remaining columns are read as alternating pairs: odd positions are taken
# as topic numbers, even positions as the matching proportions.
#
# Outputs:
#   highest  - the largest proportion found in each row
#   all      - every proportion from every row, in row order
#   highest.csv / all.csv and two histograms of those vectors

input_path <- "/home/ben/Dropbox/TPM Research Bootcamp/individual-test2-composition.csv"
df <- read.csv(input_path, header = FALSE, row.names = 1)
head(df)

if (nrow(df) == 0L) {
  stop("No rows found in ", input_path, call. = FALSE)
}

# One vector of proportions per row. The logical index c(FALSE, TRUE) is
# recycled across the columns and therefore selects every second column.
# Collecting into a list avoids re-copying a growing vector on every row.
proportions_by_row <- lapply(
  seq_len(nrow(df)),
  function(i) unlist(df[i, c(FALSE, TRUE)])
)
highest <- vapply(proportions_by_row, max, numeric(1))
all <- unlist(proportions_by_row)

# Topic/proportion table for the last row only. This is the state the original
# row-by-row loop left behind, and it is inspected just below.
last_row <- nrow(df)
topicNumber <- df[last_row, c(TRUE, FALSE)]
proportion <- df[last_row, c(FALSE, TRUE)]
speechProp.df <- data.frame(topicNumber = t(topicNumber), proportion = t(proportion))

rownames(df)
rownames(speechProp.df)
colnames(speechProp.df)

# Split the first row name into path components, then split the second
# component on literal dots. fixed = TRUE keeps "." from being read as a regex
# wildcard and avoids the invalid "\." escape that stopped the file parsing.
words <- strsplit(rownames(df)[1], "/", fixed = TRUE)[[1]]
words
words[2]
date <- strsplit(words[2], ".", fixed = TRUE)[[1]]
date

# NOTE(review): `topic`, `mp` and `year` are never defined in this script, so
# the original line below fails with "object not found". Presumably they were
# meant to come from the parsed file name and the per-row maxima -- confirm
# the intended values and restore it.
# high.df <- data.frame(topic, mp, year)

hist(highest, breaks = 50)
hist(all, breaks = 100)

write.csv(highest, "highest.csv")
write.csv(all, "all.csv")
#df<-data.frame(topicNumber = t(topicNumber), weightedProportion=t(weightedProportion))
#rownames(df)<-t(topicNumber)
#colnames(df)<- c("topicNumber","proportion")
#df<-df[with(df ,order(-proportion)) ,]