forked from AnupamKhare/code-Repository
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathR Usefull Code.txt
60 lines (34 loc) · 1.41 KB
/
R Usefull Code.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
############################ Text Analytics ####################################
### Clean-up pipeline for a free-text column.
### `v` stands for the column of df3 that holds the text data.
library(stringr)

### Trim leading and trailing whitespace.
### NOTE(review): this line targets `activity_name`, while every later step
### works on `v` -- confirm which column is intended.
df3$activity_name <- str_trim(df3$activity_name, side = "both")

### Keep only letters, digits, apostrophes and spaces (drop everything else).
df3$v <- gsub("[^[:alnum:]' ]", "", df3$v)

### Remove short words (1 to 3 word characters long).
df3$v <- gsub("\\b\\w{1,3}\\b", "", df3$v)

### Remove all digits.
df3$v <- gsub("[0-9]+", "", df3$v)

### Normalise whitespace: collapse every run of whitespace into one space and
### strip the ends. This runs last so the gaps left behind by the removed
### words and digits are cleaned up as well. (The previous pattern contained
### literal double quotes -- '"\\s+"' -- and therefore never matched.)
df3$v <- trimws(gsub("\\s+", " ", df3$v))
##################### Removing variables with zero or near-zero variance ######
### Dummy-encode the categorical columns v1, v2, v3 of df, then drop the
### resulting columns flagged as having zero or near-zero variance.
### createDummyFeatures() (presumably from mlr) and nearZeroVar() (presumably
### from caret) must already be attached -- TODO confirm the packages.
df_cat_dum <- createDummyFeatures(df, cols = c("v1", "v2", "v3"))
x <- nearZeroVar(df_cat_dum)

### Guard the empty case: when no column is flagged, x is integer(0) and
### df_cat_dum[, -x] would select *zero* columns instead of keeping them all.
### drop = FALSE keeps the result a data frame even if one column remains.
if (length(x) > 0) {
  df_cat_dum_final <- df_cat_dum[, -x, drop = FALSE]
} else {
  df_cat_dum_final <- df_cat_dum
}
#### Working with H2O in R ####################################################
### Start (or connect to) an H2O instance, upload the analytical data frame,
### split it into training and test frames, and fit a GBM on the training frame
### using the test frame for validation.
library(h2o)
h2o.init()

### Convert the R data frame into an H2O frame.
data.hex <- as.h2o(df_analytical_v2)

### ratios = 0.75 sends roughly 75% of the rows to the first frame and the
### remainder to the second; the seed makes the split reproducible.
splits <- h2o.splitFrame(
  data.hex,
  ratios = 0.75,
  destination_frames = c("train.hex", "test.hex"),
  seed = 1234
)
train <- splits[[1]]
test <- splits[[2]]

### Name of the response column and of the predictor columns.
response <- "target"
### The trailing comma after "v4" was removed: c("v4",) is an error in R
### because the last argument is empty.
predictors <- c("v1", "v2", "v3", "v4")

### Gradient boosting model with a gaussian distribution (regression).
### Row and column sampling are both set to 0.8; the model is scored every
### 10 trees, up to 500 trees in total.
data.gbm <- h2o.gbm(
  y = response,
  x = predictors,
  training_frame = train,
  validation_frame = test,
  score_tree_interval = 10,
  ntrees = 500,
  sample_rate = 0.8,
  col_sample_rate = 0.8,
  seed = 1234,
  distribution = "gaussian"
)
### Reference: https://www.h2o.ai/wp-content/uploads/2018/01/RBooklet.pdf