Skip to content

Commit

Permalink
Adding Selemix functions
Browse files Browse the repository at this point in the history
  • Loading branch information
pafrance authored and pafrance committed Dec 3, 2019
1 parent 4cf1d7e commit 68dd18f
Showing 1 changed file with 254 additions and 0 deletions.
254 changes: 254 additions & 0 deletions RScripts/selemix/IS2_selemix.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
# Copyright 2019 ISTAT
#
# Licensed under the EUPL, Version 1.1 or – as soon they will be approved by
# the European Commission - subsequent versions of the EUPL (the "Licence");
# You may not use this work except in compliance with the Licence. You may
# obtain a copy of the Licence at:
#
# http://ec.europa.eu/idabc/eupl5
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Licence is distributed on an "AS IS" basis, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# Licence for the specific language governing permissions and limitations under
# the Licence.
#
# @author Francesco Amato <framato @ istat.it>
# @author Mauro Bruno <mbruno @ istat.it>
# @author Paolo Francescangeli <pafrance @ istat.it>
# @author Renzo Iannacone <iannacone @ istat.it>
# @author Stefano Macone <macone @ istat.it>
#
# @version 1.0.0
#
#eliminazione di ogni precedente elaborazione (pulizia memoria)
rm(list=ls())
#caricamento libreria
library("SeleMix")

# Lista Ruoli
#0 SKIP N VARIABILE NON UTILIZZATA
#1 IDENTIFICATIVO I CHIAVE OSSERVAZIONE
#2 TARGET Y VARIABILE DI OGGETTO DI ANALISI
#3 COVARIATA X VARIABILE INDIPENDENTE
#4 PREDIZIONE P VARIABILE DI PREDIZIONE
#5 OUTLIER O FLAG OUTLIER
#6 PESO W PESO CAMPIONARIO
#7 ERRORE E ERRORE INFLUENTE
#8 RANKING R INFLUENCE RANKING
#9 OUTPUT T VARIABILE DI OUTPUT
#10 STRATO S PARTIZIONAMENTO DEL DATASET
#11 PARAMETRI Z PARAMETRI DI ESERCIZIO
#12 MODELLO M MODELLO DATI
#13 SCORE F INFLUENCE SCORE
#14 REPORT G PARAMETRO DI OUTPUT / REPORT


# Lista oggetti Bridge Java - R
# SELEMIX_RESULTSET = "sel_out";
# SELEMIX_WORKSET = "workset";
# SELEMIX_RUOLI_VAR = "role_var";
# SELEMIX_RUOLI_VAR_OUTPUT = "role_var_out";
# SELEMIX_RUOLI_INPUT = "role_in";
# SELEMIX_RUOLI_OUTPUT = "ruol_out";
# SELEMIX_PARAMETRI = "array_par";
# SELEMIX_MODELLO = "array_mod";


### ESEMPI DI MODELLI MONO E MULTIVARIATI

#MODELLO MULTIVARIATO 2 VAR TARGET
#B <- c(1.78840493, -0.065592887, 0.74442347, -0.009121287, -0.04293598, 1.048079464)
#sigma <- c(0.17068817, 0.03489681, 0.03489681, 0.49760310)
#lambda <- 19.96269
#w <- 0.2122271

#MODELLO MULTIVARIATO CON 2 VAR TARGET SENZA COVAR
#B <- c(5.973958, 5.112587)
#sigma <- c(0.3867086, -0.2527695, -0.2527695, 1.2920025)
#lambda <- 11.205
#w <- 0.332

#MODELLO MONOVARIATO
#B <- c(-0.152, 1.215)
#sigma <- c(1.25)
#lambda <- 15.5
#w <- 0.0479

#MODELLO MONOVARIATO SENZA COVAR
#B <- c(-0.152)
#sigma <- c(1.25)
#lambda <- 15.5
#w <- 0.0479

# Impostazione manuale dataset
#workset <- read.csv2('C:/Users/pafrance/photon-workspace/Rscripts/input.strata.csv', dec=".", sep=";")

# imposta variabili di ruolo (specifica manualmente chi è target, covar ecc ecc)
#Y <- "Y1"
#X <- "X1"
#S <- "S1"


#stima completa
mlest <- function(workset, y, x=NULL, ...) {
#environment check
if(missing(y)) stop('iSS Error: Missing TARGET Variable(s)')
y <- matrix(as.numeric(workset[,Y]),ncol=length(Y),nrow=nrow(workset))
if(!missing(x)) x <- matrix(as.numeric(workset[,X]),ncol=length(X),nrow=nrow(workset))

#parameter check
if(!exists("model")) model="LN" #da controllare
if(!exists("t.outl")) t.outl=0.5
if(!exists("lambda")) lambda=3
if(!exists("w")) w=0.05
if(!exists("lambda.fix")) lambda.fix=FALSE
if(!exists("w.fix")) w.fix=FALSE
if(!exists("eps")) eps=1e-7
if(!exists("max.iter")) max.iter=500

#calcolo funzione
est <- ml.est(y=y, x=x, model = model, lambda= as.numeric(lambda), w= as.numeric(w), lambda.fix=lambda.fix, w.fix=w.fix, eps=as.numeric(eps), max.iter=as.numeric(max.iter), t.outl= as.numeric(t.outl), graph=FALSE)
if(length(workset)>1) ypred <- matrix(est$ypred,nrow=nrow(workset),ncol=length(Y))
else ypred <- as.matrix(est$ypred)

#reimpostazione nomi delle variabili
out <- data.frame(tau=est$tau, outlier=est$outlier, pattern=est$pattern)
predname = c()
out1 = c()
for(i in 1:ncol(ypred)) {
pred = ypred[,i]
predname = c(predname, paste("YPRED",i,sep="_"))
out1 <- cbind(out1,pred)
}
out1=data.frame(out1)
colnames(out1) <- predname
#output parameters
report <- list(n.outlier = sum(est$outlier), missing = sum(as.numeric(est$pattern)), is.conv = est$is.conv, sing = est$sing, bic.aic = est$bic.aic)
mod <- list(B=est$B, sigma=est$sigma, lambda=est$lambda, w=est$w )
#setting output roles
roles <- list (P= predname, O="outlier", M=names(mod), G=names(report))
result <-list( out=data.frame(out,out1), roles= roles, mod=mod, report = report)
return(result)
}

#eseguzione strato
strata.mlest <- function(workset, y, x=NULL, s, ...) {
#sistemazione dell'input
strata <- as.factor(workset[,S])
workset[,Y] <- as.numeric(workset[,Y])
workset[,X] <- as.numeric(workset[,X])
#init data
df <- data.frame()
mod <- data.frame()
report <- data.frame()
#esegue MLEST sullo strato
for (i in levels(strata)) {
w <- workset[workset[,S]==i, , drop = TRUE ]
est1<- mlest(w, Y, X)
df <- rbind(df, cbind(w, est1$out)) #ricreazione del dataset
}
#costruisce la lista di ritorno
result <-list( out= df, roles= est1$roles, mod = est1$mod, report = est1$report)
return(result)
}

#Predizione da modello
ypred <- function(workset, y, x=NULL, ... ) {
if(!exists("model")) model="LN"
if(!exists("t.outl")) t.outl=0.5

#environment check
if(missing(y)) stop('iSS Error: Missing TARGET Variable(s)')
y <- matrix(as.numeric(workset[,Y]),ncol=length(Y),nrow=nrow(workset))
if(!missing(x)) x <- matrix(as.numeric(workset[,X]),ncol=length(X),nrow=nrow(workset))

if((exists('B')& exists('sigma')& exists('lambda')& exists('w'))) {
sizex = ifelse(exists("X"), length(X), 0)
beta <- matrix(as.numeric(B), nrow=1+sizex, ncol=length(Y), byrow=TRUE)
s <- matrix(as.numeric(sigma), nrow=length(Y), ncol=length(Y), byrow=TRUE)
l <- as.numeric(lambda)
v <- as.numeric(w)
}
else {
stop('iSS Error: Missing model')
}
est <- pred.y(y=y, x=x, sigma = s, B = beta, model = model, lambda= l, w= v, t.outl= as.numeric(t.outl))
out <- data.frame(est)

#setting output roles
report <- list(n.outlier = sum(est$outlier), missing = sum(as.numeric(est$pattern)) )
roles <- list (P=colnames(out)[1:length(Y)], O= "outlier", G=names(report))
result <- list( out=out, roles= roles, report = report)
return(result)
}

#eseguzione strato
strata.ypred <- function(workset, y, x=NULL, s, ...) {
#sistemazione dell'input
strata <- as.factor(workset[,S])
#workset[,S] <- as.factor(workset[,S])
workset[,Y] <- as.numeric(workset[,Y])
workset[,X] <- as.numeric(workset[,X])

#esempio di selezione per strato
df <- data.frame()
report <- data.frame()
#esegue YPRED sullo strato
for (i in levels(strata)) {
w <- workset[workset[,S]==i, , drop = TRUE ]
est1<- ypred(w, Y, X) #ypred(w, y=y, x=x, sigma = s, B = beta, model = model, lambda= l, w= v, t.outl= as.numeric(t.outl))
df <- rbind(df, cbind(w, est1$out)) #ricreazione del dataset
}
result <-list( out=df, roles= est1$roles, report = est1$report)
return(result)
}

#editing selettivo
seledit <- function(workset, y, p, ...) {
#controllo environment
if(missing(y)|missing(p)) {
stop('iSS Error: Missing TARGET or PREDICTION Variable(s)')
}
if(length(y)!=length(p)) {
stop('iSS Error: Input dimension mismatch')
}

data <- matrix(as.numeric(workset[,Y]),ncol=length(Y),nrow=nrow(workset))
ypred <- matrix(as.numeric(workset[,P]),ncol=length(P),nrow=nrow(workset))
if(!exists("wgt")) wgt=rep(1,nrow(workset))
if(!exists("tot")) tot=colSums(ypred * wgt)
if(!exists("t.sel")) t.sel=0.01

what <- sel.edit (y=data, ypred=ypred, wgt, tot, t.sel= as.numeric(t.sel))
out= data.frame(what)

#setting output roles
report <- list(n.error = sum(out$sel))
roles <- list (E="sel", R= "rank", F="global.score", G=names(report))
result <-list( out= out, roles= roles, report = report)
return(result)
}

#eseguzione strato
strata.seledit <- function(workset, y, p, s, ...) {
#sistemazione dell'input
strata <- as.factor(workset[,S])
workset[,Y] <- as.numeric(workset[,Y])
workset[,P] <- as.numeric(workset[,P])

#esempio di selezione per strato
df <- data.frame()
#report <- data.frame()
#esegue SELEDIT sullo strato
for (i in levels(strata)) {
w <- workset[workset[,S]==i, , drop = TRUE ]
est1<- seledit(w, Y, P)
df <- rbind(df, cbind(w, est1$out)) #ricreazione del dataset
}
result <-list( out=df, roles= est1$roles, report = est1$report )
return(result)
}

#esecuzione di prova in ambiente R
#est2 <- strata.mlest(workset, Y, X, S)

0 comments on commit 68dd18f

Please sign in to comment.