Skip to content

Commit

Permalink
added warning when training indicator is set wrong
Browse files Browse the repository at this point in the history
  • Loading branch information
petersen-f committed Nov 29, 2023
1 parent 5b2d412 commit b768038
Showing 1 changed file with 30 additions and 3 deletions.
33 changes: 30 additions & 3 deletions R/predictiveAnalytics.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,19 @@ predictiveAnalytics <- function(jaspResults, dataset, options) {
"That way the data is automatically extended into the future based on your settings. \n",
"If you just want to check how well the predictions perform historically you can choose the option 'No forecast - verification only'")))

},
# Custom check: verifies that the 'Include in Training' variable is ordered,
# i.e. one uninterrupted run of ones, optionally followed by one uninterrupted
# run of zeros. Reads `options` and `dataset` from the enclosing function.
# Returns a translated error message when the ordering is violated and NULL
# (invisibly) when the indicator is unset or correctly ordered.
.trainingIndicatorOrderCheck <- function() {
  # nothing to validate when no training indicator has been selected
  if (options$trainingIndicator == "") return()

  idx <- as.logical(dataset[[encodeColNames(options$trainingIndicator)]])
  # rle() turns every NA into its own run and NA comparisons would reach `if`,
  # so missing values are removed before the run pattern is inspected
  idx <- idx[!is.na(idx)]
  runs <- rle(idx)$values

  # Accepted patterns: a single run (all ones or all zeros, as before) or
  # exactly ones followed by zeros. Comparing the runs against a recycled
  # c(1, 0) would let interrupted sequences such as 1,0,1,0 slip through.
  isOrdered <- length(runs) == 1L || identical(runs, c(TRUE, FALSE))

  if (!isOrdered) {
    return(gettext(paste(
      "The 'Include in Training' variable you provided does not consist of an uninterrupted sequence of ones (1) followed by an uninterrupted sequence of zeros (0). \n",
      "This is necessary as the module performs forecast verification on historical data to perform out-of-sample predictions for the future.",
      "Since time series data is temporally dependent, you cannot randomly allocate the ones and zeros in the 'Include in Training' variable. \n",
      "Please provide an alternative 'Include in Training' variable or only perform forecast verification/periodical prediction."
    )))
  }
}
)
.hasErrors(dataset = dataset,
Expand Down Expand Up @@ -896,15 +909,29 @@ lagit <- function(a,k) {

dataControl <- jaspResults[["predanResults"]][["predanBounds"]]$object[[1]]

# When a training indicator is selected, make sure it marks enough observations:
# the sum of the indicator minus the number of lags must cover the initial
# training window plus the forecast horizon. Otherwise an error plot is stored
# in the cross-validation container and the function returns early.
if (options$trainingIndicator != "") {
  # NOTE(review): as.numeric() on a factor returns level codes rather than 0/1 -
  # confirm the indicator column arrives as numeric/logical here.
  trainingIndSum <- sum(as.numeric(dataset[[encodeColNames(options$trainingIndicator)]]), na.rm = TRUE) - options$featEngLags
  requiredObservations <- options$resampleInitialTraining + options$resampleForecastHorizon

  if (trainingIndSum < requiredObservations) {
    errorPlotTrain <- createJaspPlot(dependencies = c("trainingIndicator", "resampleInitialTraining", "resampleForecastHorizon", "featEngLags"))
    errorPlotTrain$setError(gettext(paste(
      "Too little data available for training! The 'Include in Training' variable determines which observations are used for training/verification (by setting them to one).",
      "However the selected data is not enough for the indicated Training and Prediction Window.",
      "Please select a 'Include in Training' variable that includes more observations for training or reduce the Training and Prediction Window variables."
    )))
    jaspResults[["predanMainContainer"]][["cvContainer"]][["errorPlotTrain"]] <- errorPlotTrain
    return()
  }
}

# Throw an error when more lags are requested than the initial training window
# holds observations, as the lagged dependent variable cannot be computed then.
# The error plot is stored in the cross-validation container and the function
# returns early.
if (options$featEngLags > options$resampleInitialTraining) {
  errorPlotLags <- createJaspPlot(dependencies = c("featEngLags", "resampleInitialTraining"))
  errorPlotLags$setError(gettext(paste(
    "The length of the training window is shorter than the number of lags selected in the 'Feature Engineering' section.",
    "This makes it impossible to compute all the values of the lagged dependent variable as there is too little data for training",
    "Either increase the training window size or reduce the number of lags."
  )))
  jaspResults[["predanMainContainer"]][["cvContainer"]][["errorPlotLags"]] <- errorPlotLags
  return()
}

Expand Down

0 comments on commit b768038

Please sign in to comment.