diff --git a/R/ClimateMethods/Models/harmonic_markov_model.R b/R/ClimateMethods/Models/harmonic_markov_model.R new file mode 100644 index 0000000..a669684 --- /dev/null +++ b/R/ClimateMethods/Models/harmonic_markov_model.R @@ -0,0 +1,131 @@ +#================================================================================================== +#' @title +#' @name +#' @author + +#' @description \code{} +#' +#' @param +#' @param +#' @param +#' @param +#' @param +#' @param +#' @param +# +#' @examples +#' +#' +#' @return +#' +climate$methods(harmonic_probability_rain_model = function(data_list=list(),fac=NULL,h_order=4,weights=NULL, parallel=FALSE, separate_station_models=TRUE, save_model_name="") { + + input_parameters=list(data_list=data_list,fac=fac,h_order=h_order,weights=weights, parallel=parallel, separate_station_models=separate_station_models, save_model_name=save_model_name) + model_outputs=list() + data_list=add_to_data_info_required_variable_list(data_list, rain_label) + data_list=add_to_data_info_time_period(data_list, daily_label) +# if(!separate_station_models || !data_obj$is_present(station_label)) { +# data_list=add_to_data_info_merge(data_list, TRUE) +# } + if(!separate_station_models) { + data_list=add_to_data_info_merge(data_list, TRUE) + } + climate_data_objs = get_climate_data_objects(data_list) + + for(data_obj in climate_data_objs) { + + #if doy or year/dos season is not in the data frame, create it. + if( !( data_obj$is_present(dos_label) && data_obj$is_present(season_label) ) ) { + data_obj$add_doy_col() + } + dos_col_name = data_obj$getvname(dos_label) + + if(!data_obj$is_present(rain_day_label)) data_obj$add_rain_day_column() + rain_day_col_name = data_obj$getvname(rain_day_label) + + if(is.null(fac)) { + model_string <- paste("factor() ~ ",harmonic(dos_col_name,366,h_order)) #Creates string for default case, no factor. + } + else { + if(parallel) { + model_string <- paste("factor(", rain_day_col_name, ") ~", harmonic(dos_col_name,366,h_order),"+",fac) #The parallel case. + } + else { + model_string <- paste("factor(", rain_day_col_name, ") ~", harmonic(dos_col_name,366,h_order),"*",fac) + } + } + + curr_data_list=data_obj$get_data_for_analysis(data_list) + i=0 + for( curr_data in curr_data_list ) { + nice_name=data_obj$get_meta(data_name_label) + if (i>0){ + nice_name=paste0(nice_name,i) + } + print(model_string) + model_outputs[[nice_name]]<-glm(data = curr_data,as.formula(model_string), family=binomial, weights=weights, na.action=na.exclude) #Glm fits the data by a binomial and computes the coefficients of the model. + i=i+1 + } + + } + if (missing(save_model_name)) { + save_model_name=last_model_name + } + append_model_objects(save_model_name, instat_model$new("harmonic_probability_rain_model",input_parameters,"glm",model_outputs)) +} +) + +climate$methods(harmonic_markov_amount_rain_model = function(data,fac=NULL,h_order=4,weights=NULL, parallel=FALSE) { + + md<-model(h_order,fac,parallel) #This calls the model function below. + + fit1<-glm(as.formula(md), family=binomial, weights=NULL, na.action=na.exclude) #Glm fits the data by a binomial and computes the coefficients of the model. + + + subdata<-subset(data,wet_or_dry=="w") #This subsets a portion of the data. + + n_rain<-subdata$Rain + + attach(subdata, warn.conflicts=FALSE) #This creates a directory with the subset data. + + md<-model1(h_order,fac,parallel) #Calls the model funtion for amounts of rainfall. + + + fit1<-glm(as.formula(md), family=Gamma, weights=NULL, na.action=na.exclude) #This fits the data by a gamma distribution and computes the coefficients of the model. + + detach(subdata) + fit1 #This is the returned model. +} +) + +#These functions generates the string for the model to be fitted. +#There are two options; the parallel model and the interacting model. +#in both cases the functions returns the string for the model to be fitted. +#model1 returns the string for rainfall amounts, and model returns the string for chance of rainfall. + +model1 <- function(h_order=4,fac=NULL,parallel=FALSE) { + + if(is.null(fac)) { + fm1 <- paste("as.numeric(n_rain) ~ ",harmonic(h_order)) #Creates string for default case, no factor. + } + else { + if(parallel) { + fm1 <- paste("as.numeric(n_rain) ~ ",harmonic(h_order),"+",fac) #The parallel case. + } + else { + fm1 <- paste("as.numeric(n_rain) ~ ",harmonic(h_order),"*",fac) #Including the interacting. + } + } + fm1 #The returned string for the model. + +} + +harmonic <- function(circ_col, circ_length,h_order=4){ + sc<-NULL #Initializes sc variable. + for(i in 1:h_order){ + sc<-c(sc,paste("cos(",circ_col,"*",i,"*2*pi/",circ_length,")")) #(a)these two lines generates the harmonic cols for the perodicity of the data. + sc<-c(sc,paste("sin(",circ_col,"*",i,"*2*pi/",circ_length,")")) + } + ret=paste("(",paste(sc,collapse="+"),")") + ret + } \ No newline at end of file diff --git a/R/ClimateMethods/Models/model_refclass.R b/R/ClimateMethods/Models/model_refclass.R new file mode 100644 index 0000000..e4c6592 --- /dev/null +++ b/R/ClimateMethods/Models/model_refclass.R @@ -0,0 +1,24 @@ +# Defining the reference class "model" +# This reference class is a wrapper to save models created through instat or climatic object methods. +# The fields are the properties every climate_data object will have. + +# method_name : The name of the instat or climate object method this model came from +# method_parameters : The list parameters used in the method call +# function_name : The name of the R function used, i.e. the type of model outputs +# output_list : A list containing the outputs to all the models created by the method + + +intstat_model <- setRefClass("intstat_model", + fields = list(method_name = "character", method_parameters = "list", + function_name = "character", output_list = "list") +) + +intstat_model$methods(initialize = function(call_name, call_parameters, R_model_type, models) { + + method_name <<- call_name + method_parameters <<- call_parameters + function_name <<- R_model_type + output_list <<- models + +} +) \ No newline at end of file diff --git a/R/climate_data_refclass.R b/R/climate_data_refclass.R index 9d483ba..d5a30c0 100644 --- a/R/climate_data_refclass.R +++ b/R/climate_data_refclass.R @@ -997,6 +997,8 @@ climate_data$methods(add_spell_length_col = function(col_name = "Spell Length", climate_data$methods(add_rain_day_column = function(col_name = "Rain Day", threshold=0.85) { + missing_dates_check() + if(!is_present(rain_label)) stop("rain variable is required to calculate Rain day column") rain_col = getvname(rain_label) threshold = get_meta(threshold_label, missing(threshold), threshold) @@ -1007,61 +1009,76 @@ climate_data$methods(add_rain_day_column = function(col_name = "Rain Day", thres append_column_to_data(wd,col_name) append_to_variables(rain_day_label, col_name) + append_to_variables(paste(rain_day_lag_label, "0", sep="_"), col_name) } ) -climate_data$methods(add_rain_day_lags_column = function(lag_prefix = "Rain Day", lag_order=1) +climate_data$methods(add_rain_day_lags_column = function(lag_order=1) { - - # Complete dates needed for calculations + #Validation + if(!is.numeric(lag_order) || lag_order < 0 || lag_order%%1 != 0) stop("lag_order must be a positive integer") + missing_dates_check() + # rain_day column always needed if(!is_present(rain_day_label)) add_rain_day_column() - rain_day_col_name = getvname(rain_day_label) - - curr_data_list = get_data_for_analysis(data_info = list()) - staion_lag_tables = list() - i = 1 - for (curr_data in curr_data_list) { - - curr_lag_data = as.data.frame(matrix(nrow=nrow(curr_data),ncol=0)) - - date_col = curr_data[[getvname(date_label)]] - curr_lag_data[[getvname(date_label)]] = date_col + + + if(!is_present(paste(rain_day_lag_label, lag_order, sep="_"))) { + rain_day_col_name = getvname(rain_day_label) - if(is_present(station_label)) { - station_col = curr_data[[getvname(station_label)]] - curr_lag_data[[getvname(station_label)]] = station_col + for(j in (lag_order-1):0) { + if(!is_present(paste(rain_day_lag_label, j, sep="_"))) { + .self$add_rain_day_lags_column(lag_order = j) + } } - curr_lag_data[[rain_day_col_name]] = curr_data[[rain_day_col_name]] + curr_data_list = get_data_for_analysis(data_info = list()) - wet_dry <- as.data.frame(matrix(nrow=nrow(curr_data),ncol=lag_order+1)) - wet_dry[ ,1] = curr_data[[rain_day_col_name]] + #list of data frames to be merged and joined with original data + lag_data_frames = list() - for(n in 1:lag_order) { + i = 1 + for (curr_data in curr_data_list) { + + # data frame to be added to lag_data_frames + curr_lag_data = as.data.frame(matrix(nrow=nrow(curr_data),ncol=0)) + + date_col = curr_data[[getvname(date_label)]] + curr_lag_data[[getvname(date_label)]] = date_col + + if(is_present(station_label)) { + station_col = curr_data[[getvname(station_label)]] + curr_lag_data[[getvname(station_label)]] = station_col + } - wet_dry[ ,n+1] = c(NA,wet_dry[1:(nrow(curr_data)-1),n]) - lags=do.call("paste",c(as.list(wet_dry[,1:n+1]),sep="")) - lags[grep("NA",lags)] <- NA - print(lags) + prev_lag_col = curr_data[[getvname(paste(rain_day_lag_label, lag_order-1, sep="_"))]] + prev_lag_tail = tail(prev_lag_col, nrow(curr_data)-lag_order) + + lag_0_col = curr_data[[getvname(paste(rain_day_lag_label, "0", sep="_"))]] + lag_0_head = head(lag_0_col, nrow(curr_data)-lag_order) + + new_col = c(rep(NA, lag_order), paste0(prev_lag_tail, lag_0_head)) + if(sum(is.na(prev_lag_tail)) > 0 || sum(is.na(lag_0_head))) { + new_col[grep("NA", new_col)] <- NA + } + + col_name = paste(rain_day_default_col_name, lag_order) + curr_lag_data[[paste(rain_day_default_col_name, lag_order)]] = new_col - lagname=paste(lag_prefix, n) - curr_lag_data[[lagname]] = sapply(lags,function(x) substring(x,2)) + lag_data_frames[[i]] = curr_lag_data + i = i + 1 + } - staion_lag_tables[[i]] <- curr_lag_data - i = i + 1 - } - - - lags_merge = do.call("rbind",staion_lag_tables) - .self$join_data(lags_merge, match="first", type="full", by = c(getvname(date_label), getvname(station_label))) - - for(n in 1:(lag_order)) { - laglabel = paste(rain_day_lag_label, n, sep="_") - lagname = paste(lag_prefix, n, sep="_") - append_to_variables(laglabel, lagname) + lags_merge = do.call("rbind",lag_data_frames) + if(is_present(station_label)) { + .self$join_data(lags_merge, match="first", type="full", by = c(getvname(date_label), getvname(station_label))) + } + else { + .self$join_data(lags_merge, match="first", type="full", by = getvname(date_label)) + } + append_to_variables(paste(rain_day_lag_label, lag_order, sep="_"), col_name) } } ) diff --git a/R/climate_refclass.R b/R/climate_refclass.R index f88e6b1..32ed1a2 100644 --- a/R/climate_refclass.R +++ b/R/climate_refclass.R @@ -9,7 +9,7 @@ climate <- setRefClass("climate", fields = list(climate_data_objects = "list", used_data_objects = "list", - meta_data = "list") + meta_data = "list", models = "list") ) # INITIALIZE method @@ -41,6 +41,7 @@ climate$methods(initialize = function(data_tables = list(), climate_obj_meta_dat meta_data <<- climate_obj_meta_data used_data_objects <<- list() + models <<- list() if (missing(data_tables) || length(data_tables) == 0) { climate_data_objects <<- list() @@ -337,6 +338,18 @@ climate$methods(append_used_data_objects = function(data, data_name, meta_data = } ) +climate$methods(append_model_objects = function(name, obj) { + if( !class(name) == "character") { + stop("name must be a character") + } + + if ( !class(obj) == "instat_model") { + stop("obj must be a instat_model object") + } + + climate_data_objects[[name]] <<- obj +} +) # is_present_or_meta can check if a given variable name (or list of variable names) is in the data.frame or the meta_data or neither. # This will be used by other functions particularly related to station level data such as latitude, longditude etc. # TO DO check functionality for missing cols and if there are multiple elements in long format (currently will return true even if there are no instances possibly correct as like returning true when all values are missing?) @@ -352,7 +365,6 @@ climate$methods(is_present_or_meta = function(str, require_all=TRUE, require_in_ } ) - # Other methods ############################################################################################# # All analysis functions will be methods of climate objects and NOT climate_data objects. diff --git a/R/labels_and_defaults.R b/R/labels_and_defaults.R index 14a1ace..16b4ceb 100644 --- a/R/labels_and_defaults.R +++ b/R/labels_and_defaults.R @@ -43,7 +43,7 @@ dd_label = "dry_day" rd_short_label = "w" dd_short_label = "d" rain_amount_label = "rain_amount" - +rain_day_default_col_name = "Rain Day" start_of_label="start_of" end_of_label="end_of" @@ -129,6 +129,9 @@ Added_metadata="Added metadata" Converted_col_="Converted column" Replaced_value="Replaced value" +#Other defaults +last_model_name="last_model" + # Try to identify columns automatically # This function is called in climate_data initialize method. @@ -342,7 +345,6 @@ add_defaults <- function (imported_from,user) { if(!(wind_speed_label %in% names(merged))) merged[[wind_speed_label]]<-"Wind speed" if(!(wind_direction_label %in% names(merged))) merged[[wind_direction_label]]<-"Wind direction" if(!(rain_day_label %in% names(merged))) merged[[rain_day_label]]<-"Rain Day" - if(!(rain_day_lag_label %in% names(merged))) merged[[rain_day_lag_label]]<-"Rain Day" return(merged) }