From 0455c884d7fe3cd33a67fd3da9d4d6d978615c18 Mon Sep 17 00:00:00 2001 From: dannyparsons Date: Sat, 6 Feb 2016 18:07:20 +0300 Subject: [PATCH 1/3] completed add_rain_day_lags_column and add_rain_day_column methods --- R/climate_data_refclass.R | 95 +++++++++++++++++++++++---------------- R/labels_and_defaults.R | 3 +- 2 files changed, 57 insertions(+), 41 deletions(-) diff --git a/R/climate_data_refclass.R b/R/climate_data_refclass.R index 9d483ba..d5a30c0 100644 --- a/R/climate_data_refclass.R +++ b/R/climate_data_refclass.R @@ -997,6 +997,8 @@ climate_data$methods(add_spell_length_col = function(col_name = "Spell Length", climate_data$methods(add_rain_day_column = function(col_name = "Rain Day", threshold=0.85) { + missing_dates_check() + if(!is_present(rain_label)) stop("rain variable is required to calculate Rain day column") rain_col = getvname(rain_label) threshold = get_meta(threshold_label, missing(threshold), threshold) @@ -1007,61 +1009,76 @@ climate_data$methods(add_rain_day_column = function(col_name = "Rain Day", thres append_column_to_data(wd,col_name) append_to_variables(rain_day_label, col_name) + append_to_variables(paste(rain_day_lag_label, "0", sep="_"), col_name) } ) -climate_data$methods(add_rain_day_lags_column = function(lag_prefix = "Rain Day", lag_order=1) +climate_data$methods(add_rain_day_lags_column = function(lag_order=1) { - - # Complete dates needed for calculations + #Validation + if(!is.numeric(lag_order) || lag_order < 0 || lag_order%%1 != 0) stop("lag_order must be a positive integer") + missing_dates_check() + # rain_day column always needed if(!is_present(rain_day_label)) add_rain_day_column() - rain_day_col_name = getvname(rain_day_label) - - curr_data_list = get_data_for_analysis(data_info = list()) - staion_lag_tables = list() - i = 1 - for (curr_data in curr_data_list) { - - curr_lag_data = as.data.frame(matrix(nrow=nrow(curr_data),ncol=0)) - - date_col = curr_data[[getvname(date_label)]] - curr_lag_data[[getvname(date_label)]] = date_col + + + if(!is_present(paste(rain_day_lag_label, lag_order, sep="_"))) { + rain_day_col_name = getvname(rain_day_label) - if(is_present(station_label)) { - station_col = curr_data[[getvname(station_label)]] - curr_lag_data[[getvname(station_label)]] = station_col + for(j in (lag_order-1):0) { + if(!is_present(paste(rain_day_lag_label, j, sep="_"))) { + .self$add_rain_day_lags_column(lag_order = j) + } } - curr_lag_data[[rain_day_col_name]] = curr_data[[rain_day_col_name]] + curr_data_list = get_data_for_analysis(data_info = list()) - wet_dry <- as.data.frame(matrix(nrow=nrow(curr_data),ncol=lag_order+1)) - wet_dry[ ,1] = curr_data[[rain_day_col_name]] + #list of data frames to be merged and joined with original data + lag_data_frames = list() - for(n in 1:lag_order) { + i = 1 + for (curr_data in curr_data_list) { + + # data frame to be added to lag_data_frames + curr_lag_data = as.data.frame(matrix(nrow=nrow(curr_data),ncol=0)) + + date_col = curr_data[[getvname(date_label)]] + curr_lag_data[[getvname(date_label)]] = date_col + + if(is_present(station_label)) { + station_col = curr_data[[getvname(station_label)]] + curr_lag_data[[getvname(station_label)]] = station_col + } - wet_dry[ ,n+1] = c(NA,wet_dry[1:(nrow(curr_data)-1),n]) - lags=do.call("paste",c(as.list(wet_dry[,1:n+1]),sep="")) - lags[grep("NA",lags)] <- NA - print(lags) + prev_lag_col = curr_data[[getvname(paste(rain_day_lag_label, lag_order-1, sep="_"))]] + prev_lag_tail = tail(prev_lag_col, nrow(curr_data)-lag_order) + + lag_0_col = curr_data[[getvname(paste(rain_day_lag_label, "0", sep="_"))]] + lag_0_head = head(lag_0_col, nrow(curr_data)-lag_order) + + new_col = c(rep(NA, lag_order), paste0(prev_lag_tail, lag_0_head)) + if(sum(is.na(prev_lag_tail)) > 0 || sum(is.na(lag_0_head))) { + new_col[grep("NA", new_col)] <- NA + } + + col_name = paste(rain_day_default_col_name, lag_order) + curr_lag_data[[paste(rain_day_default_col_name, lag_order)]] = new_col - lagname=paste(lag_prefix, n) - curr_lag_data[[lagname]] = sapply(lags,function(x) substring(x,2)) + lag_data_frames[[i]] = curr_lag_data + i = i + 1 + } - staion_lag_tables[[i]] <- curr_lag_data - i = i + 1 - } - - - lags_merge = do.call("rbind",staion_lag_tables) - .self$join_data(lags_merge, match="first", type="full", by = c(getvname(date_label), getvname(station_label))) - - for(n in 1:(lag_order)) { - laglabel = paste(rain_day_lag_label, n, sep="_") - lagname = paste(lag_prefix, n, sep="_") - append_to_variables(laglabel, lagname) + lags_merge = do.call("rbind",lag_data_frames) + if(is_present(station_label)) { + .self$join_data(lags_merge, match="first", type="full", by = c(getvname(date_label), getvname(station_label))) + } + else { + .self$join_data(lags_merge, match="first", type="full", by = getvname(date_label)) + } + append_to_variables(paste(rain_day_lag_label, lag_order, sep="_"), col_name) } } ) diff --git a/R/labels_and_defaults.R b/R/labels_and_defaults.R index 14a1ace..8a91833 100644 --- a/R/labels_and_defaults.R +++ b/R/labels_and_defaults.R @@ -43,7 +43,7 @@ dd_label = "dry_day" rd_short_label = "w" dd_short_label = "d" rain_amount_label = "rain_amount" - +rain_day_default_col_name = "Rain Day" start_of_label="start_of" end_of_label="end_of" @@ -342,7 +342,6 @@ add_defaults <- function (imported_from,user) { if(!(wind_speed_label %in% names(merged))) merged[[wind_speed_label]]<-"Wind speed" if(!(wind_direction_label %in% names(merged))) merged[[wind_direction_label]]<-"Wind direction" if(!(rain_day_label %in% names(merged))) merged[[rain_day_label]]<-"Rain Day" - if(!(rain_day_lag_label %in% names(merged))) merged[[rain_day_lag_label]]<-"Rain Day" return(merged) } From 2d0b0214cfd8cedad00e57bb7b9db153e7c27a22 Mon Sep 17 00:00:00 2001 From: dannyparsons Date: Tue, 9 Feb 2016 15:47:35 +0300 Subject: [PATCH 2/3] added harmonic_markov_model methods --- .../Models/harmonic_markov_model.R | 154 ++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 R/ClimateMethods/Models/harmonic_markov_model.R diff --git a/R/ClimateMethods/Models/harmonic_markov_model.R b/R/ClimateMethods/Models/harmonic_markov_model.R new file mode 100644 index 0000000..47ba11a --- /dev/null +++ b/R/ClimateMethods/Models/harmonic_markov_model.R @@ -0,0 +1,154 @@ +#================================================================================================== +# END OF RAINS +#' @title Computational of end of the rain with water balance definition +#' @name add_end_rain +#' @author Danny, Frederic and Fanuel 2015 (AMI) + +#' @description \code{compute.end_rain} +#' compute end of the rains given climate object +#' +#' @param data_list A list containing stations for analysis, the years or periods to be analysed and the required variables from the data. +#' If blank, the system will choose all data appropriate for the analysis. +#' @param earliest_day The earliest possible day for the end of the rains. +#' @param water_balance_col_name The column name to use for the water balance column if it needs to be created. +#' @param col_name The column name to use for the end of the rains. +#' @param capacity_max The maximum water balance. +#' @param evaporation Evaporation per day +#' @param Replace Logical indicating whether the column should be replaced if there is already a column in the data with the value of col_name. +# +#' @examples +#' ClimateObj <- climate( data_tables = list( data ), date_formats = list( "%m/%d/%Y" ) ) +#' Default dateformats: "%Y/%m/%d" +#' # where "data" is a data.frame containing the desired data to be computed. +#' climateObj$add_end_rain() +#' @return return end of the rain +#' +climate$methods(harmonic_probability_rain_model<-function(data_list=list(),fac=NULL,h_order=4,weights=NULL, parallel=FALSE, separate_station_models=TRUE) { + + data_list=add_to_data_info_required_variable_list(data_list, rain_label) + data_list=add_to_data_info_time_period(data_list, daily_label) + if(!separate_station_models || !data_obj$is_present(station_label)) { + data_list=add_to_data_info_merge(data_list, TRUE) + } + climate_data_objs = get_climate_data_objects(data_list) + + for(data_obj in climate_data_objs) { + + #if doy or year/dos season is not in the data frame, create it. + if( !( data_obj$is_present(dos_label) && data_obj$is_present(season_label) ) ) { + data_obj$add_doy_col() + } + dos_col_name = data_obj$getvname(dos_label) + + if(!data_obj$is_present(rain_day_label)) data_obj$add_rain_day_column() + rain_day_col_name = data_obj$getvname(rain_day_label) + + if(is.null(fac)) { + model_string <- paste("factor() ~ ",harmonic(dos_col_name,366,h_order)) #Creates string for default case, no factor. + } + else { + if(parallel) { + model_string <- paste("factor(", rain_day_col_name, ") ~", harmonic(dos_col_name,366,h_order),"+",fac) #The parallel case. + } + else { + model_string <- paste("factor(", rain_day_col_name, ") ~", harmonic(dos_col_name,366,h_order),"*",fac) + } + } + + curr_data_list=data_obj$get_data_for_analysis(data_list) + + for( curr_data in curr_data_list ) { + + } + fit1<-glm(data = ,as.formula(model_string), family=binomial, weights=NULL, na.action=na.exclude) #Glm fits the data by a binomial and computes the coefficients of the model. + + } +} +) + +climate$methods(harmonic_markov_amount_rain_model<-function(data,fac=NULL,h_order=4,weights=NULL, parallel=FALSE) { + + md<-model(h_order,fac,parallel) #This calls the model function below. + + fit1<-glm(as.formula(md), family=binomial, weights=NULL, na.action=na.exclude) #Glm fits the data by a binomial and computes the coefficients of the model. + + + subdata<-subset(data,wet_or_dry=="w") #This subsets a portion of the data. + + n_rain<-subdata$Rain + + attach(subdata, warn.conflicts=FALSE) #This creates a directory with the subset data. + + md<-model1(h_order,fac,parallel) #Calls the model funtion for amounts of rainfall. + + + fit1<-glm(as.formula(md), family=Gamma, weights=NULL, na.action=na.exclude) #This fits the data by a gamma distribution and computes the coefficients of the model. + + detach(subdata) + fit1 #This is the returned model. +} +) + +fit_model<-function(data,fac=NULL,h_order=4,weights=NULL, parallel=FALSE) { + + attach(data,warn.conflicts=FALSE) #This creates a directory with data attached for easy call of variables. + + md<-model(h_order,fac,parallel) #This calls the model function below. + + + fit1<-glm(as.formula(md), family=binomial, weights=NULL, na.action=na.exclude) #Glm fits the data by a binomial and computes the coefficients of the model. + + detach(data) #This deletes the created directory. + fit1 #This returns the fitted model. + } + + #Returns a fitted model for amount of rainfall. + +fit_amts<-function(data,fac=NULL,h_order=3,weights=NULL, parallel=FALSE) { + + subdata<-subset(data,wet_or_dry=="w") #This subsets a portion of the data. + + n_rain<-subdata$Rain + + attach(subdata, warn.conflicts=FALSE) #This creates a directory with the subset data. + + md<-model1(h_order,fac,parallel) #Calls the model funtion for amounts of rainfall. + + + fit1<-glm(as.formula(md), family=Gamma, weights=NULL, na.action=na.exclude) #This fits the data by a gamma distribution and computes the coefficients of the model. + + detach(subdata) + fit1 #This is the returned model. + } + +#These functions generates the string for the model to be fitted. +#There are two options; the parallel model and the interacting model. +#in both cases the functions returns the string for the model to be fitted. +#model1 returns the string for rainfall amounts, and model returns the string for chance of rainfall. + +model1 <- function(h_order=4,fac=NULL,parallel=FALSE) { + + if(is.null(fac)) { + fm1 <- paste("as.numeric(n_rain) ~ ",harmonic(h_order)) #Creates string for default case, no factor. + } + else { + if(parallel) { + fm1 <- paste("as.numeric(n_rain) ~ ",harmonic(h_order),"+",fac) #The parallel case. + } + else { + fm1 <- paste("as.numeric(n_rain) ~ ",harmonic(h_order),"*",fac) #Including the interacting. + } + } + fm1 #The returned string for the model. + +} + +harmonic<-function(circ_col, circ_length,h_order=4){ + sc<-NULL #Initializes sc variable. + for(i in 1:h_order){ + sc<-c(sc,paste("cos(",circ_col,"*",i,"*2*pi/",circ_length,")")) #(a)these two lines generates the harmonic cols for the perodicity of the data. + sc<-c(sc,paste("sin(",circ_col,"*",i,"*2*pi/",circ_length,")")) + } + ret=paste("(",paste(sc,collapse="+"),")") + ret + } \ No newline at end of file From a7acd58e9ed74653fd0feea429674dcf859b969f Mon Sep 17 00:00:00 2001 From: dannyparsons Date: Wed, 10 Feb 2016 18:20:18 +0300 Subject: [PATCH 3/3] added models funtionality --- .../Models/harmonic_markov_model.R | 95 +++++++------------ R/ClimateMethods/Models/model_refclass.R | 24 +++++ R/climate_refclass.R | 16 +++- R/labels_and_defaults.R | 3 + 4 files changed, 77 insertions(+), 61 deletions(-) create mode 100644 R/ClimateMethods/Models/model_refclass.R diff --git a/R/ClimateMethods/Models/harmonic_markov_model.R b/R/ClimateMethods/Models/harmonic_markov_model.R index 47ba11a..a669684 100644 --- a/R/ClimateMethods/Models/harmonic_markov_model.R +++ b/R/ClimateMethods/Models/harmonic_markov_model.R @@ -1,33 +1,33 @@ #================================================================================================== -# END OF RAINS -#' @title Computational of end of the rain with water balance definition -#' @name add_end_rain -#' @author Danny, Frederic and Fanuel 2015 (AMI) +#' @title +#' @name +#' @author -#' @description \code{compute.end_rain} -#' compute end of the rains given climate object +#' @description \code{} #' -#' @param data_list A list containing stations for analysis, the years or periods to be analysed and the required variables from the data. -#' If blank, the system will choose all data appropriate for the analysis. -#' @param earliest_day The earliest possible day for the end of the rains. -#' @param water_balance_col_name The column name to use for the water balance column if it needs to be created. -#' @param col_name The column name to use for the end of the rains. -#' @param capacity_max The maximum water balance. -#' @param evaporation Evaporation per day -#' @param Replace Logical indicating whether the column should be replaced if there is already a column in the data with the value of col_name. +#' @param +#' @param +#' @param +#' @param +#' @param +#' @param +#' @param # #' @examples -#' ClimateObj <- climate( data_tables = list( data ), date_formats = list( "%m/%d/%Y" ) ) -#' Default dateformats: "%Y/%m/%d" -#' # where "data" is a data.frame containing the desired data to be computed. -#' climateObj$add_end_rain() -#' @return return end of the rain #' -climate$methods(harmonic_probability_rain_model<-function(data_list=list(),fac=NULL,h_order=4,weights=NULL, parallel=FALSE, separate_station_models=TRUE) { - +#' +#' @return +#' +climate$methods(harmonic_probability_rain_model = function(data_list=list(),fac=NULL,h_order=4,weights=NULL, parallel=FALSE, separate_station_models=TRUE, save_model_name="") { + + input_parameters=list(data_list=data_list,fac=fac,h_order=h_order,weights=weights, parallel=parallel, separate_station_models=separate_station_models, save_model_name=save_model_name) + model_outputs=list() data_list=add_to_data_info_required_variable_list(data_list, rain_label) data_list=add_to_data_info_time_period(data_list, daily_label) - if(!separate_station_models || !data_obj$is_present(station_label)) { +# if(!separate_station_models || !data_obj$is_present(station_label)) { +# data_list=add_to_data_info_merge(data_list, TRUE) +# } + if(!separate_station_models) { data_list=add_to_data_info_merge(data_list, TRUE) } climate_data_objs = get_climate_data_objects(data_list) @@ -56,17 +56,26 @@ climate$methods(harmonic_probability_rain_model<-function(data_list=list(),fac=N } curr_data_list=data_obj$get_data_for_analysis(data_list) - + i=0 for( curr_data in curr_data_list ) { - + nice_name=data_obj$get_meta(data_name_label) + if (i>0){ + nice_name=paste0(nice_name,i) + } + print(model_string) + model_outputs[[nice_name]]<-glm(data = curr_data,as.formula(model_string), family=binomial, weights=weights, na.action=na.exclude) #Glm fits the data by a binomial and computes the coefficients of the model. + i=i+1 } - fit1<-glm(data = ,as.formula(model_string), family=binomial, weights=NULL, na.action=na.exclude) #Glm fits the data by a binomial and computes the coefficients of the model. } + if (missing(save_model_name)) { + save_model_name=last_model_name + } + append_model_objects(save_model_name, instat_model$new("harmonic_probability_rain_model",input_parameters,"glm",model_outputs)) } ) -climate$methods(harmonic_markov_amount_rain_model<-function(data,fac=NULL,h_order=4,weights=NULL, parallel=FALSE) { +climate$methods(harmonic_markov_amount_rain_model = function(data,fac=NULL,h_order=4,weights=NULL, parallel=FALSE) { md<-model(h_order,fac,parallel) #This calls the model function below. @@ -89,38 +98,6 @@ climate$methods(harmonic_markov_amount_rain_model<-function(data,fac=NULL,h_orde } ) -fit_model<-function(data,fac=NULL,h_order=4,weights=NULL, parallel=FALSE) { - - attach(data,warn.conflicts=FALSE) #This creates a directory with data attached for easy call of variables. - - md<-model(h_order,fac,parallel) #This calls the model function below. - - - fit1<-glm(as.formula(md), family=binomial, weights=NULL, na.action=na.exclude) #Glm fits the data by a binomial and computes the coefficients of the model. - - detach(data) #This deletes the created directory. - fit1 #This returns the fitted model. - } - - #Returns a fitted model for amount of rainfall. - -fit_amts<-function(data,fac=NULL,h_order=3,weights=NULL, parallel=FALSE) { - - subdata<-subset(data,wet_or_dry=="w") #This subsets a portion of the data. - - n_rain<-subdata$Rain - - attach(subdata, warn.conflicts=FALSE) #This creates a directory with the subset data. - - md<-model1(h_order,fac,parallel) #Calls the model funtion for amounts of rainfall. - - - fit1<-glm(as.formula(md), family=Gamma, weights=NULL, na.action=na.exclude) #This fits the data by a gamma distribution and computes the coefficients of the model. - - detach(subdata) - fit1 #This is the returned model. - } - #These functions generates the string for the model to be fitted. #There are two options; the parallel model and the interacting model. #in both cases the functions returns the string for the model to be fitted. @@ -143,7 +120,7 @@ model1 <- function(h_order=4,fac=NULL,parallel=FALSE) { } -harmonic<-function(circ_col, circ_length,h_order=4){ +harmonic <- function(circ_col, circ_length,h_order=4){ sc<-NULL #Initializes sc variable. for(i in 1:h_order){ sc<-c(sc,paste("cos(",circ_col,"*",i,"*2*pi/",circ_length,")")) #(a)these two lines generates the harmonic cols for the perodicity of the data. diff --git a/R/ClimateMethods/Models/model_refclass.R b/R/ClimateMethods/Models/model_refclass.R new file mode 100644 index 0000000..e4c6592 --- /dev/null +++ b/R/ClimateMethods/Models/model_refclass.R @@ -0,0 +1,24 @@ +# Defining the reference class "model" +# This reference class is a wrapper to save models created through instat or climatic object methods. +# The fields are the properties every climate_data object will have. + +# method_name : The name of the instat or climate object method this model came from +# method_parameters : The list parameters used in the method call +# function_name : The name of the R function used, i.e. the type of model outputs +# output_list : A list containing the outputs to all the models created by the method + + +intstat_model <- setRefClass("intstat_model", + fields = list(method_name = "character", method_parameters = "list", + function_name = "character", output_list = "list") +) + +intstat_model$methods(initialize = function(call_name, call_parameters, R_model_type, models) { + + method_name <<- call_name + method_parameters <<- call_parameters + function_name <<- R_model_type + output_list <<- models + +} +) \ No newline at end of file diff --git a/R/climate_refclass.R b/R/climate_refclass.R index f88e6b1..32ed1a2 100644 --- a/R/climate_refclass.R +++ b/R/climate_refclass.R @@ -9,7 +9,7 @@ climate <- setRefClass("climate", fields = list(climate_data_objects = "list", used_data_objects = "list", - meta_data = "list") + meta_data = "list", models = "list") ) # INITIALIZE method @@ -41,6 +41,7 @@ climate$methods(initialize = function(data_tables = list(), climate_obj_meta_dat meta_data <<- climate_obj_meta_data used_data_objects <<- list() + models <<- list() if (missing(data_tables) || length(data_tables) == 0) { climate_data_objects <<- list() @@ -337,6 +338,18 @@ climate$methods(append_used_data_objects = function(data, data_name, meta_data = } ) +climate$methods(append_model_objects = function(name, obj) { + if( !class(name) == "character") { + stop("name must be a character") + } + + if ( !class(obj) == "instat_model") { + stop("obj must be a instat_model object") + } + + climate_data_objects[[name]] <<- obj +} +) # is_present_or_meta can check if a given variable name (or list of variable names) is in the data.frame or the meta_data or neither. # This will be used by other functions particularly related to station level data such as latitude, longditude etc. # TO DO check functionality for missing cols and if there are multiple elements in long format (currently will return true even if there are no instances possibly correct as like returning true when all values are missing?) @@ -352,7 +365,6 @@ climate$methods(is_present_or_meta = function(str, require_all=TRUE, require_in_ } ) - # Other methods ############################################################################################# # All analysis functions will be methods of climate objects and NOT climate_data objects. diff --git a/R/labels_and_defaults.R b/R/labels_and_defaults.R index 8a91833..16b4ceb 100644 --- a/R/labels_and_defaults.R +++ b/R/labels_and_defaults.R @@ -129,6 +129,9 @@ Added_metadata="Added metadata" Converted_col_="Converted column" Replaced_value="Replaced value" +#Other defaults +last_model_name="last_model" + # Try to identify columns automatically # This function is called in climate_data initialize method.