Skip to content

Commit

Permalink
Merge pull request #2 from StatisticalServicesCentre/master
Browse files Browse the repository at this point in the history
Update main branch
  • Loading branch information
stevekogo committed Feb 16, 2016
2 parents 9b7d08e + 18b41d3 commit da3d17d
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 43 deletions.
131 changes: 131 additions & 0 deletions R/ClimateMethods/Models/harmonic_markov_model.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#==================================================================================================
#' @title
#' @name
#' @author

#' @description \code{}
#'
#' @param
#' @param
#' @param
#' @param
#' @param
#' @param
#' @param
#
#' @examples
#'
#'
#' @return
#'
climate$methods(harmonic_probability_rain_model = function(data_list=list(),fac=NULL,h_order=4,weights=NULL, parallel=FALSE, separate_station_models=TRUE, save_model_name="") {

input_parameters=list(data_list=data_list,fac=fac,h_order=h_order,weights=weights, parallel=parallel, separate_station_models=separate_station_models, save_model_name=save_model_name)
model_outputs=list()
data_list=add_to_data_info_required_variable_list(data_list, rain_label)
data_list=add_to_data_info_time_period(data_list, daily_label)
# if(!separate_station_models || !data_obj$is_present(station_label)) {
# data_list=add_to_data_info_merge(data_list, TRUE)
# }
if(!separate_station_models) {
data_list=add_to_data_info_merge(data_list, TRUE)
}
climate_data_objs = get_climate_data_objects(data_list)

for(data_obj in climate_data_objs) {

#if doy or year/dos season is not in the data frame, create it.
if( !( data_obj$is_present(dos_label) && data_obj$is_present(season_label) ) ) {
data_obj$add_doy_col()
}
dos_col_name = data_obj$getvname(dos_label)

if(!data_obj$is_present(rain_day_label)) data_obj$add_rain_day_column()
rain_day_col_name = data_obj$getvname(rain_day_label)

if(is.null(fac)) {
model_string <- paste("factor() ~ ",harmonic(dos_col_name,366,h_order)) #Creates string for default case, no factor.
}
else {
if(parallel) {
model_string <- paste("factor(", rain_day_col_name, ") ~", harmonic(dos_col_name,366,h_order),"+",fac) #The parallel case.
}
else {
model_string <- paste("factor(", rain_day_col_name, ") ~", harmonic(dos_col_name,366,h_order),"*",fac)
}
}

curr_data_list=data_obj$get_data_for_analysis(data_list)
i=0
for( curr_data in curr_data_list ) {
nice_name=data_obj$get_meta(data_name_label)
if (i>0){
nice_name=paste0(nice_name,i)
}
print(model_string)
model_outputs[[nice_name]]<-glm(data = curr_data,as.formula(model_string), family=binomial, weights=weights, na.action=na.exclude) #Glm fits the data by a binomial and computes the coefficients of the model.
i=i+1
}

}
if (missing(save_model_name)) {
save_model_name=last_model_name
}
append_model_objects(save_model_name, instat_model$new("harmonic_probability_rain_model",input_parameters,"glm",model_outputs))
}
)

climate$methods(harmonic_markov_amount_rain_model = function(data,fac=NULL,h_order=4,weights=NULL, parallel=FALSE) {

md<-model(h_order,fac,parallel) #This calls the model function below.

fit1<-glm(as.formula(md), family=binomial, weights=NULL, na.action=na.exclude) #Glm fits the data by a binomial and computes the coefficients of the model.


subdata<-subset(data,wet_or_dry=="w") #This subsets a portion of the data.

n_rain<-subdata$Rain

attach(subdata, warn.conflicts=FALSE) #This creates a directory with the subset data.

md<-model1(h_order,fac,parallel) #Calls the model funtion for amounts of rainfall.


fit1<-glm(as.formula(md), family=Gamma, weights=NULL, na.action=na.exclude) #This fits the data by a gamma distribution and computes the coefficients of the model.

detach(subdata)
fit1 #This is the returned model.
}
)

#These functions generates the string for the model to be fitted.
#There are two options; the parallel model and the interacting model.
#in both cases the functions returns the string for the model to be fitted.
#model1 returns the string for rainfall amounts, and model returns the string for chance of rainfall.

model1 <- function(h_order=4,fac=NULL,parallel=FALSE) {

if(is.null(fac)) {
fm1 <- paste("as.numeric(n_rain) ~ ",harmonic(h_order)) #Creates string for default case, no factor.
}
else {
if(parallel) {
fm1 <- paste("as.numeric(n_rain) ~ ",harmonic(h_order),"+",fac) #The parallel case.
}
else {
fm1 <- paste("as.numeric(n_rain) ~ ",harmonic(h_order),"*",fac) #Including the interacting.
}
}
fm1 #The returned string for the model.

}

harmonic <- function(circ_col, circ_length,h_order=4){
sc<-NULL #Initializes sc variable.
for(i in 1:h_order){
sc<-c(sc,paste("cos(",circ_col,"*",i,"*2*pi/",circ_length,")")) #(a)these two lines generates the harmonic cols for the perodicity of the data.
sc<-c(sc,paste("sin(",circ_col,"*",i,"*2*pi/",circ_length,")"))
}
ret=paste("(",paste(sc,collapse="+"),")")
ret
}
24 changes: 24 additions & 0 deletions R/ClimateMethods/Models/model_refclass.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Defining the reference class "model"
# This reference class is a wrapper to save models created through instat or climatic object methods.
# The fields are the properties every climate_data object will have.

# method_name : The name of the instat or climate object method this model came from
# method_parameters : The list parameters used in the method call
# function_name : The name of the R function used, i.e. the type of model outputs
# output_list : A list containing the outputs to all the models created by the method


intstat_model <- setRefClass("intstat_model",
fields = list(method_name = "character", method_parameters = "list",
function_name = "character", output_list = "list")
)

intstat_model$methods(initialize = function(call_name, call_parameters, R_model_type, models) {

method_name <<- call_name
method_parameters <<- call_parameters
function_name <<- R_model_type
output_list <<- models

}
)
95 changes: 56 additions & 39 deletions R/climate_data_refclass.R
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,8 @@ climate_data$methods(add_spell_length_col = function(col_name = "Spell Length",
climate_data$methods(add_rain_day_column = function(col_name = "Rain Day", threshold=0.85)
{

missing_dates_check()

if(!is_present(rain_label)) stop("rain variable is required to calculate Rain day column")
rain_col = getvname(rain_label)
threshold = get_meta(threshold_label, missing(threshold), threshold)
Expand All @@ -1007,61 +1009,76 @@ climate_data$methods(add_rain_day_column = function(col_name = "Rain Day", thres

append_column_to_data(wd,col_name)
append_to_variables(rain_day_label, col_name)
append_to_variables(paste(rain_day_lag_label, "0", sep="_"), col_name)
}
)

climate_data$methods(add_rain_day_lags_column = function(lag_prefix = "Rain Day", lag_order=1)
climate_data$methods(add_rain_day_lags_column = function(lag_order=1)
{

# Complete dates needed for calculations
#Validation
if(!is.numeric(lag_order) || lag_order < 0 || lag_order%%1 != 0) stop("lag_order must be a positive integer")

missing_dates_check()

# rain_day column always needed
if(!is_present(rain_day_label)) add_rain_day_column()
rain_day_col_name = getvname(rain_day_label)

curr_data_list = get_data_for_analysis(data_info = list())
staion_lag_tables = list()
i = 1
for (curr_data in curr_data_list) {

curr_lag_data = as.data.frame(matrix(nrow=nrow(curr_data),ncol=0))

date_col = curr_data[[getvname(date_label)]]
curr_lag_data[[getvname(date_label)]] = date_col


if(!is_present(paste(rain_day_lag_label, lag_order, sep="_"))) {
rain_day_col_name = getvname(rain_day_label)

if(is_present(station_label)) {
station_col = curr_data[[getvname(station_label)]]
curr_lag_data[[getvname(station_label)]] = station_col
for(j in (lag_order-1):0) {
if(!is_present(paste(rain_day_lag_label, j, sep="_"))) {
.self$add_rain_day_lags_column(lag_order = j)
}
}

curr_lag_data[[rain_day_col_name]] = curr_data[[rain_day_col_name]]
curr_data_list = get_data_for_analysis(data_info = list())

wet_dry <- as.data.frame(matrix(nrow=nrow(curr_data),ncol=lag_order+1))
wet_dry[ ,1] = curr_data[[rain_day_col_name]]
#list of data frames to be merged and joined with original data
lag_data_frames = list()

for(n in 1:lag_order) {
i = 1
for (curr_data in curr_data_list) {

# data frame to be added to lag_data_frames
curr_lag_data = as.data.frame(matrix(nrow=nrow(curr_data),ncol=0))

date_col = curr_data[[getvname(date_label)]]
curr_lag_data[[getvname(date_label)]] = date_col

if(is_present(station_label)) {
station_col = curr_data[[getvname(station_label)]]
curr_lag_data[[getvname(station_label)]] = station_col
}

wet_dry[ ,n+1] = c(NA,wet_dry[1:(nrow(curr_data)-1),n])
lags=do.call("paste",c(as.list(wet_dry[,1:n+1]),sep=""))
lags[grep("NA",lags)] <- NA
print(lags)
prev_lag_col = curr_data[[getvname(paste(rain_day_lag_label, lag_order-1, sep="_"))]]
prev_lag_tail = tail(prev_lag_col, nrow(curr_data)-lag_order)

lag_0_col = curr_data[[getvname(paste(rain_day_lag_label, "0", sep="_"))]]
lag_0_head = head(lag_0_col, nrow(curr_data)-lag_order)

new_col = c(rep(NA, lag_order), paste0(prev_lag_tail, lag_0_head))
if(sum(is.na(prev_lag_tail)) > 0 || sum(is.na(lag_0_head))) {
new_col[grep("NA", new_col)] <- NA
}

col_name = paste(rain_day_default_col_name, lag_order)
curr_lag_data[[paste(rain_day_default_col_name, lag_order)]] = new_col

lagname=paste(lag_prefix, n)
curr_lag_data[[lagname]] = sapply(lags,function(x) substring(x,2))
lag_data_frames[[i]] = curr_lag_data
i = i + 1

}

staion_lag_tables[[i]] <- curr_lag_data
i = i + 1
}


lags_merge = do.call("rbind",staion_lag_tables)
.self$join_data(lags_merge, match="first", type="full", by = c(getvname(date_label), getvname(station_label)))

for(n in 1:(lag_order)) {
laglabel = paste(rain_day_lag_label, n, sep="_")
lagname = paste(lag_prefix, n, sep="_")
append_to_variables(laglabel, lagname)
lags_merge = do.call("rbind",lag_data_frames)
if(is_present(station_label)) {
.self$join_data(lags_merge, match="first", type="full", by = c(getvname(date_label), getvname(station_label)))
}
else {
.self$join_data(lags_merge, match="first", type="full", by = getvname(date_label))
}
append_to_variables(paste(rain_day_lag_label, lag_order, sep="_"), col_name)
}
}
)
Expand Down
16 changes: 14 additions & 2 deletions R/climate_refclass.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

climate <- setRefClass("climate",
fields = list(climate_data_objects = "list", used_data_objects = "list",
meta_data = "list")
meta_data = "list", models = "list")
)

# INITIALIZE method
Expand Down Expand Up @@ -41,6 +41,7 @@ climate$methods(initialize = function(data_tables = list(), climate_obj_meta_dat

meta_data <<- climate_obj_meta_data
used_data_objects <<- list()
models <<- list()

if (missing(data_tables) || length(data_tables) == 0) {
climate_data_objects <<- list()
Expand Down Expand Up @@ -337,6 +338,18 @@ climate$methods(append_used_data_objects = function(data, data_name, meta_data =
}
)

climate$methods(append_model_objects = function(name, obj) {
if( !class(name) == "character") {
stop("name must be a character")
}

if ( !class(obj) == "instat_model") {
stop("obj must be a instat_model object")
}

climate_data_objects[[name]] <<- obj
}
)
# is_present_or_meta can check if a given variable name (or list of variable names) is in the data.frame or the meta_data or neither.
# This will be used by other functions particularly related to station level data such as latitude, longditude etc.
# TO DO check functionality for missing cols and if there are multiple elements in long format (currently will return true even if there are no instances possibly correct as like returning true when all values are missing?)
Expand All @@ -352,7 +365,6 @@ climate$methods(is_present_or_meta = function(str, require_all=TRUE, require_in_
}
)


# Other methods
#############################################################################################
# All analysis functions will be methods of climate objects and NOT climate_data objects.
Expand Down
6 changes: 4 additions & 2 deletions R/labels_and_defaults.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ dd_label = "dry_day"
rd_short_label = "w"
dd_short_label = "d"
rain_amount_label = "rain_amount"

rain_day_default_col_name = "Rain Day"

start_of_label="start_of"
end_of_label="end_of"
Expand Down Expand Up @@ -129,6 +129,9 @@ Added_metadata="Added metadata"
Converted_col_="Converted column"
Replaced_value="Replaced value"

#Other defaults
last_model_name="last_model"

# Try to identify columns automatically
# This function is called in climate_data initialize method.

Expand Down Expand Up @@ -342,7 +345,6 @@ add_defaults <- function (imported_from,user) {
if(!(wind_speed_label %in% names(merged))) merged[[wind_speed_label]]<-"Wind speed"
if(!(wind_direction_label %in% names(merged))) merged[[wind_direction_label]]<-"Wind direction"
if(!(rain_day_label %in% names(merged))) merged[[rain_day_label]]<-"Rain Day"
if(!(rain_day_lag_label %in% names(merged))) merged[[rain_day_lag_label]]<-"Rain Day"
return(merged)
}

Expand Down

0 comments on commit da3d17d

Please sign in to comment.