forked from hanhanwu/Hanhan_Data_Science_Practice
-
Notifications
You must be signed in to change notification settings - Fork 0
/
time_series_forecasting_data_preprocessing.R
55 lines (41 loc) · 2.13 KB
/
time_series_forecasting_data_preprocessing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# DATA PREPROCESSING MODULE
# Read the input data frame from port 1
# NOTE(review): maml.mapInputPort is supplied by the hosting runtime, not by this file
cadairydata <- maml.mapInputPort(1)
# cadairydata <- read.csv("cadairydata.csv", header = TRUE, stringsAsFactors = FALSE)
# Truncate every month label to its first three characters so the coding
# is consistent, and store the result as a factor
cadairydata$Month <- factor(substr(cadairydata$Month, start = 1, stop = 3))
# Drop the first two columns (selected by position)
cadairydata <- cadairydata[, -(1:2)]
# add a column for time series forecasting: count months from the starting year
num.month <- function(Year, Month) {
  ## Zero-based count of months elapsed since the first month of the
  ## earliest year present in `Year`.
  ##
  ## Arguments:
  ##   Year   numeric vector of calendar years
  ##   Month  numeric vector of month numbers, aligned with `Year`
  ##          (presumably 1-12 -- confirm against the input data)
  ## Returns:
  ##   numeric vector, 12 * (Year - earliest year) + Month - 1;
  ##   entries whose Year or Month is NA are NA.

  ## Find the starting year, ignoring missing entries so that a single NA
  ## does not turn the whole result into NA
  known.years <- Year[!is.na(Year)]
  if (length(known.years) == 0L) {
    ## No usable year to anchor on (empty or all-NA input)
    return(rep(NA_real_, length(Year)))
  }
  min.year <- min(known.years)
  ## Compute the number of months from the start of the time series
  12 * (Year - min.year) + Month - 1
}
## Store the month index returned by num.month as a new column
cadairydata$Month.Count <- num.month(
  Year = cadairydata$Year,
  Month = cadairydata$Month.Number
)
# value transformation with defensive programming
log.transform <- function(invec, multiplier = 1) {
  ## Log transformation with defensive checks: returns
  ## log(multiplier * invec).
  ##
  ## Arguments:
  ##   invec       numeric vector; every non-missing value must be
  ##               strictly greater than zero. NA entries are allowed
  ##               and come back as NA.
  ##   multiplier  a single numeric scale factor, strictly greater than
  ##               zero (default 1).
  ## Returns:
  ##   numeric vector the same length as `invec`. If a check fails, a
  ##   warning is raised and a single NA is returned instead.
  warningmessages <- c("ERROR: Non-numeric argument encountered in function log.transform",
                       "ERROR: Arguments to function log.transform must be greate than zero",
                       "ERROR: Aggurment multiplier to funcition log.transform must be a scaler",
                       "ERROR: Invalid time seies value encountered in function log.transform"
  )

  ## Check the input arguments.
  ## `||` is used because every operand is a single TRUE/FALSE.
  if (!is.numeric(invec) || !is.numeric(multiplier)) {
    warning(warningmessages[1])
    return(NA)
  }
  ## Zero is rejected too: log(0) is -Inf, which na.omit() would not
  ## remove. na.rm = TRUE keeps an NA in the input from turning the
  ## condition into NA, which would make `if` fail.
  if (any(invec <= 0.0, na.rm = TRUE) || any(multiplier <= 0.0, na.rm = TRUE)) {
    warning(warningmessages[2])
    return(NA)
  }
  if (length(multiplier) != 1) {
    warning(warningmessages[3])
    return(NA)
  }

  ## Wrap the multiplication in tryCatch
  ## If there is an exception, raise the warning message and return NA
  tryCatch(
    log(multiplier * invec),
    error = function(e) {
      warning(warningmessages[4])
      NA
    }
  )
}
## Scale factors, one per column, for columns 4 through 7 of the
## data frame (the production data), in column order
multipliers <- as.list(c(1.0, 6.5, 1000.0, 1000.0))
## Run log.transform over each of those columns, pairing every column
## with its own multiplier
cadairydata[, 4:7] <- mapply(
  FUN = log.transform,
  invec = cadairydata[, 4:7],
  multiplier = multipliers,
  SIMPLIFY = FALSE
)
## Discard every row that contains an NA value
cadairydata <- na.omit(cadairydata)
## Pass the data frame, by name, to the output port
maml.mapOutputPort('cadairydata')