Early warnings 23_01_24

Prueba alertas tempranas para documentacion
PEM-Humboldt · Jan 23, 2024 · f980597 · f980597
1 parent 91f6e98
commit f980597
Show file tree

Hide file tree

Showing 586 changed files with 9,619 additions and 295 deletions.
diff --git a/pipelines/Early_warnings temperature.json b/pipelines/Early_warnings temperature.json
@@ -0,0 +1,194 @@
+{
+  "nodes": [
+    {
+      "id": "0",
+      "type": "io",
+      "position": {
+        "x": 162.39999389648438,
+        "y": 195.60000610351562
+      },
+      "data": {
+        "descriptionFile": "00_studyarea_to_WKT2>studyarea_to_WKT.yml"
+      }
+    },
+    {
+      "id": "1",
+      "type": "constant",
+      "position": {
+        "x": -149.60000610351562,
+        "y": 154.60000610351562
+      },
+      "dragHandle": ".dragHandle",
+      "data": {
+        "type": "text",
+        "value": "/scripts/00_studyarea_to_WKT/input/Boyaca.gpkg"
+      }
+    },
+    {
+      "id": "2",
+      "type": "constant",
+      "position": {
+        "x": -115.60000610351562,
+        "y": 299.6000061035156
+      },
+      "dragHandle": ".dragHandle",
+      "data": {
+        "type": "int",
+        "value": 3395
+      }
+    },
+    {
+      "id": "4",
+      "type": "constant",
+      "position": {
+        "x": 298.3999938964844,
+        "y": 339.6000061035156
+      },
+      "dragHandle": ".dragHandle",
+      "data": {
+        "type": "text",
+        "value": "/scripts/00_load_collection/input/Temperature_collection"
+      }
+    },
+    {
+      "id": "6",
+      "type": "io",
+      "position": {
+        "x": 1236.3999938964844,
+        "y": 160.60000610351562
+      },
+      "data": {
+        "descriptionFile": "00_Early_Warnings_2>Early_Warnings.yml"
+      }
+    },
+    {
+      "id": "7",
+      "type": "output",
+      "position": {
+        "x": 1673.3999938964844,
+        "y": 160.60000610351562
+      },
+      "data": {
+        "label": "Output"
+      }
+    },
+    {
+      "id": "8",
+      "type": "io",
+      "position": {
+        "x": 768.3999938964844,
+        "y": 235.60000610351562
+      },
+      "data": {
+        "descriptionFile": "00_load_collection2>load_collection.yml"
+      }
+    }
+  ],
+  "edges": [
+    {
+      "source": "1",
+      "sourceHandle": null,
+      "target": "0",
+      "targetHandle": "studyarea_path",
+      "id": "reactflow__edge-1-0studyarea_path"
+    },
+    {
+      "source": "2",
+      "sourceHandle": null,
+      "target": "0",
+      "targetHandle": "studyarea_epsg",
+      "id": "reactflow__edge-2-0studyarea_epsg"
+    },
+    {
+      "source": "6",
+      "sourceHandle": "grafica1_trend",
+      "target": "7",
+      "targetHandle": null,
+      "id": "reactflow__edge-6grafica1_trend-7"
+    },
+    {
+      "source": "0",
+      "sourceHandle": "val_wkt_path",
+      "target": "8",
+      "targetHandle": "wkt_area",
+      "id": "reactflow__edge-0val_wkt_path-8wkt_area"
+    },
+    {
+      "source": "2",
+      "sourceHandle": null,
+      "target": "8",
+      "targetHandle": "epsg",
+      "id": "reactflow__edge-2-8epsg"
+    },
+    {
+      "source": "4",
+      "sourceHandle": null,
+      "target": "8",
+      "targetHandle": "collection_path",
+      "id": "reactflow__edge-4-8collection_path"
+    },
+    {
+      "source": "8",
+      "sourceHandle": "dir_stack",
+      "target": "6",
+      "targetHandle": "dir_collection",
+      "id": "reactflow__edge-8dir_stack-6dir_collection"
+    }
+  ],
+  "inputs": {
+    "00_Early_Warnings_2>Early_Warnings.yml@6|fun_summary": {
+      "description": "fun_summary",
+      "label": "fun_summary",
+      "type": "text",
+      "example": "mean"
+    },
+    "00_Early_Warnings_2>Early_Warnings.yml@6|projection_factor": {
+      "description": "projection_factor",
+      "label": "projection_factor",
+      "type": "int",
+      "example": 2
+    },
+    "00_Early_Warnings_2>Early_Warnings.yml@6|threshold": {
+      "description": "threshold",
+      "label": "threshold",
+      "type": "text"
+    },
+    "00_Early_Warnings_2>Early_Warnings.yml@6|threshold_trend": {
+      "description": "threshold_trend",
+      "label": "threshold_trend",
+      "type": "text",
+      "example": "mean(x[1:20])+1.5"
+    },
+    "00_load_collection2>load_collection.yml@8|resolution": {
+      "description": "resolution",
+      "label": "resolution",
+      "type": "int",
+      "example": 1000
+    },
+    "00_load_collection2>load_collection.yml@8|time_period": {
+      "description": "time_period",
+      "label": "time period",
+      "type": "text",
+      "example": "P1Y"
+    },
+    "00_load_collection2>load_collection.yml@8|time_start": {
+      "description": "time_start",
+      "label": "start of time interval",
+      "type": "text",
+      "example": "NA"
+    },
+    "00_load_collection2>load_collection.yml@8|time_end": {
+      "description": "time_end",
+      "label": "end of time interval",
+      "type": "text",
+      "example": "NA"
+    }
+  },
+  "outputs": {
+    "00_Early_Warnings_2>Early_Warnings.yml@6|grafica1_trend": {
+      "description": "grafica1_trend",
+      "label": "grafica1_trend",
+      "type": "image/jpg"
+    }
+  }
+}
diff --git a/scripts/00_Early_Warnings/Early_Warnings.R b/scripts/00_Early_Warnings/Early_Warnings.R
@@ -0,0 +1,154 @@
+#### Load required packages - libraries to run the script ####
+
+# Install necessary libraries - packages  
+packagesPrev<- installed.packages()[,"Package"] # Check and get a list of installed packages in this machine and R version
+packagesNeed<- list("magrittr", "terra", "raster", "sf", "fasterize", "pbapply", "this.path", "rjson", "ggplot2", "ggpubr", "plotly") # Define the list of required packages to run the script
+lapply(packagesNeed, function(x) {   if ( ! x %in% packagesPrev ) { install.packages(x, force=T)}    }) # Check and install required packages that are not previously installed
+
+# Load libraries
+packagesList<-list("magrittr", "terra", "ggplot2", "raster") # Explicitly list the required packages throughout the entire routine. Explicitly listing the required packages throughout the routine ensures that only the necessary packages are listed. Unlike 'packagesNeed', this list includes packages with functions that cannot be directly called using the '::' syntax. By using '::', specific functions or objects from a package can be accessed directly without loading the entire package. Loading an entire package involves loading all the functions and objects 
+lapply(packagesList, library, character.only = TRUE)  # Load libraries - packages  
+
+
+#### Set enviroment variables ####
+
+# Set the 'outputFolder' environment variables
+# The environment variable refers to the path of the output folder for the entire routine
+# 'outputFolder' allows interaction with the output folder of the routine and access to nearby paths, such as the input folder ('input')
+
+# There are two options to set 'outputFolder'
+# Please select only one of the two options, while silencing the other with the '#' syntaxis: 
+
+# Option 1: Setting for production pipeline purposes. This is designed for use in a production environment or workflow.
+Sys.setenv(outputFolder = "/path/to/output/folder")
+
+# Option 2: Recommended for debugging purposes to be used as a testing environment. This is designed to facilitate script testing and correction
+if ( (!exists("outputFolder"))  ) {
+  outputFolder<- {x<- this.path::this.path();  file_prev<-  paste0(gsub("/scripts.*", "/output", x), gsub("^.*/scripts", "", x)  ); options<- tools::file_path_sans_ext(file_prev) %>% {c(., paste0(., ".R"), paste0(., "_R"))}; folder_out<- options %>% {.[file.exists(.)]} %>% {.[which.max(sapply(., function(info) file.info(info)$mtime))]}; folder_final<- list.files(folder_out, full.names = T) %>% {.[which.max(sapply(., function(info) file.info(info)$mtime))]} }
+}
+
+
+
+# Set the 'input' environment variables. The 'input' environment contains the specified inputs from the ByB platform.
+# The input file 'input.json' is generated by executing the 'Run Script' command in the ByB platform.
+input <- rjson::fromJSON(file=file.path(outputFolder, "input.json")) # Load input file
+
+# This section adjusts the input values based on specific conditions to rectify and prevent errors in the input paths
+input<- lapply(input, function(x) { if (!is.null(x) && length(x) > 0 && grepl("/", x)) { 
+  sub("/output/.*", "/output", outputFolder) %>% dirname() %>%  file.path(x) %>% {gsub("//+", "/", .)}  } else{x} }) 
+
+
+
+
+####  Script body ####
+
+  # Ajustar inputs
+  projection_factor<- input$projection_factor %>%  {if(is.null(.)){1} else {.}}
+  projection_factor<- projection_factor %>%  {if(.<=1){1.2} else {.}}
+  threshold_function<- function(x){ eval(parse(text = input$threshold_trend)) }
+
+
+  # Cargar coleccion
+  setwd(input$dir_collection)
+  files<- list.files(input$dir_collection, pattern= "\\.tif$") %>% {.[!. %in% "area.tif"]}
+
+
+  info_layer<- list.files(input$dir_collection, pattern= "\\info_layer.csv$") %>% read.csv()
+  stack_layers<- terra::rast(files)
+
+  data_time<- data.frame(layer= names(stack_layers)) %>% list(dplyr::select(info_layer, c(layer, start, period))) %>% plyr::join_all(match = "first")
+  times<- as.Date(data_time$start)
+
+  result_table<- terra::global(stack_layers, fun= input$fun_summary, na.rm=T) %>% {dplyr::mutate(data.frame(Period= seq(nrow(.)), time= times), value = .[,1])} 
+
+  #Make the linear regression for the previous data 
+  model<- lm(value~Period , data=result_table,na.action = na.exclude)
+
+  #Add the predicted values to final results table
+  result_table<- result_table %>% dplyr::mutate(fitted.values= model$fitted.values)
+
+
+  #definir umbral
+  threshold<- {if(is.null(input$threshold)){ threshold_function(x= result_table$value) ############# validar con luisfer este valor. Define the threshold of temperature, this is the mean of the first 20 times plus 1.5 
+  } else { input$threshold }
+  }
+
+  #Define the difference between real predicted values and the threshold
+  result_table<- result_table %>% dplyr::mutate(Difference= fitted.values-threshold)
+
+  #Generate the graphic result 
+  grafica1<- ggplot(result_table, aes(time, value))+ geom_point() + geom_smooth(method = "lm")+
+    geom_hline(yintercept = threshold, linetype = 3, color = 10, lwd = 1)
+
+
+  #Prediction to time
+  max_period<- max(result_table$Period)
+  projection_period<- max_period %>%  {  data.frame(Period= seq( .+1, .* projection_factor,   1))  }
+
+
+
+  n_trend<- median(diff(result_table$time), na.rm = T); n_na<- is.na(result_table$time)
+
+  prediction_data<- projection_period %>% {cbind(., data.frame(value= predict(model, newdata = .)))} %>% 
+    dplyr::mutate(time= seq( max(na.omit(result_table$time)) + n_trend,  by= n_trend,  length.out= nrow(.) ) )
+
+  projection_data <- list(dplyr::select(result_table, names(prediction_data)),prediction_data) %>% plyr::rbind.fill()
+
+
+
+
+  grafica1<- ggplot(projection_data, aes(time, value)) +geom_point() + geom_smooth(method = "lm")+
+    geom_hline(yintercept = threshold, linetype = 3, color = 10, lwd = 1)
+
+
+
+  #Convert the stack of raster layers into a dataframe, where the columns are each of the pixels and the columns are each of the years
+  table_raster<- as.data.frame(stack_layers,xy=T)
+
+  #Identify those rows (coordinates) that have more than four data, two for coordinates and two to be able to do the regression
+  table_raster$n_data<- apply(table_raster,1,FUN = function(x){sum(!is.na(x))})
+
+  #Only the rows that have four data are taken and the last row that is the one that records this data is removed
+  table_raster<- table_raster[table_raster$n_data>=4,1:(ncol(table_raster)-1)]
+
+  #Function for estimate trend
+  trend_function <- function(y){ x=1:length(y);
+  reg=lm(y~x, na.action=na.exclude) #apply linear model and exclude NAs;
+  return(reg$coefficients[2])
+  }
+
+  #Generate the tren tif
+  result<- table_raster
+  result$trend<- apply(result[,3:ncol(result)], 1, trend_function)
+
+  #Set the snap raster
+  raster_snap=raster(file.path(input$dir_collection, files[1]))
+  #Generate coordinates for the dataframe
+  coordinates(result)=~x+y
+  #Generate the trend tif
+  trend_raster<- rasterize(result,raster_snap,field="trend")
+
+
+  # Exportar resultados
+
+
+  grafica1_trend_path<- file.path(outputFolder, "trendplot.jpg")
+  ggsave( grafica1_trend_path, grafica1, dpi = 300, bg = "white")
+
+  trend_raster_path<- file.path(outputFolder, "trend.tif")
+  writeRaster(trend_raster, trend_raster_path, options="COMPRESS=DEFLATE", overwrite=T)
+
+  result_table_path<- file.path(outputFolder, "result_table.csv")
+  write.csv( projection_data, result_table_path )
+
+
+  # Define final output list
+  output<- list(grafica1_trend= grafica1_trend_path, result_table = result_table_path , threshold = threshold, trend_raster= trend_raster_path )
+
+
+
+
+#### Outputing result to JSON ####
+# Write the output list to the 'output.json' file in JSON format
+setwd(outputFolder)
+jsonlite::write_json(output, "output.json", auto_unbox = TRUE, pretty = TRUE)