Dashboard updates - 15/04/2024

EVANSTATS · Apr 16, 2024 · 9bcfaa1 · 9bcfaa1
1 parent ca792cd
commit 9bcfaa1
Show file tree

Hide file tree

Showing 15 changed files with 478 additions and 1 deletion.
diff --git a/.Rproj.user/DC486B7A/pcs/windowlayoutstate.pper b/.Rproj.user/DC486B7A/pcs/windowlayoutstate.pper
@@ -0,0 +1,14 @@
+{
+    "left": {
+        "splitterpos": 300,
+        "topwindowstate": "NORMAL",
+        "panelheight": 647,
+        "windowheight": 685
+    },
+    "right": {
+        "splitterpos": 334,
+        "topwindowstate": "NORMAL",
+        "panelheight": 647,
+        "windowheight": 685
+    }
+}
diff --git a/.Rproj.user/DC486B7A/sources/prop/56D2A599 b/.Rproj.user/DC486B7A/sources/prop/56D2A599
@@ -0,0 +1,6 @@
+{
+    "source_window_id": "",
+    "Source": "Source",
+    "cursorPosition": "85,0",
+    "scrollLine": "76"
+}
diff --git a/.Rproj.user/DC486B7A/sources/prop/7A1F73DB b/.Rproj.user/DC486B7A/sources/prop/7A1F73DB
@@ -0,0 +1,6 @@
+{
+    "source_window_id": "",
+    "Source": "Source",
+    "cursorPosition": "118,29",
+    "scrollLine": "115"
+}
diff --git a/.Rproj.user/DC486B7A/sources/prop/DC8F4729 b/.Rproj.user/DC486B7A/sources/prop/DC8F4729
@@ -0,0 +1,6 @@
+{
+    "source_window_id": "",
+    "Source": "Source",
+    "cursorPosition": "32,39",
+    "scrollLine": "24"
+}
diff --git a/.Rproj.user/DC486B7A/sources/session-6291cfb0/119FC69B-contents b/.Rproj.user/DC486B7A/sources/session-6291cfb0/119FC69B-contents
@@ -0,0 +1,71 @@
+#' Fetch per-user visit statistics from the Matomo reporting API.
+#'
+#' Builds a `UserId.getUsers` processed-report request for the given date
+#' range, downloads it as JSON, tidies the report columns and keeps only the
+#' users whose ID appears in `data$app_user_id`.
+#'
+#' @param date_from,date_to Start and end of the reporting range; pasted
+#'   unchanged into the `date=` part of the request URL.
+#' @param data Data frame with an `app_user_id` column; only Matomo users
+#'   whose UUID is found in that column are returned.
+#' @param type Segment to request: "none" (empty segment), "EFM_KE"
+#'   (pageTitle segment with countryCode == ke) or "EFM_not_KE" (same
+#'   pageTitle segment with countryCode != ke).
+#' @param token Value appended to the URL as `token_auth`.
+#' @return A data frame with columns UUID, Visits, Actions,
+#'   `Actions per visit`, `Avg. Time on Website` (seconds),
+#'   `Bounce proportion`, Bounce and `Time on Website` (seconds).
+calling_matomo_data <- function(date_from = "2023-10-25", date_to = "2024-10-25", data = plhdata_org,
+                                type = c("none", "EFM_KE", "EFM_not_KE"),
+                                token = token_matomo){
+  type <- match.arg(type)
+
+  # "none" falls through to the unnamed default: an empty segment.
+  segment_name <- switch(type,
+    EFM_KE = "segment=pageTitle%3D%3DEarly%252520Family%252520Math;countryCode%3D%3Dke",
+    EFM_not_KE = "segment=pageTitle%3D%3DEarly%252520Family%252520Math;countryCode!%3Dke",
+    "segment="
+  )
+
+  json_file <- paste0("https://apps-server.idems.international/analytics/index.php?apiAction=getUsers&apiModule=UserId&date=", date_from, ",", date_to, "&expanded=1&filter_limit=-1&format=JSON&idSite=1&method=API.getProcessedReport&module=API&period=range&", segment_name, "&token_auth=", token)
+
+  json_data <- jsonlite::fromJSON(txt = json_file, flatten = TRUE)
+
+  # The report is renamed by position; "C" and "D" are placeholders for the
+  # two columns that are dropped straight away.
+  our_data <- json_data$reportData
+  names(our_data) <- c("UUID", "Visits", "Actions", "C", "D", "Actions per visit", "Avg. Time on Website", "Bounce Rate")
+  our_data <- our_data %>% dplyr::select(-c("C", "D"))
+
+  # "Bounce Rate" arrives as text such as "12%": keep what precedes the "%"
+  # and turn it into a proportion, then into an (approximate) bounce count.
+  bounce_percent <- stringr::str_split(our_data$`Bounce Rate`, "%", simplify = TRUE)[, 1]
+  our_data$`Bounce proportion` <- as.numeric(as.character(bounce_percent)) / 100
+  our_data <- our_data %>%
+    dplyr::mutate(Bounce = round(`Bounce proportion` * Visits, 0)) %>%
+    dplyr::select(-c("Bounce Rate"))
+
+  # hms() receives its strings through `...`, so only the column is passed.
+  # The former `format = "%H:%M:%S"` argument was itself parsed as one more
+  # (unparseable) time string, which is why the result used to be truncated
+  # with `[1:length(...)]` afterwards.
+  our_data$`Avg. Time on Website` <- lubridate::period_to_seconds(lubridate::hms(our_data$`Avg. Time on Website`))
+  our_data$`Time on Website` <- our_data$`Avg. Time on Website` * our_data$Visits # this is calculated so can be out by 10 seconds or so
+
+  # Keep only users that also exist in the app data.
+  valid_uuid <- data[["app_user_id"]]
+  our_data <- our_data %>% dplyr::filter(UUID %in% valid_uuid)
+
+  our_data
+}
+
+
+# Functions
+#' Tidy raw variable names / values into display labels.
+#'
+#' @param x Character vector to clean.
+#' @param replace Optional regular expression. Everything from the start of
+#'   each string up to and including its first match is removed.
+#' @param replace_after Optional regular expression. A match at the very end
+#'   of each string is removed.
+#' @return `x` with the requested prefix/suffix stripped, the first character
+#'   upper-cased and every underscore replaced by a space.
+naming_conventions <- function(x, replace, replace_after) {
+  if (!missing(replace)) {
+    # Strip the prefix only. The pattern must not end in ".*": that made the
+    # match run to the end of the string, so every matching value was wiped
+    # to "" instead of keeping the part after the prefix.
+    x <- gsub(paste0("^.*?", replace), "", x)
+  }
+  if (!missing(replace_after)) {
+    x <- gsub(paste0(replace_after, "$"), "", x)
+  }
+  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
+  x <- gsub("_", " ", x)
+  x
+}
+
+# Guarantee that every column named in `variable` exists in `data`.
+# Columns that are expected at some point but are not in the data yet are
+# created and filled with NA, with one warning per added column. Columns that
+# are already present are left untouched. Returns the (possibly widened) data.
+add_na_variable <- function(data = contacts_unflat, variable){
+  absent <- setdiff(variable, colnames(data))
+  for (column in absent) {
+    data[, column] <- NA
+    warning(paste(column, "does not exist. Adding NAs"))
+  }
+  data
+}
+
+# Count the ";"-separated entries held in a single string.
+# A missing value (NA) counts as zero entries.
+count_dates <- function(x) {
+  if (!is.na(x)) {
+    pieces <- strsplit(x, ";")
+    return(length(unlist(pieces)))
+  }
+  0
+}
diff --git a/.Rproj.user/DC486B7A/sources/session-6291cfb0/205BB7E0-contents b/.Rproj.user/DC486B7A/sources/session-6291cfb0/205BB7E0-contents
@@ -0,0 +1,131 @@
+#devtools::install_github("IDEMSInternational/postgresr")
+#devtools::install_github("IDEMSInternational/plhR")
+
+# Dashboard specification workbook.
+data_l <- import_list("EFM_shiny.xlsx")
+
+# Download data ----------------------------------------------------------------
+# Open question: download the EFM app data from Metabase as an RDS file?
+
+# Pull the user data over the `plh_con` connection, unfiltered.
+plhdata_org <- postgresr::get_user_data(site = plh_con, filter = FALSE)
+
+# Alternative call that filters on the deployment name at download time:
+# plhdata_org <- get_user_data(filter_variable = "app_deployment_name",
+#                              filter_variable_value = "early_family_math",
+#                              site = plh_con, merge_check = FALSE, filter = TRUE)
+#names(plhdata_org) <- gsub(x = names(plhdata_org), pattern = "\\-", replacement = ".")  
+#View(plhdata_org)
+
+# Keep records created after the cut-off date, then keep IDs that are
+# 16 characters long (filter to web users?).
+mydate <- "2023-12-15"
+plhdata_org <- plhdata_org %>%
+  filter(as.Date(createdAt) > as.Date(mydate)) %>%
+  dplyr::filter(nchar(app_user_id) == 16)
+
+# Filter to individuals from Kenya: ask Matomo for the users in the "EFM_KE"
+# segment and keep only the app users whose ID is among them.
+token_matomo <- read.table("token_matomo", quote="\"", comment.char="")
+efm_kenya <- calling_matomo_data(type = "EFM_KE")
+kenyan_ids <- efm_kenya$UUID
+plhdata_org <- plhdata_org %>%
+  dplyr::filter(app_user_id %in% kenyan_ids)
+
+# COUNTING the number of clicks --------------------------------------------------------------------
+### Creating counts 
+#x <- c("2023-11-24T09:28:59 ; 2023-11-24T09:30:22", NA, "2023-11-27T14:45:52")
+
+# Apply the function to each element of the vector
+
+# METHOD 1: sapply
+# plhdata_org1 <- plhdata_org %>%
+#   mutate(count_activities_button_click_history = 
+#            sapply(`rp-contact-field.activities_button_click_history`, count_dates))
+# plhdata_org1 %>% dplyr::select(count_activities_button_click_history, `rp-contact-field.activities_button_click_history`)
+
+# METHOD 2: purrr
+# plhdata_org2 <- plhdata_org %>%
+#   mutate(count_activities_button_click_history_purrr = 
+#            purrr::map_dbl(.x = `rp-contact-field.activities_button_click_history`,
+#                           .f = ~ count_dates(.x)))
+# 
+# plhdata_org2 %>% dplyr::select(count_activities_button_click_history, count_activities_button_click_history_purrr, rp.contact.field.activities_button_click_history) %>% View()
+# 
+
+# METHOD 3: multiple columns
+# For every column whose name ends in "_click_history", add a companion
+# "<column>_count" column holding the number of ";"-separated entries.
+# vapply() (rather than sapply()) pins the result to exactly one number per
+# row, so the new columns are always plain numeric vectors: no list column
+# when the data has zero rows, and no names copied from the cell contents.
+plhdata_org <- plhdata_org %>%
+  mutate(across(ends_with("_click_history"), # put in here a set of variables.
+                .names = "{.col}_count",     # rename the new variables
+                ~ vapply(.x, count_dates, numeric(1), USE.NAMES = FALSE)))
+#plhdata_org_3 %>% View()
+
+
+
+######################################### Hello - fixes and comments #######################################
+
+# we got an error in shiny:
+# Caused by error in `.data[["rp-contact-field.efm_sb_Cat_And_Dog_And_The_Ball_book_click_history"]]`:
+#   ! Column `rp-contact-field.efm_sb_Cat_And_Dog_And_The_Ball_book_click_history` not found in `.data`.
+
+# Check that the storybook variables listed in the spreadsheet exist in the
+# data; any that are missing are added as all-NA columns (with a warning).
+
+vars_to_check <- data_l$storybooks$variable
+plhdata_org <- add_na_variable(plhdata_org, variable = vars_to_check)
+
+# WARNING: check the warnings whenever this runs. A variable is created and
+# filled with NAs if it is not in the data set, but the name may simply be a
+# typo in the spreadsheet. In that case fix the typo instead of relying on the
+# NA column. Some typos have been seen already, e.g. app_last_launch was used
+# instead of rp-contact-field.app_last_launch (fixed in run_plhr_shiny.R).
+
+#### Fix namings for rp-contact-field.current_book
+# We just want to remove "data.efm_storybooks.efm_sb_" from our names
+plhdata_org$`rp-contact-field.current_book` <- naming_conventions(
+  x = plhdata_org$`rp-contact-field.current_book`,
+  replace = "data.efm_storybooks.efm_sb_"
+)
+
+
+#TODO: remove data column in spreadsheet
+# Store the latest server sync as a date, and expose the launch fields under
+# the short names as well.
+plhdata_org$`rp-contact-field._server_sync_latest` <-
+  lubridate::as_date(plhdata_org$`rp-contact-field._server_sync_latest`)
+plhdata_org$app_last_launch <- plhdata_org$`rp-contact-field.app_last_launch`
+plhdata_org$app_launch_count <- plhdata_org$`rp-contact-field.app_launch_count`
+
+# App last sync
+# Flag (1 / 0, NA when the sync date is missing) which recency band each
+# user's latest server sync falls into. The current UTC date is computed once
+# so that all four bands are cut against the same day, even if the script
+# happens to run across midnight.
+today_utc <- as.Date(lubridate::now(tzone = "UTC"))
+
+plhdata_org <- plhdata_org %>%
+  mutate(
+    # synced within the last 7 days
+    synced_7_days = ifelse(
+      `rp-contact-field._server_sync_latest` >= today_utc - 7,
+      1, 0
+    ),
+    # synced between 14 (inclusive) and 7 (exclusive) days ago
+    synced_7_14_days = ifelse(
+      `rp-contact-field._server_sync_latest` >= today_utc - 14 &
+        `rp-contact-field._server_sync_latest` < today_utc - 7,
+      1, 0
+    ),
+    # synced between 30 (inclusive) and 14 (exclusive) days ago
+    synced_14_30_days = ifelse(
+      `rp-contact-field._server_sync_latest` >= today_utc - 30 &
+        `rp-contact-field._server_sync_latest` < today_utc - 14,
+      1, 0
+    ),
+    # last sync is more than 30 days old
+    synced_more_than_30_days = ifelse(
+      `rp-contact-field._server_sync_latest` < today_utc - 30,
+      1, 0
+    )
+  )
+
+plhdata_org$app_last_launch <- as.Date(plhdata_org$app_last_launch)
+
+# # App last launch - line graph
+# plhdata_org$app_last_launch <- as.Date(plhdata_org$app_last_launch)
+# 
+# # Creating a data frame of the last launched dates
+# app_last_launch_data <- plhdata_org %>%
+#   filter(!is.na(app_last_launch)) %>% 
+#   group_by(app_last_launch) %>% 
+#   summarise(frequency = n())
+
+# Creating the line graph
+# ggplot(app_last_launch_data) + 
+#   geom_line(aes(x = app_last_launch, y = frequency)) +
+#   geom_point(aes(x = app_last_launch, y = frequency)) + 
+#   labs(x = "Date", y = "Frequency", title = "Frequency of Values by Date")
+# 
+
+
+# 15/04/2024 - Evans removed "App Launch History" from demographics tab.
+
+
+
+
+
+
diff --git a/.Rproj.user/DC486B7A/sources/session-6291cfb0/5437C517-contents b/.Rproj.user/DC486B7A/sources/session-6291cfb0/5437C517-contents
@@ -0,0 +1,38 @@
+#install.packages("RPostgres")
+library(RPostgres)
+library(DBI)
+
+#' AIM: We want to be able to call the original data, clean it / remove the irrelevant ones
+#' and then save that so we can call just this reduced data set.
+
+# Connect to the database to get the original data.
+# The password is read from the EFM_DB_PASSWORD environment variable (for
+# example set in a git-ignored ~/.Renviron) instead of being typed into this
+# script: a literal password ends up in version control, including RStudio's
+# .Rproj.user session copies of the file. The password that was committed
+# here previously should be treated as leaked and rotated.
+efm_db_password <- Sys.getenv("EFM_DB_PASSWORD")
+if (!nzchar(efm_db_password)) {
+  stop("Set the EFM_DB_PASSWORD environment variable before connecting to the database.",
+       call. = FALSE)
+}
+plh_con <- DBI::dbConnect(RPostgres::Postgres(),
+                          dbname = 'early_family_math',
+                          host = 'apps-server.idems.international',
+                          port = 5432,
+                          user = 'early_family_math',
+                          password = efm_db_password)
+
+# TODO: explore do a query (filter)
+#DBI::dbReadTable(conn = plh_con, name = "app_users")
+
+
+#Connect to Database to write cleaned data
+# parent_app_con <- dbConnect(RPostgres::Postgres(),
+#                             dbname = 'parent_app',
+#                             host = 'apps-server.idems.international',
+#                             port = 5432,
+#                             user = 'parent_app',
+#                             password = 'parent_app')
+# 
+# parent_app_tables <- dbListTables(plh_con)
+
+# x <- system.time(DBI::dbReadTable(conn = plh_con, name = "app_users"))
+# 
+# y <- system.time(DBI::dbReadTable(conn = parent_app_con, name = "plhdata_org_clean"))
+# 
+# x; y
+
+#dbListTables(parent_app_con)
+#plhdata_org <- DBI::dbReadTable(conn = parent_app_con, name = "plhdata_org_clean")
+#plhdata_org <- DBI::dbReadTable(conn = parent_app_con, name = "Cleaned PLH data")
diff --git a/.Rproj.user/DC486B7A/sources/session-6291cfb0/578901C3 b/.Rproj.user/DC486B7A/sources/session-6291cfb0/578901C3
@@ -0,0 +1,26 @@
+{
+    "id": "578901C3",
+    "path": "~/GitHub/EFMDataScripts/app.R",
+    "project_path": "app.R",
+    "type": "r_source",
+    "hash": "0",
+    "contents": "",
+    "dirty": false,
+    "created": 1712756115410.0,
+    "source_on_save": false,
+    "relative_order": 1,
+    "properties": {
+        "source_window_id": "",
+        "Source": "Source",
+        "cursorPosition": "32,39",
+        "scrollLine": "24"
+    },
+    "folds": "",
+    "lastKnownWriteTime": 1712821990,
+    "encoding": "UTF-8",
+    "collab_server": "",
+    "source_window": "",
+    "last_content_update": 1712821990792,
+    "read_only": false,
+    "read_only_alternatives": []
+}
diff --git a/.Rproj.user/DC486B7A/sources/session-6291cfb0/578901C3-contents b/.Rproj.user/DC486B7A/sources/session-6291cfb0/578901C3-contents
@@ -0,0 +1,40 @@
+# Template file to run PLHR changes.
+library(rio)
+library(plhR)
+library(shiny)
+#library(shinythemes)
+library(shinyjs)
+library(plotly)
+library(shinydashboard)
+library(jsonlite)
+library(rjson)
+library(here)     
+library(ggplot2)
+library(tibble)
+library(stringr)
+library(forcats)
+library(lubridate)
+library(purrr)
+library(tidyr)
+library(dplyr)
+library(gt)
+library(readxl)
+library(postgresr)
+library(ggthemes)
+library(shinyauthr)
+
+# R files where we call and tidy the data, sourced one after another in the
+# order listed.
+invisible(lapply(
+  c("Personal Setup.R", "Functions.R", "EFM_loading_data.R", "Credentials_data.R"),
+  source
+))
+
+# Excel file with the specifications in it
+data_l <- import_list("EFM_shiny.xlsx")
+#data_l$contents <- data_l$contents[1:3,]
+
+# Run the shiny dashboard on the prepared data
+PLH_shiny(
+  title = "EFM Research",
+  data_list = data_l,
+  data_frame = plhdata_org,
+  status = "primary"
+)