add function to read and merge Philips Health Band xlsx file pairs #68

wadpac · Sep 27, 2024 · 33a8b2c · 33a8b2c
1 parent 43428d4
commit 33a8b2c
Show file tree

Hide file tree

Showing 8 changed files with 135 additions and 3 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,7 +22,7 @@ URL: https://github.com/wadpac/GGIRread/
 BugReports: https://github.com/wadpac/GGIRread/issues
 License: Apache License (== 2.0)
 Suggests: testthat
-Imports: matlab, bitops, Rcpp (>= 0.12.10), data.table
+Imports: matlab, bitops, Rcpp (>= 0.12.10), data.table, readxl
 Depends: stats, utils, R (>= 3.5.0)
 NeedsCompilation: yes
 LinkingTo: Rcpp

diff --git a/NAMESPACE b/NAMESPACE
@@ -6,5 +6,5 @@ useDynLib(GGIRread, .registration = TRUE)
 importFrom(Rcpp, sourceCpp)
 importFrom(data.table, fread)
 importFrom("utils", "setTxtProgressBar", "txtProgressBar")
-importFrom("utils", "read.csv")
+importFrom("utils", "read.csv", "write.csv")
 importFrom("utils", "available.packages")
diff --git a/NEWS.md b/NEWS.md
@@ -3,12 +3,15 @@
 - Added a `NEWS.md` file to track changes to the package.
 - Stops interactive calling of `chooseCRANmirror` on `.onAttach` if interactive and CRAN mirror not set GGIR #1141.
 - Migrate read function for ActiGraph (csv) and Actiwatch (csv/awd) count data to GGIRread #68.
-- Add read function for Actical (csv) count data #68.
+- Add function for reading Actical (csv) count data #68.
+- Add function to read and merge Philips Health Band file pairs (xlsx) #68.
+
 
 # Changes in version 1.0.1 (release date:03-06-2024)
 
 - Progress bar fixed, issue #63 (credits: John Muschelli)
 
+
 # Changes in version 1.0.0 (release date:27-03-2024)
 
 - GENEActiv no longer prints error to console when more data is requested 

diff --git a/R/mergePHBfilePairs.R b/R/mergePHBfilePairs.R
@@ -0,0 +1,66 @@
+mergePHBfilePairs = function(inputPath = ".", outputPath = ".") {
+  # Merge Philips Health Band (PHB) xlsx exports per participant and save the
+  # result as csv.
+  #
+  # Arguments:
+  #   inputPath:  folder that is searched (non-recursively) for xlsx files.
+  #   outputPath: folder in which the csv files are written; it is created
+  #               when it does not exist yet.
+  #
+  # Files are grouped by the ID in their name, which is the second
+  # underscore-separated field once "sleep_wake" is treated as a single word,
+  # e.g. "ah1234567890" for "Sleep_Wake_AH1234567890_PhilipsHealthBand.xlsx".
+  # Per ID a "DataList" file and/or a "Sleep_Wake" file is expected. When both
+  # are present they are merged by timeStamp, otherwise the available file is
+  # converted on its own.
+  #
+  # Value: NULL, invisibly. The function is called for its side effect of
+  # writing one csv file per ID.
+
+  # Anchor the pattern such that only names ending in .xlsx are considered
+  fnames = dir(inputPath, recursive = FALSE, full.names = TRUE, pattern = "[.]xlsx$")
+  if (length(fnames) == 0) {
+    return(invisible(NULL))
+  }
+
+  extractID = function(x) {
+    # "sleep_wake" holds an underscore itself, so collapse it to one word
+    # before splitting to keep the ID in second position for both file types
+    x = gsub(pattern = "sleep_wake", replacement = "sleepwake", x = tolower(basename(x)))
+    return(unlist(strsplit(x, "_"))[2])
+  }
+  selectFile = function(files, pattern, uid) {
+    # Return the file whose name (not folder) matches pattern, or character(0)
+    # if there is none. read_excel only accepts a single path, so when several
+    # files match only the first one is used and a warning is given.
+    hits = files[grepl(pattern = pattern, x = basename(files), ignore.case = TRUE)]
+    if (length(hits) > 1) {
+      warning("Multiple '", pattern, "' files found for ID ", uid,
+              ", only using ", basename(hits[1]), call. = FALSE)
+      hits = hits[1]
+    }
+    return(hits)
+  }
+  readTable = function(path) {
+    # skip = 8 steps over the rows that are read separately as file header
+    return(as.data.frame(readxl::read_excel(path = path, col_types = "text", skip = 8),
+                         row.names = FALSE))
+  }
+  readDeviceSN = function(path) {
+    # Look in the first column of the file header for the entry that mentions
+    # deviceSN and return its last space-separated word, or NULL if not found
+    header = as.data.frame(readxl::read_excel(path = path,
+                                              col_types = "text", n_max = 8,
+                                              .name_repair = "unique_quiet"),
+                           row.names = FALSE)[, 1]
+    SNlocation = grep(pattern = "deviceSN", x = header)
+    if (length(SNlocation) == 0) {
+      return(NULL)
+    }
+    deviceSN = unlist(strsplit(header[SNlocation], " "))
+    return(deviceSN[length(deviceSN)])
+  }
+
+  IDs = vapply(fnames, FUN = extractID, FUN.VALUE = character(1), USE.NAMES = FALSE)
+  # File names without underscore have no ID and cannot be assigned to a person
+  uids = unique(IDs[!is.na(IDs)])
+
+  if (!dir.exists(outputPath)) {
+    dir.create(outputPath, recursive = TRUE)
+  }
+  for (uid in uids) {
+    filesForThisPerson = fnames[which(IDs == uid)]
+    # Identify both files
+    file1 = selectFile(filesForThisPerson, pattern = "datalist", uid = uid)
+    file2 = selectFile(filesForThisPerson, pattern = "sleep_wake", uid = uid)
+    if (length(file1) == 0 && length(file2) == 0) {
+      next
+    }
+    # Reset per person: the serial number is only read from the DataList file
+    # and must not be carried over from the previous person
+    deviceSN = NULL
+    # Data
+    if (length(file1) > 0) {
+      data1 = readTable(file1)
+      deviceSN = readDeviceSN(file1)
+      colnames(data1)[grep(pattern = "counts", x = colnames(data1), ignore.case = TRUE)] = "counts"
+      colnames(data1)[grep(pattern = "offWrist", x = colnames(data1), ignore.case = TRUE)] = "nonwear"
+    }
+    # Sleep wake scores
+    if (length(file2) > 0) {
+      data2 = readTable(file2)
+      colnames(data2)[grep(pattern = "sleepWake", x = colnames(data2), ignore.case = TRUE)] = "sleep"
+    }
+    if (length(file1) > 0 && length(file2) > 0) {
+      # drop = FALSE keeps a data.frame even if only one column remains
+      data2 = data2[, colnames(data2) != "sleepEventMarker", drop = FALSE]
+      data = merge(data1, data2, by = "timeStamp")
+    } else if (length(file1) > 0) {
+      data = data1
+    } else {
+      data = data2
+    }
+    colnames(data)[grep(pattern = "timeStamp", x = colnames(data))] = "timestamp"
+
+    # Name the output after the Sleep_Wake file (with "Sleep_Wake" replaced by
+    # "def") when it exists, and after the DataList file otherwise, such that
+    # an incomplete pair still results in a valid file name
+    if (length(file2) > 0) {
+      nameSource = file2
+    } else {
+      nameSource = file1
+    }
+    newName = sub(pattern = "[.]xlsx$", replacement = "", x = basename(nameSource))
+    newName = gsub(pattern = "Sleep_Wake", replacement = "def", x = newName, ignore.case = TRUE)
+    if (!is.null(deviceSN)) {
+      newName = paste0(newName, "_", deviceSN)
+    }
+    outputfile = file.path(outputPath, paste0(newName, ".csv"))
+    write.csv(x = data, file = outputfile, row.names = FALSE)
+  }
+  return(invisible(NULL))
+}
diff --git a/inst/testfiles/DataList_AH1234567890_PhilipsHealthBand.xlsx b/inst/testfiles/DataList_AH1234567890_PhilipsHealthBand.xlsx
diff --git a/inst/testfiles/Sleep_Wake_AH1234567890_PhilipsHealthBand.xlsx b/inst/testfiles/Sleep_Wake_AH1234567890_PhilipsHealthBand.xlsx
diff --git a/man/mergePHBfilePairs.Rd b/man/mergePHBfilePairs.Rd
@@ -0,0 +1,30 @@
+\name{mergePHBfilePairs}
+\alias{mergePHBfilePairs}
+\title{
+  Merge Philips Health Band xlsx file pairs into one csv file
+}
+\description{
+  Read, merge, and save Philips Health Band xlsx file pairs to csv.
+  If the pair is incomplete the function will still convert the available
+  xlsx file to csv.
+}
+\usage{
+  mergePHBfilePairs(inputPath = ".", outputPath = ".")
+}
+\arguments{
+  \item{inputPath}{
+    Character, path to the folder that holds the xlsx files to be merged.
+    The assumption is that each recording has one file starting with "DataList_",
+    one file starting with "Sleep_Wake_", or both.
+  }
+  \item{outputPath}{
+    Character, path to the folder where the merged csv output is stored.
+  }
+}
+\value{
+  The function does not return a value. Instead, it stores the merged data as a csv file
+  in the location specified by outputPath.
+}
+\author{
+  Vincent T van Hees <v.vanhees@accelting.com>
+}
diff --git a/tests/testthat/test_mergePHBfilePairs.R b/tests/testthat/test_mergePHBfilePairs.R
@@ -0,0 +1,33 @@
+library(GGIRread)
+context("merge PHB files")
+# Checks that a DataList / Sleep_Wake xlsx pair shipped with the package is
+# merged into a single csv file with the expected name and content.
+test_that("merging of PHB files goes correctly", {
+  # prepare folder with test files
+  file1 = system.file("testfiles/DataList_AH1234567890_PhilipsHealthBand.xlsx", package = "GGIRread")
+  file2 = system.file("testfiles/Sleep_Wake_AH1234567890_PhilipsHealthBand.xlsx", package = "GGIRread")
+  # Use the session's temporary directory such that the test does not write
+  # to the working directory
+  path = file.path(tempdir(), "phb_test_folder")
+  if (dir.exists(path)) {
+    # remove leftovers from an earlier run, as these could satisfy the checks below
+    unlink(path, recursive = TRUE)
+  }
+  dir.create(path)
+  invisible(file.copy(from = file1, to = path, overwrite = TRUE))
+  invisible(file.copy(from = file2, to = path, overwrite = TRUE))
+
+  # apply function to merge the files
+  mergePHBfilePairs(inputPath = path, outputPath = path)
+
+  # check whether merged file exists
+  # fixed = TRUE: match the file name literally, the dots are not wildcards
+  newfiles = dir(path, full.names = TRUE)
+  newFile = newfiles[grep(pattern = "def_AH1234567890_PhilipsHealthBand_000100621938.csv",
+                          x = newfiles, fixed = TRUE)]
+  expect_length(newFile, 1)
+
+  # check file content
+  data = read.csv(newFile)
+  expect_equal(nrow(data), 246)
+  expect_equal(ncol(data), 19)
+  expect_equal(sum(data$counts), 50898)
+  expect_equal(sum(data$steps), 1)
+  expect_equal(data$timestamp[1], "11-05-2022 01:10:00")
+
+  # clean up
+  if (dir.exists(path))  unlink(path, recursive = TRUE)
+})
+