Skip to content

Commit

Permalink
add function to read and merge Philips Health Band xlsx file pairs #68
Browse files Browse the repository at this point in the history
  • Loading branch information
vincentvanhees committed Sep 27, 2024
1 parent 43428d4 commit 33a8b2c
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 3 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ URL: https://github.com/wadpac/GGIRread/
BugReports: https://github.com/wadpac/GGIRread/issues
License: Apache License (== 2.0)
Suggests: testthat
Imports: matlab, bitops, Rcpp (>= 0.12.10), data.table
Imports: matlab, bitops, Rcpp (>= 0.12.10), data.table, readxl
Depends: stats, utils, R (>= 3.5.0)
NeedsCompilation: yes
LinkingTo: Rcpp
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ useDynLib(GGIRread, .registration = TRUE)
importFrom(Rcpp, sourceCpp)
importFrom(data.table, fread)
importFrom("utils", "setTxtProgressBar", "txtProgressBar")
importFrom("utils", "read.csv")
importFrom("utils", "read.csv", "write.csv")
importFrom("utils", "available.packages")
5 changes: 4 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
- Added a `NEWS.md` file to track changes to the package.
- Stops interactive calling of `chooseCRANmirror` on `.onAttach` if interactive and CRAN mirror not set GGIR #1141.
- Migrate read function for ActiGraph (csv) and Actiwatch (csv/awd) count data to GGIRread #68.
- Add read function for Actical (csv) count data #68.
- Add function for reading Actical (csv) count data #68.
- Add function to read and merge Philips Health Band file pairs (xlsx) #68.


# Changes in version 1.0.1 (release date:03-06-2024)

- Progress bar fixed, issue #63 (credits: John Muschelli)


# Changes in version 1.0.0 (release date:27-03-2024)

- GENEActiv no longer prints error to console when more data is requested
Expand Down
66 changes: 66 additions & 0 deletions R/mergePHBfilePairs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
mergePHBfilePairs = function(inputPath = ".", outputPath = ".") {
  # Merge Philips Health Band (PHB) xlsx files per recording and store the
  # result as csv file in outputPath.
  #
  # Arguments:
  #   inputPath   Character, folder that holds the xlsx files. A recording can
  #               consist of a "Datalist" file, a "Sleep_Wake" file, or both.
  #   outputPath  Character, folder in which the csv files are written.
  #
  # Value:
  #   NULL (invisibly); the function is called for its side effect of writing
  #   one csv file per recording ID.
  #
  # Files are grouped by the second underscore-separated part of their
  # (lower-cased) file name, which is treated as the recording ID.

  fnames = dir(inputPath, recursive = FALSE, full.names = TRUE, pattern = "[.]xlsx")
  fileOverview = data.frame(filename = fnames)

  extractID = function(x) {
    # "sleep_wake" is collapsed to one token first, such that the ID is the
    # second token for both file types.
    x = basename(x)
    x = gsub(pattern = "sleep_wake", replacement = "sleepwake", x = tolower(x))
    ID = unlist(strsplit(x, "_"))[2]
    return(ID)
  }
  fileOverview$ID = vapply(fileOverview$filename, FUN = extractID,
                           FUN.VALUE = character(1), USE.NAMES = FALSE)

  uids = unique(fileOverview$ID)
  for (uid in uids) {
    filesForThisPerson = fileOverview$filename[which(fileOverview$ID == uid)]
    # Identify both files
    file1 = grep(pattern = "datalist", x = filesForThisPerson, ignore.case = TRUE)
    file2 = grep(pattern = "sleep_wake", x = filesForThisPerson, ignore.case = TRUE)
    if (length(file1) == 0 && length(file2) == 0) {
      next
    }
    if (length(file1) > 1 || length(file2) > 1) {
      # read_excel needs a single path per file type; skip this ID instead of
      # aborting the entire batch.
      warning(paste0("More than one Datalist or Sleep_Wake file found for ID ",
                     uid, ", files for this ID are skipped."), call. = FALSE)
      next
    }
    # Reset per recording, such that a serial number from a previous
    # recording can never end up in the file name of the current one.
    deviceSN = NULL
    # Data
    if (length(file1) > 0) {
      # The first 8 rows are header rows, the data table starts below them
      data1 = as.data.frame(readxl::read_excel(path = filesForThisPerson[file1],
                                               col_types = "text", skip = 8),
                            row.names = FALSE)
      header = as.data.frame(readxl::read_excel(path = filesForThisPerson[file1],
                                                col_types = "text", n_max = 8,
                                                .name_repair = "unique_quiet"),
                             row.names = FALSE)[, 1]
      SNlocation = grep(pattern = "deviceSN", x = header)
      if (length(SNlocation) > 0) {
        # The serial number is the last space-separated element
        deviceSN = unlist(strsplit(header[SNlocation], " "))
        deviceSN = deviceSN[length(deviceSN)]
      }
      colnames(data1)[grep(pattern = "counts", x = colnames(data1), ignore.case = TRUE)] = "counts"
      colnames(data1)[grep(pattern = "offWrist", x = colnames(data1), ignore.case = TRUE)] = "nonwear"
    }
    # Sleep wake scores
    if (length(file2) > 0) {
      data2 = as.data.frame(readxl::read_excel(path = filesForThisPerson[file2],
                                               col_types = "text", skip = 8),
                            row.names = FALSE)
      colnames(data2)[grep(pattern = "sleepWake", x = colnames(data2), ignore.case = TRUE)] = "sleep"
    }
    if (length(file1) > 0 && length(file2) > 0) {
      # drop = FALSE keeps data2 a data.frame even if only one column is left
      data2 = data2[, which(colnames(data2) != "sleepEventMarker"), drop = FALSE]
      data = merge(data1, data2, by = "timeStamp")
    } else {
      if (length(file1) > 0) {
        data = data1
      } else {
        data = data2
      }
    }
    colnames(data)[grep(pattern = "timeStamp", x = colnames(data))] = "timestamp"
    # Derive the output name from the Sleep_Wake file when available and
    # otherwise from the Datalist file, such that incomplete pairs are
    # converted too.
    if (length(file2) > 0) {
      newName = gsub(pattern = "Sleep_Wake", replacement = "def",
                     x = basename(filesForThisPerson[file2]))
    } else {
      newName = gsub(pattern = "datalist", replacement = "def",
                     x = basename(filesForThisPerson[file1]), ignore.case = TRUE)
    }
    # Replace only the final extension, and insert the device serial number
    # (empty when unknown) in front of it.
    newName = sub(pattern = "[.][^.]*$", replacement = "", x = newName)
    newName = paste0(newName, "_", deviceSN, ".csv")
    outputfile = file.path(outputPath, newName)
    write.csv(x = data, file = outputfile, row.names = FALSE)
  }
  invisible(NULL)
}
Binary file not shown.
Binary file not shown.
30 changes: 30 additions & 0 deletions man/mergePHBfilePairs.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
\name{mergePHBfilePairs}
\alias{mergePHBfilePairs}
\title{
Merge Philips Health Band xlsx file pairs into one csv file
}
\description{
Read, merge, and save Philips Health Band xlsx file pairs to csv.
If the pair is incomplete the function will still convert the available
xlsx file to csv.
}
\usage{
mergePHBfilePairs(inputPath = ".", outputPath = ".")
}
\arguments{
\item{inputPath}{
Character, path to the folder with the xlsx files that need to be merged.
The assumption is that each recording has either one file starting with "Datalist_",
one file starting with "Sleep_Wake_", or both (the file type is detected case-insensitively).
}
\item{outputPath}{
Character, path to the folder where the merged csv files are stored.
}
}
\value{
The function does not return a value. Instead, it stores the merged data as csv
files in the location specified by outputPath.
}
\author{
Vincent T van Hees <v.vanhees@accelting.com>
}
33 changes: 33 additions & 0 deletions tests/testthat/test_mergePHBfilePairs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
library(GGIRread)
context("merge PHB files")
test_that("merging of PHB files goes correctly", {
  # Locate the example file pair that is shipped with the package
  file1 = system.file("testfiles/DataList_AH1234567890_PhilipsHealthBand.xlsx", package = "GGIRread")
  file2 = system.file("testfiles/Sleep_Wake_AH1234567890_PhilipsHealthBand.xlsx", package = "GGIRread")

  # Work in a unique folder inside the session temp directory, such that the
  # test never writes to the working directory and nothing is left behind
  # when the test stops with an error before reaching the clean up step.
  path = tempfile(pattern = "phb_test_folder")
  dir.create(path)
  expect_true(file.copy(from = file1, to = path, overwrite = TRUE))
  expect_true(file.copy(from = file2, to = path, overwrite = TRUE))

  # apply function to merge the files
  mergePHBfilePairs(inputPath = path, outputPath = path)

  # check whether merged file exists (fixed = TRUE: match the name literally)
  newfiles = dir(path, full.names = TRUE)
  newFile = newfiles[grep(pattern = "def_AH1234567890_PhilipsHealthBand_000100621938.csv",
                          x = newfiles, fixed = TRUE)]
  expect_length(newFile, 1)

  # check file content
  data = read.csv(newFile)
  expect_equal(nrow(data), 246)
  expect_equal(ncol(data), 19)
  expect_equal(sum(data$counts), 50898)
  expect_equal(sum(data$steps), 1)
  expect_equal(data$timestamp[1], "11-05-2022 01:10:00")

  # clean up
  if (dir.exists(path)) unlink(path, recursive = TRUE)
})

0 comments on commit 33a8b2c

Please sign in to comment.