Issue53 checksum fail #54

Merged · 2 commits · Dec 5, 2023
192 changes: 160 additions & 32 deletions R/readAxivity.R
@@ -1,9 +1,11 @@
readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desiredtz = "",
configtz = c(), interpolationType = 1, loadbattery = FALSE,
header = NULL, frequency_tol = 0.1) {
header = NULL, frequency_tol = 0.1,
maxAllowedCorruptBlocks = 20) {
if (length(configtz) == 0) configtz = desiredtz
blockBytes = 512
headerBytes = 1024

# Credits: The original version of this code developed outside GitHub was
# contributed by Dr. Evgeny Mirkes (Leicester University, UK)
#========================================================================
@@ -75,6 +77,28 @@
return(NULL)
}

# sampling rate, stored in the file as a U8 at offset 24
samplerate_dynrange = readBin(block[25], integer(), size = 1, signed = FALSE)

if (samplerate_dynrange != 0) { # Very old files that have zero at offset 24 don't have a checksum
checksum = sum(readBin(block, n = 256,
integer(),
size = 2,
signed = FALSE,
endian = "little"))
checksum = checksum %% 65536 # 65536 = 2^16; the checksum is calculated as a 16-bit integer
if (checksum != 0) {
# Checksum doesn't match. This means some bits in this block got corrupted.
# We don't know which, so we can't trust this block. We skip it, and impute it later.
rawdata_list = list(
struc = struc,
parameters = parameters,
checksum_pass = FALSE
)
return(invisible(rawdata_list))
}
}

idstr = readChar(block, 2, useBytes = TRUE)
if (idstr != "AX") {
stop("Packet header is incorrect. First two characters must be AX.")
@@ -114,22 +138,6 @@
} else {
battery = 0
}
# sampling rate in one of file format U8 at offset 24
samplerate_dynrange = readBin(block[25], integer(), size = 1, signed = FALSE)

checksum_pass = TRUE
if (samplerate_dynrange != 0) { # Very old files that have zero at offset 24 don't have a checksum
# Perform checksum
checksum = sum(readBin(block, n = 256,
integer(),
size = 2,
signed = FALSE,
endian = "little"))
checksum = checksum %% 65536 # equals 2^16 the checksum is calculated on a 16bit integer
if (checksum != 0) {
checksum_pass = FALSE
}
}

# offset 25, per documentation:
# "top nibble: number of axes, 3=Axyz, 6=Gxyz/Axyz, 9=Gxyz/Axyz/Mxyz;
@@ -244,7 +252,7 @@
length = blockLength,
struc = struc,
parameters = parameters,
checksum_pass = checksum_pass,
checksum_pass = TRUE,
blockID = blockID
)
if (complete) {
@@ -302,14 +310,33 @@
accrange = bitwShiftR(16, (bitwShiftR(samplerate_dynrange, 6)))
version = readBin(block[42], integer(), size = 1, signed = FALSE) #offset 41

# Read the first data block without data
datas = readDataBlock(fid, complete = FALSE)
if (is.null(datas)) {
stop("Error reading the first data block.")
# Read the first data block without data.
# Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
is_corrupt = TRUE
for (ii in 0:min(numDBlocks-1, maxAllowedCorruptBlocks)) {
datas = readDataBlock(fid, complete = FALSE)

if (is.null(datas)) {
stop("Error reading the first data block.")

}

if (datas$checksum_pass) {
is_corrupt = FALSE
break
}

warning("Skipping corrupt block #", ii)
}
if (is_corrupt) {
if (ii==numDBlocks-1) {
stop("Error reading file. Every block is corrupt.")

}
stop("Error reading file. The first ", maxAllowedCorruptBlocks+1, " blocks are corrupt.")

}

if (frequency_header != datas$frequency) {
warning("Inconsistent value of measurement frequency: there is ",
frequency_header, " in header and ", datas$frequency, " in the first data block ")
frequency_header, " in header and ", datas$frequency, " in the first data block. ")

}

blockLength = datas$length # number of samples in a block
@@ -340,6 +367,9 @@
on.exit({
close(fid)
})

QClog = NULL # Initialise log of data quality issues

#############################################################################
# read header
if (is.null(header)) {
@@ -368,14 +398,59 @@
struc = list(0,0L,0)
if (end < numDBlocks) { # the end block isn't part of the data we'll read, but its start will be our ending timestamp
seek(fid, headerBytes + blockBytes * end, origin = 'start')
endBlock = readDataBlock(fid, struc = struc)

# Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
# Skip forward, so we'll end up reading more blocks than requested.
# This is important because when corrupt blocks are in the beginning of the requested interval,
# we also skip forward, ending up with fewer blocks than requested.
# For example, if a file has corrupt blocks 8-12 and we read it 10 blocks at a time,
# then the 1st request will end up with blocks 1-12 and the 2nd request with blocks 13-20,
# so between the two requests all 20 blocks are accounted for.
is_corrupt = TRUE
for (ii in end : min(numDBlocks-1, end+maxAllowedCorruptBlocks)) {
endBlock = readDataBlock(fid, struc = struc)

if (endBlock$checksum_pass) {
is_corrupt = FALSE
break
}

warning("Skipping corrupt end block #", ii)
end = end + 1
}
if (is_corrupt && ii == end+maxAllowedCorruptBlocks) {
stop("Error reading file. The last ", maxAllowedCorruptBlocks+1, " blocks are corrupt.")

}
endTimestamp = as.numeric(endBlock$start)
} else {
}

if (end == numDBlocks) {
# end == numDBlocks, meaning we'll be reading all the remaining blocks.
# There is no block #numDBlocks, so we can't get the ending timestamp from the start of that block.
# Instead read the very last block of the file, and project what the ending timestamp should be.
seek(fid, headerBytes + blockBytes * (end-1), origin = 'start')
lastBlock = readDataBlock(fid, struc = struc)
# Instead, read the very last block of the file (if the last block is corrupt, find the last non-corrupt one),
# then project what the ending timestamp should be.

# Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
# Since we are at the last block, we have to go backwards.
is_corrupt = TRUE
for (ii in (end-1) : max(start, end-maxAllowedCorruptBlocks-1)) {
seek(fid, headerBytes + blockBytes * ii, origin = 'start')
lastBlock = readDataBlock(fid, struc = struc)

if (lastBlock$checksum_pass) {
is_corrupt = FALSE
break
}

warning("Skipping corrupt end block #", ii)
end = end - 1
}
if (is_corrupt) {
if (start == end) {
stop("Error reading file. All requested blocks are corrupt.")
}
stop("Error reading file. The last ", maxAllowedCorruptBlocks+1, " blocks are corrupt.")
}
# the end timestamp should fall right after the actual very last timestamp of the file
endTimestamp = as.numeric(lastBlock$start) + blockLength * step
# now pad it generously in case there are gaps in the last block
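To make the projection above concrete, here is an illustrative calculation (made-up numbers, not from this diff):

# Illustration only: project the end timestamp from the last readable block.
lastStart    = 1000.00                          # start of the last non-corrupt block, in seconds
blockLength  = 80                               # samples per block
step         = 0.01                             # seconds per sample, i.e. 1 / frequency
endTimestamp = lastStart + blockLength * step   # 1000.80, just past the file's final sample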
@@ -386,7 +461,36 @@
# Reinitiate file and skip header as well as the initial start-1 blocks
seek(fid, headerBytes + blockBytes * start, origin = 'start')
pos = 1 # position of the first element to complete in data
prevRaw = readDataBlock(fid, struc = struc)

# Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
is_corrupt = TRUE
for (ii in 1 : min((end-start), maxAllowedCorruptBlocks+1)) {
prevRaw = readDataBlock(fid, struc = struc)

if (prevRaw$checksum_pass) {
is_corrupt = FALSE
break
}
QClog = rbind(QClog, data.frame(checksum_pass = FALSE,
blockID_current = start, # the actual block ID wasn't read b/c the block is corrupt
blockID_next = start+1,
start = 0,
end = 0,
blockLengthSeconds = 0,
frequency_blockheader = 0,
frequency_observed = 0,
imputed = FALSE))

warning("Skipping corrupt start block #", start)
start = start + 1
}
if (is_corrupt) {
if (start == end) {
stop("Error reading file. All requested blocks are corrupt.")
}
stop("Error reading file. The first ", maxAllowedCorruptBlocks+1, " blocks are corrupt.")

}

if (is.null(prevRaw)) {
return(invisible(list(header = header, data = NULL)))
}
Expand Down Expand Up @@ -414,7 +518,7 @@
rawTime = vector(mode = "numeric", blockLength + 2)
rawPos = 1

QClog = NULL # Initialise log of data quality issues
consecutiveCorrupt = 0

# Read the data
for (ii in (start+1):end) {
@@ -431,6 +535,31 @@
}
else { # read a new block
raw = readDataBlock(fid, struc = struc, parameters = prevRaw$parameters)
if (!raw$checksum_pass) {
# If the checksum doesn't match, we can't trust any of this block's data,
# so we have to completely skip the block.
# Depending on the nature of the faulty block, the data for the time period it represented
# will probably get imputed later, once we encounter a block with a valid checksum.
QClog = rbind(QClog, data.frame(checksum_pass = FALSE,
blockID_current = ii, # the actual block ID wasn't read b/c the block is corrupt
blockID_next = ii + 1,
start = 0,
end = 0,
blockLengthSeconds = 0,
frequency_blockheader = 0,
frequency_observed = 0,
imputed = FALSE))
warning("Skipping corrupt block #", ii)

consecutiveCorrupt = consecutiveCorrupt + 1
if (consecutiveCorrupt > maxAllowedCorruptBlocks) {
stop("Error reading file. ", maxAllowedCorruptBlocks+1, " consecutive blocks are corrupt.")
}

next
}

consecutiveCorrupt = 0

if (is.null(raw)) {
# this shouldn't happen
@@ -472,7 +601,6 @@
if ((ii < numDBlocks &&
raw$blockID != 0 &&
raw$blockID - prevRaw$blockID != 1) ||
prevRaw$checksum_pass == FALSE ||
frequency_bias > frequency_tol) {
# Log and impute this event
doQClog = TRUE
@@ -499,7 +627,7 @@
}
if (doQClog == TRUE) {
# Note: This is always a description of the previous block
QClog = rbind(QClog, data.frame(checksum_pass = prevRaw$checksum_pass,
QClog = rbind(QClog, data.frame(checksum_pass = TRUE,
blockID_current = prevRaw$blockID,
blockID_next = raw$blockID,
start = prevRaw$start,
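Blocks skipped for a failed checksum end up as rows in the QClog data frame built above. A minimal post-hoc inspection could look like the sketch below; the file path is illustrative, and it assumes readAxivity returns QClog as part of its result list (QClog can be NULL when no issues were logged).

# Illustrative sketch: list the blocks that were skipped due to checksum failures.
ax = readAxivity("subject01.cwa")   # path is illustrative
if (!is.null(ax$QClog)) {
  bad = ax$QClog[ax$QClog$checksum_pass == FALSE, ]
  print(bad[, c("blockID_current", "blockID_next", "imputed")])
}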
Binary file not shown.
5 changes: 4 additions & 1 deletion man/readAxivity.Rd
@@ -9,7 +9,7 @@
\usage{
readAxivity(filename, start = 0, end = 0, progressBar = FALSE,
desiredtz = "", configtz = c(), interpolationType=1, loadbattery = FALSE,
header = NULL, frequency_tol = 0.1)
header = NULL, frequency_tol = 0.1, maxAllowedCorruptBlocks = 20)
}
\arguments{
\item{filename}{
@@ -55,6 +55,9 @@
differs by more than 5\%, but if this is less than frequency_tol the block will
not be imputed.
}
\item{maxAllowedCorruptBlocks}{
Maximum number of consecutive blocks with a failed checksum that will be tolerated before reading is aborted with an error.
}

}
\value{
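A brief usage sketch of the new argument (not part of this pull request; the file path, block range, and timezone are illustrative):

# Tolerate at most 5 consecutive corrupt blocks before reading is aborted.
ax = readAxivity("subject01.cwa", start = 0, end = 500,
                 desiredtz = "Europe/London",
                 maxAllowedCorruptBlocks = 5)
str(ax$header)   # recording metadata
head(ax$data)    # decoded samples for the requested blocks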