Merge pull request #84 from chorus-ai/format-dicom
DICOM Fidelity Check failure
briangow authored Jul 25, 2024
2 parents 93ce7aa + 74778e8 commit c1c5c93
Showing 6 changed files with 463 additions and 257 deletions.
4 changes: 3 additions & 1 deletion BENCHMARK.md
@@ -9,7 +9,7 @@ The benchmarking script includes a simple set of metrics to evaluate the perform
The [waveform_benchmark.py](./waveform_benchmark.py) script is the entrypoint for running benchmarks. This script calls functions in the `waveform_benchmark` package. The syntax for running waveform_benchmark.py from the command line is:

```
./waveform_benchmark.py -r <PATH_TO_RECORD> -f <PATH_TO_BENCHMARK_CLASS> -p <PHYSIONET_DATABASE_PATH> [-m]
./waveform_benchmark.py -r <PATH_TO_RECORD> -f <PATH_TO_BENCHMARK_CLASS> -p <PHYSIONET_DATABASE_PATH> [--test_only] [-m]
```

The `-p` argument can be used to pull a file directly from a PhysioNet database but isn't needed when running on a local file. For example, to run the `WFDBFormat516` benchmark on local record `data/waveforms/mimic_iv/waves/p100/p10079700/85594648/85594648`:
@@ -18,6 +18,8 @@ The `-p` argument can be used to pull a file directly from a PhysioNet database
./waveform_benchmark.py -r ./data/waveforms/mimic_iv/waves/p100/p10079700/85594648/85594648 -f waveform_benchmark.formats.wfdb.WFDBFormat516
```

The `--test_only` flag turns off the read performance benchmarking and thus runs only the fidelity tests.
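
For example, to run only the fidelity tests against the local record from the previous example:

```
./waveform_benchmark.py -r ./data/waveforms/mimic_iv/waves/p100/p10079700/85594648/85594648 -f waveform_benchmark.formats.wfdb.WFDBFormat516 --test_only
```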

An example output is provided below:

```
5 changes: 5 additions & 0 deletions waveform_benchmark/__main__.py
@@ -82,6 +82,9 @@ def main():
ap.add_argument('--waveform_suite_summary_file', '-w',
default='waveform_suite_benchmark_summary.csv',
help='Save a CSV summary of the waveform suite run to this path/file')
ap.add_argument('--test_only',
default=False, action='store_true',
help='Run only the tests, do not run the benchmarks')
ap.add_argument('--memory_profiling', '-m',
default=False, type=bool, action=argparse.BooleanOptionalAction,
help='Run memory profiling on the benchmarking process')
@@ -117,6 +120,7 @@ def main():
waveform_list=waveform_list,
test_list=test_list,
result_list=result_list,
test_only = opts.test_only,
mem_profile = opts.memory_profile)

save_summary(format_list, waveform_list, test_list, result_list, opts.waveform_suite_summary_file)
@@ -126,6 +130,7 @@ def main():
run_benchmarks(input_record=opts.input_record,
format_class=opts.format_class,
pn_dir=opts.physionet_directory,
test_only = opts.test_only,
mem_profile = opts.memory_profiling)

# Close the log file after the run is complete
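
As context for the new flag, here is a minimal, self-contained sketch of the `store_true` wiring shown above (only the argparse lines from this diff are assumed; `run_benchmarks` itself lives in `waveform_benchmark.benchmark`):

```
import argparse

ap = argparse.ArgumentParser()
ap.add_argument('--test_only',
                default=False, action='store_true',
                help='Run only the tests, do not run the benchmarks')

# store_true flags default to False and flip to True when the flag is present.
print(ap.parse_args([]).test_only)               # False
print(ap.parse_args(['--test_only']).test_only)  # True
```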
187 changes: 94 additions & 93 deletions waveform_benchmark/benchmark.py
@@ -46,13 +46,14 @@ def _run_read_test(fmt, path, total_length, all_channels, block_length, block_co
def _run_read_test_1channel(fmt, path, total_length, all_channels, block_length, block_count,
test_min_dur = 10, test_min_iter = 3, mem_profile = False):
counters = []
r = random.Random(12345)
for i in repeat_test(test_min_dur, test_min_iter):
r = random.Random(12345)
with PerformanceCounter(mem_profile = mem_profile) as pc:
for j in range(block_count):
t0 = r.random() * (total_length - block_length)
t1 = t0 + block_length
fmt().read_waveforms(path, t0, t1, all_channels)
c = r.choice(all_channels)
fmt().read_waveforms(path, t0, t1, [c])
counters.append(pc)
return counters
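
Two behaviors change in this helper: the RNG is now re-seeded inside the trial loop, so every trial draws the same sequence of offsets and channel picks, and each block reads one randomly chosen channel instead of all channels, so the one-channel benchmark actually measures single-channel reads. A minimal sketch of the re-seeding effect (illustrative only; `repeat_test` and `PerformanceCounter` are repository helpers not shown in this diff, and the channel names below are hypothetical):

```
import random

# Re-seeding inside the loop makes every trial deterministic, so the
# medians reported later compare identical workloads across trials.
for trial in range(3):
    r = random.Random(12345)                   # same seed each trial
    offsets = [round(r.random(), 3) for _ in range(3)]
    channel = r.choice(['ECG', 'PPG', 'ABP'])  # hypothetical channel names
    print(trial, offsets, channel)             # same offsets and channel every trial
```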

@@ -87,7 +88,7 @@ def compute_snr(reference_signal, output_signal):


def run_benchmarks(input_record, format_class, pn_dir=None, format_list=None, waveform_list=None, test_list=None,
result_list=None, mem_profile = False):
result_list=None, mem_profile = False, test_only = False):

# Load the class we will be testing
module_name, class_name = format_class.rsplit('.', 1)
@@ -249,7 +250,7 @@ def run_benchmarks(input_record, format_class, pn_dir=None, format_list=None, wa
# use numpy's isclose to determine floating point equality
isgood = np.isclose(filedata_nonan, data_nonan, atol=0.5/chunk['gain'])
numgood = np.sum(isgood)
fpeq_rel = numgood/len(data_nonan)
fpeq_rel = numgood/len(data_nonan) if len(data_nonan) != 0 else np.nan

# compute SNR to quantify signal fidelity
snr = compute_snr(data_nonan, filedata_nonan)
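
A small, self-contained illustration of this fidelity check with hypothetical values; the tolerance of half a quantization step mirrors the `atol=0.5/chunk['gain']` above, and the empty-input guard mirrors the change to `fpeq_rel`:

```
import numpy as np

gain = 200.0
reference = np.array([0.005, 0.010, 0.015])  # hypothetical reference samples
decoded = np.array([0.005, 0.013, 0.015])    # middle sample off by more than one step

isgood = np.isclose(reference, decoded, atol=0.5 / gain)
numgood = np.sum(isgood)
fpeq_rel = numgood / len(reference) if len(reference) != 0 else np.nan
print(fpeq_rel)  # 0.666...: two of three samples are within tolerance
```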
@@ -265,97 +266,97 @@ def run_benchmarks(input_record, format_class, pn_dir=None, format_list=None, wa
print(filedata_nonan[~isgood][:10])
print(f"(Gain: {chunk['gain']})")
# print('_' * 64)
print('_' * 64)
print('Read performance (median of N trials):')
if (mem_profile):
print(' #seek #read KiB CPU(s) Wall(s) Mem(MB)(used/maxrss/malloced) [N]')
else:
print(' #seek #read KiB CPU(s) Wall(s) [N]')

for block_length, block_count in TEST_BLOCK_LENGTHS:

# time1 = time.time()
counters = []
if (mem_profile):
mem_usage, counters = memory_usage((_run_read_test, (fmt, path, total_length, all_channels, block_length, block_count), {'test_min_dur': TEST_MIN_DURATION, 'test_min_iter': TEST_MIN_ITERATIONS, 'mem_profile': mem_profile}), include_children = True, max_usage = True, retval = True)
else:
counters = _run_read_test(fmt, path, total_length, all_channels, block_length, block_count, test_min_dur = TEST_MIN_DURATION, test_min_iter = TEST_MIN_ITERATIONS, mem_profile = mem_profile)
# walltime = time.time() - time1

if (mem_profile):
print('%6.0f %6.0f %8.0f %8.4f %8.4f %8.4f/%8.4f/%8.4f %6s read %d x %.0fs, all channels'
% (median_attr(counters, 'n_seek_calls'),
median_attr(counters, 'n_read_calls'),
median_attr(counters, 'n_bytes_read') / 1024,
median_attr(counters, 'cpu_seconds'),
median_attr(counters, 'walltime'),
# walltime / len(counters),
mem_usage,
median_attr(counters, 'max_rss'),
median_attr(counters, 'malloced'),
'[%d]' % len(counters),
block_count,
block_length))
else:
print('%6.0f %6.0f %8.0f %8.4f %8.4f %6s read %d x %.0fs, all channels'
% (median_attr(counters, 'n_seek_calls'),
median_attr(counters, 'n_read_calls'),
median_attr(counters, 'n_bytes_read') / 1024,
median_attr(counters, 'cpu_seconds'),
median_attr(counters, 'walltime'),
'[%d]' % len(counters),
block_count,
block_length))
if format_list is not None:
# Append read time result
format_list, waveform_list, test_list, result_list = append_result(format_class, input_record,
f'{block_count}_all',
median_attr(counters, 'cpu_seconds'),
format_list,
waveform_list, test_list,
result_list)

for block_length, block_count in TEST_BLOCK_LENGTHS:
# time1 = time.time()
counters = []
if not test_only:
print('_' * 64)
print('Read performance (median of N trials):')
if (mem_profile):
mem_usage, counters = memory_usage((_run_read_test_1channel, (fmt, path, total_length, all_channels, block_length, block_count), {'test_min_dur': TEST_MIN_DURATION, 'test_min_iter': TEST_MIN_ITERATIONS, 'mem_profile': mem_profile}), include_children = True, max_usage = True, retval = True)
print(' #seek #read KiB CPU(s) Wall(s) Mem(MB)(used/maxrss/malloced) [N]')
else:
counters = _run_read_test_1channel(fmt, path, total_length, all_channels, block_length, block_count, test_min_dur = TEST_MIN_DURATION, test_min_iter = TEST_MIN_ITERATIONS, mem_profile = mem_profile)
# walltime = time.time() - time1

if (mem_profile):
print('%6.0f %6.0f %8.0f %8.4f %8.4f %8.4f/%8.4f/%8.4f %6s read %d x %.0fs, one channel'
% (median_attr(counters, 'n_seek_calls'),
median_attr(counters, 'n_read_calls'),
median_attr(counters, 'n_bytes_read') / 1024,
median_attr(counters, 'cpu_seconds'),
# walltime / len(counters),
median_attr(counters, 'walltime'),
mem_usage,
median_attr(counters, 'max_rss'),
median_attr(counters, 'malloced'),
'[%d]' % len(counters),
block_count,
block_length))
else:
print('%6.0f %6.0f %8.0f %8.4f %8.4f %6s read %d x %.0fs, one channel'
% (median_attr(counters, 'n_seek_calls'),
median_attr(counters, 'n_read_calls'),
median_attr(counters, 'n_bytes_read') / 1024,
median_attr(counters, 'cpu_seconds'),
median_attr(counters, 'walltime'),
# walltime / len(counters),
'[%d]' % len(counters),
block_count,
block_length))
if format_list:
format_list, waveform_list, test_list, result_list = append_result(format_class, input_record,
f'{block_count}_one',
median_attr(counters, 'cpu_seconds'),
format_list,
waveform_list, test_list,
result_list)
print(' #seek #read KiB CPU(s) Wall(s) [N]')

for block_length, block_count in TEST_BLOCK_LENGTHS:

# time1 = time.time()
counters = []
if (mem_profile):
mem_usage, counters = memory_usage((_run_read_test, (fmt, path, total_length, all_channels, block_length, block_count), {'test_min_dur': TEST_MIN_DURATION, 'test_min_iter': TEST_MIN_ITERATIONS, 'mem_profile': mem_profile}), include_children = True, max_usage = True, retval = True)
else:
counters = _run_read_test(fmt, path, total_length, all_channels, block_length, block_count, test_min_dur = TEST_MIN_DURATION, test_min_iter = TEST_MIN_ITERATIONS, mem_profile = mem_profile)
# walltime = time.time() - time1

if (mem_profile):
print('%6.0f %6.0f %8.0f %8.4f %8.4f %8.4f/%8.4f/%8.4f %6s read %d x %.0fs, all channels'
% (median_attr(counters, 'n_seek_calls'),
median_attr(counters, 'n_read_calls'),
median_attr(counters, 'n_bytes_read') / 1024,
median_attr(counters, 'cpu_seconds'),
median_attr(counters, 'walltime'),
# walltime / len(counters),
mem_usage,
median_attr(counters, 'max_rss'),
median_attr(counters, 'malloced'),
'[%d]' % len(counters),
block_count,
block_length))
else:
print('%6.0f %6.0f %8.0f %8.4f %8.4f %6s read %d x %.0fs, all channels'
% (median_attr(counters, 'n_seek_calls'),
median_attr(counters, 'n_read_calls'),
median_attr(counters, 'n_bytes_read') / 1024,
median_attr(counters, 'cpu_seconds'),
median_attr(counters, 'walltime'),
'[%d]' % len(counters),
block_count,
block_length))
if format_list is not None:
# Append read time result
format_list, waveform_list, test_list, result_list = append_result(format_class, input_record,
f'{block_count}_all',
median_attr(counters, 'cpu_seconds'),
format_list,
waveform_list, test_list,
result_list)

for block_length, block_count in TEST_BLOCK_LENGTHS:
counters = []
if (mem_profile):
mem_usage, counters = memory_usage((_run_read_test_1channel, (fmt, path, total_length, all_channels, block_length, block_count), {'test_min_dur': TEST_MIN_DURATION, 'test_min_iter': TEST_MIN_ITERATIONS, 'mem_profile': mem_profile}), include_children = True, max_usage = True, retval = True)
else:
counters = _run_read_test_1channel(fmt, path, total_length, all_channels, block_length, block_count, test_min_dur = TEST_MIN_DURATION, test_min_iter = TEST_MIN_ITERATIONS, mem_profile = mem_profile)
# walltime = time.time() - time1

if (mem_profile):
print('%6.0f %6.0f %8.0f %8.4f %8.4f %8.4f/%8.4f/%8.4f %6s read %d x %.0fs, one channel'
% (median_attr(counters, 'n_seek_calls'),
median_attr(counters, 'n_read_calls'),
median_attr(counters, 'n_bytes_read') / 1024,
median_attr(counters, 'cpu_seconds'),
median_attr(counters, 'walltime'),
# walltime / len(counters),
mem_usage,
median_attr(counters, 'max_rss'),
median_attr(counters, 'malloced'),
'[%d]' % len(counters),
block_count,
block_length))
else:
print('%6.0f %6.0f %8.0f %8.4f %8.4f %6s read %d x %.0fs, one channel'
% (median_attr(counters, 'n_seek_calls'),
median_attr(counters, 'n_read_calls'),
median_attr(counters, 'n_bytes_read') / 1024,
median_attr(counters, 'cpu_seconds'),
median_attr(counters, 'walltime'),
'[%d]' % len(counters),
block_count,
block_length))

if format_list:
format_list, waveform_list, test_list, result_list = append_result(format_class, input_record,
f'{block_count}_one',
median_attr(counters, 'cpu_seconds'),
format_list,
waveform_list, test_list,
result_list)

print('_' * 64)
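
The fidelity section above relies on `compute_snr`, whose body falls outside the hunks shown here. A minimal sketch assuming the conventional decibel definition, with the reconstruction error as the noise term (an assumption, not the repository's confirmed implementation):

```
import numpy as np

def compute_snr_sketch(reference_signal, output_signal):
    # Assumed definition: SNR(dB) = 10 * log10(signal power / error power).
    noise = reference_signal - output_signal
    noise_power = np.mean(noise ** 2)
    if noise_power == 0:
        return float('inf')  # bit-exact round-trip
    signal_power = np.mean(reference_signal ** 2)
    return float(10 * np.log10(signal_power / noise_power))
```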

24 changes: 16 additions & 8 deletions waveform_benchmark/formats/dcm_utils/dcm_waveform_reader.py
@@ -175,21 +175,29 @@ def get_multiplex_array(fileobj : BinaryIO,
chseq = cast(list["Dataset"], channel_seqs)

# Apply correction factor (if possible)
padd_loc = (arr == padding_value)
arr = arr.astype("float")
for ch_idx, ch in enumerate(chseq):
baseline = float(ch.ChannelBaseline)
sensitivity = float(ch.ChannelSensitivity)
correction = float(ch.ChannelSensitivityCorrectionFactor)

# print("baseline " + str(baseline) + " sensitivity " + str(sensitivity) + " correction " + str(correction))

adjustment = sensitivity * correction
if (adjustment != 1.0) and (baseline != 0.0):
arr[ch_idx, ...] = np.where(arr[ch_idx, ...] == padding_value, np.nan, arr[ch_idx, ...] * adjustment + baseline)
elif (adjustment != 1.0):
arr[ch_idx, ...] = np.where(arr[ch_idx, ...] == padding_value, np.nan, arr[ch_idx, ...] * adjustment)
elif (baseline != 0.0):
arr[ch_idx, ...] = np.where(arr[ch_idx, ...] == padding_value, np.nan, arr[ch_idx, ...] + baseline)
base = baseline
# print(" reading ", ch_idx, sensitivity, baseline, correction, adjustment, base)
if (adjustment != 1.0):
if (base != 0.0):
arr[ch_idx, ...] = np.where(padd_loc[ch_idx, ...], np.nan, arr[ch_idx, ...] * adjustment - base)
else:
arr[ch_idx, ...] = np.where(padd_loc[ch_idx, ...], np.nan, arr[ch_idx, ...] * adjustment)
else:
arr[ch_idx, ...] = np.where(arr[ch_idx, ...] == padding_value, np.nan, arr[ch_idx, ...])

if (base != 0.0):
arr[ch_idx, ...] = np.where(padd_loc[ch_idx, ...], np.nan, arr[ch_idx, ...] - base)
else:
arr[ch_idx, ...] = np.where(padd_loc[ch_idx, ...], np.nan, arr[ch_idx, ...])
# print(ch.ChannelSourceSequence[0].CodeMeaning + " sensitivity " + str(sensitivity) + " gain = " + str(1.0 / sensitivity) + " baseline = " + str(baseline) + " correction = " + str(correction))

return cast("np.ndarray", arr)
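
The corrected logic records padding positions on the raw integer array before the float conversion and any arithmetic, so rescaled samples can no longer collide with the padding value, and the baseline is now subtracted during rescaling. A self-contained illustration with hypothetical values:

```
import numpy as np

padding_value = -32768
arr = np.array([[100, -32768, 300]], dtype=np.int16)  # one channel, hypothetical samples

# Mask padding on the raw integers, before rescaling can collide with it.
padd_loc = (arr == padding_value)
arr = arr.astype("float")

adjustment = 0.5  # sensitivity * correction, hypothetical
base = 10.0       # channel baseline, hypothetical

arr[0, ...] = np.where(padd_loc[0, ...], np.nan,
                       arr[0, ...] * adjustment - base)
print(arr)  # [[ 40.  nan 140.]]
```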
