From 0905a61579e24a81d746bcf5d04d94c807d8cede Mon Sep 17 00:00:00 2001 From: Carl Johnsen <cjjohnsen@nbi.ku.dk> Date: Fri, 10 Jun 2022 15:49:12 +0200 Subject: [PATCH 001/136] Restructuring pybind kernels --- src/pybind_kernels/Makefile | 14 +++---- src/pybind_kernels/Readme.md | 1 + src/pybind_kernels/__init__.py | 0 src/pybind_kernels/cpu/__init__.py | 0 src/pybind_kernels/{ => cpu}/geometry.cc | 0 src/pybind_kernels/{ => cpu}/histograms.cc | 0 src/pybind_kernels/{ => cpu}/label.cc | 0 src/pybind_kernels/{ => include}/datatypes.hh | 0 src/pybind_kernels/{ => include}/parallel.hh | 0 src/pybind_kernels/opencv_pybind.cc | 34 ---------------- src/pybind_kernels/opencv_tester.cc | 26 ------------ .../{ => pybind}/geometry-pybind.cc | 0 src/pybind_kernels/uk_pybind11.py | 40 ------------------- 13 files changed, 8 insertions(+), 107 deletions(-) create mode 100644 src/pybind_kernels/Readme.md create mode 100644 src/pybind_kernels/__init__.py create mode 100644 src/pybind_kernels/cpu/__init__.py rename src/pybind_kernels/{ => cpu}/geometry.cc (100%) rename src/pybind_kernels/{ => cpu}/histograms.cc (100%) rename src/pybind_kernels/{ => cpu}/label.cc (100%) rename src/pybind_kernels/{ => include}/datatypes.hh (100%) rename src/pybind_kernels/{ => include}/parallel.hh (100%) delete mode 100644 src/pybind_kernels/opencv_pybind.cc delete mode 100644 src/pybind_kernels/opencv_tester.cc rename src/pybind_kernels/{ => pybind}/geometry-pybind.cc (100%) delete mode 100644 src/pybind_kernels/uk_pybind11.py diff --git a/src/pybind_kernels/Makefile b/src/pybind_kernels/Makefile index 8b69a8b..49f49b8 100644 --- a/src/pybind_kernels/Makefile +++ b/src/pybind_kernels/Makefile @@ -1,16 +1,16 @@ -PYBIND_FLAGS += $(shell python3 -m pybind11 --include) -O3 -march=native -Wall -shared -fPIC -fopenmp -g -std=c++17 +PYBIND_FLAGS += $(shell python3 -m pybind11 --include) -march=native -Wall -shared -fPIC -fopenmp -g -std=c++17 PYBIND_SUFFIX = $(shell python3-config --extension-suffix) 
OPENCV_INCLUDE=$(shell pkg-config opencv4 --cflags) OPENCV_LIB=$(shell pkg-config opencv4 --libs) # Detect if OpenACC can be used -ifneq (, $(shell which nvc++)) -CXX = nvc++ -CXXFLAGS += -acc=gpu -Minfo=accel -tp=native -else -$(info OpenACC compiler nvc++ not found. Compiling without) -endif +#ifneq (, $(shell which nvc++)) +#CXX = nvc++ +#CXXFLAGS += -acc=gpu -Minfo=accel -tp=native +#else +#$(info OpenACC compiler nvc++ not found. Compiling without) +#endif CXXFLAGS += -I../contrib/cpptqdm/ diff --git a/src/pybind_kernels/Readme.md b/src/pybind_kernels/Readme.md new file mode 100644 index 0000000..27d0412 --- /dev/null +++ b/src/pybind_kernels/Readme.md @@ -0,0 +1 @@ +# TODO :) \ No newline at end of file diff --git a/src/pybind_kernels/__init__.py b/src/pybind_kernels/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pybind_kernels/cpu/__init__.py b/src/pybind_kernels/cpu/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pybind_kernels/geometry.cc b/src/pybind_kernels/cpu/geometry.cc similarity index 100% rename from src/pybind_kernels/geometry.cc rename to src/pybind_kernels/cpu/geometry.cc diff --git a/src/pybind_kernels/histograms.cc b/src/pybind_kernels/cpu/histograms.cc similarity index 100% rename from src/pybind_kernels/histograms.cc rename to src/pybind_kernels/cpu/histograms.cc diff --git a/src/pybind_kernels/label.cc b/src/pybind_kernels/cpu/label.cc similarity index 100% rename from src/pybind_kernels/label.cc rename to src/pybind_kernels/cpu/label.cc diff --git a/src/pybind_kernels/datatypes.hh b/src/pybind_kernels/include/datatypes.hh similarity index 100% rename from src/pybind_kernels/datatypes.hh rename to src/pybind_kernels/include/datatypes.hh diff --git a/src/pybind_kernels/parallel.hh b/src/pybind_kernels/include/parallel.hh similarity index 100% rename from src/pybind_kernels/parallel.hh rename to src/pybind_kernels/include/parallel.hh diff --git a/src/pybind_kernels/opencv_pybind.cc 
b/src/pybind_kernels/opencv_pybind.cc deleted file mode 100644 index 1b0a2d5..0000000 --- a/src/pybind_kernels/opencv_pybind.cc +++ /dev/null @@ -1,34 +0,0 @@ -#include <pybind11/pybind11.h> -#include <pybind11/numpy.h> - -#include <opencv2/core.hpp> -#include <opencv2/imgcodecs.hpp> -#include <opencv2/highgui.hpp> -#include <iostream> -#include <stdio.h> -#include <inttypes.h> - -using namespace std; -using namespace cv; -namespace py = pybind11; - -void opencv_tester(const py::array_t<uint8_t>& np_image) -{ - py::buffer_info image_info = np_image.request(); - uint64_t - Ny = image_info.shape[0], - Nx = image_info.shape[1]; - - Mat img(Ny, Nx, CV_8UC1, image_info.ptr); - - imshow("opencv_tester window",img); - int k = waitKey(0); -} - - -PYBIND11_MODULE(opencv_pybind, m) { - m.doc() = "Test of C++ OpenCV through pybind"; // optional module docstring - - m.def("tester", &opencv_tester); -} - diff --git a/src/pybind_kernels/opencv_tester.cc b/src/pybind_kernels/opencv_tester.cc deleted file mode 100644 index 0a39a42..0000000 --- a/src/pybind_kernels/opencv_tester.cc +++ /dev/null @@ -1,26 +0,0 @@ -#include <opencv2/core.hpp> -#include <opencv2/imgcodecs.hpp> -#include <opencv2/highgui.hpp> -#include <iostream> -#include <stdio.h> -#include <inttypes.h> - -using namespace std; - -int main(int ac, char **av) -{ - fprintf(stderr,"Just starting up, doing nothing.\n"); - - if(ac<2) return -1; - - cv::Mat img = cv::imread(av[1], cv::IMREAD_COLOR); - - if(img.empty()) return -2; - - cv::imshow("Window",img); - - int k = cv::waitKey(0); - - return 0; - -} diff --git a/src/pybind_kernels/geometry-pybind.cc b/src/pybind_kernels/pybind/geometry-pybind.cc similarity index 100% rename from src/pybind_kernels/geometry-pybind.cc rename to src/pybind_kernels/pybind/geometry-pybind.cc diff --git a/src/pybind_kernels/uk_pybind11.py b/src/pybind_kernels/uk_pybind11.py deleted file mode 100644 index 50b5abd..0000000 --- a/src/pybind_kernels/uk_pybind11.py +++ /dev/null @@ -1,40 +0,0 @@ 
-import histograms, numpy as np -from time import time; -import sys - -# TODO: Currently specialized to uint16_t -def masked_minmax(voxels): - return histograms.masked_minmax(voxels) - -def axes_histogram(voxels, ranges=None, voxel_bins=256): - (Nz,Ny,Nx) = voxels.shape - Nr = int(np.sqrt((Nx//2)**2 + (Ny//2)**2))+1 - - x_bins = np.zeros((Nx,voxel_bins),dtype=np.uint64) - y_bins = np.zeros((Ny,voxel_bins),dtype=np.uint64) - z_bins = np.zeros((Nz,voxel_bins),dtype=np.uint64) - r_bins = np.zeros((Nr,voxel_bins),dtype=np.uint64) - - if ranges is None: - vmin, vmax = 1, 4095 - else: - vmin, vmax = ranges - - histograms.axis_histogram(voxels, x_bins, y_bins, z_bins, r_bins, vmin, vmax); - return x_bins, y_bins, z_bins, r_bins - - -def field_histogram(voxels, field, ranges=None,field_bins=256, voxel_bins=256): - assert(voxels.dtype == np.uint16) - - bins = np.zeros((field_bins,voxel_bins),dtype=np.uint64) - - if ranges is None: - vmin, vmax = masked_minmax(voxels) - else: - (vmin,vmax) = ranges - - print("Calculating field histogram",flush=True); - histograms.field_histogram(voxels,field,bins,vmin,vmax) - - return bins From 1d8f0bafc985d5f5b5458e2067b55d2264be2e54 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <cjjohnsen@nbi.ku.dk> Date: Sun, 12 Jun 2022 09:28:56 +0200 Subject: [PATCH 002/136] Moved I/O functions to its own library in the new format --- src/pybind_kernels/Makefile | 33 ++++++++++----- src/pybind_kernels/cpu/histograms.cc | 53 +------------------------ src/pybind_kernels/cpu/io.cc | 22 ++++++++++ src/pybind_kernels/cpu_seq/io.cc | 40 +++++++++++++++++++ src/pybind_kernels/gpu/io.cc | 22 ++++++++++ src/pybind_kernels/include/datatypes.hh | 4 ++ src/pybind_kernels/include/io.hh | 12 ++++++ src/pybind_kernels/pybind/io-pybind.cc | 48 ++++++++++++++++++++++ 8 files changed, 173 insertions(+), 61 deletions(-) create mode 100644 src/pybind_kernels/cpu/io.cc create mode 100644 src/pybind_kernels/cpu_seq/io.cc create mode 100644 src/pybind_kernels/gpu/io.cc create 
mode 100644 src/pybind_kernels/include/io.hh create mode 100644 src/pybind_kernels/pybind/io-pybind.cc diff --git a/src/pybind_kernels/Makefile b/src/pybind_kernels/Makefile index 49f49b8..0de8d95 100644 --- a/src/pybind_kernels/Makefile +++ b/src/pybind_kernels/Makefile @@ -5,16 +5,20 @@ OPENCV_INCLUDE=$(shell pkg-config opencv4 --cflags) OPENCV_LIB=$(shell pkg-config opencv4 --libs) # Detect if OpenACC can be used -#ifneq (, $(shell which nvc++)) -#CXX = nvc++ -#CXXFLAGS += -acc=gpu -Minfo=accel -tp=native -#else -#$(info OpenACC compiler nvc++ not found. Compiling without) -#endif +ifneq (, $(shell which nvc++)) +CXX = nvc++ +CXXFLAGS += -acc=gpu -Minfo=accel -tp=native +else +$(info OpenACC compiler nvc++ not found. Compiling without) +endif -CXXFLAGS += -I../contrib/cpptqdm/ +CXXFLAGS += -I../contrib/cpptqdm/ -Iinclude -all: histograms$(PYBIND_SUFFIX) geometry$(PYBIND_SUFFIX) label$(PYBIND_SUFFIX) +PLATFORMS=cpu_seq cpu gpu +LIBS=io +TARGETS = $(foreach PLATFORM, $(PLATFORMS), $(foreach LIB, $(LIBS), $(PLATFORM)/$(LIB)$(PYBIND_SUFFIX))) + +all: $(TARGETS) histograms$(PYBIND_SUFFIX): histograms.cc $(CXX) $(CXXFLAGS) $(PYBIND_FLAGS) $< -o histograms$(PYBIND_SUFFIX) @@ -31,8 +35,19 @@ opencv_pybind$(PYBIND_SUFFIX): opencv_pybind.cc opencv_tester: opencv_tester.cc $(CXX) $(CXXFLAGS) $(OPENCV_INCLUDE) $(OPENCV_LIB) $< -o opencv_tester +define GEN_RULE +$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX): pybind/$(LIB)-pybind.cc $(PLATFORM)/$(LIB).cc + $(CXX) $(CXXFLAGS) $(PYBIND_FLAGS) -I$(PLATFORM) $$< -o $(PLATFORM)/$(LIB)$(PYBIND_SUFFIX) +endef + +$(foreach PLATFORM, $(PLATFORMS), \ + $(foreach LIB, $(LIBS), \ + $(eval $(GEN_RULE)) \ + ) \ +) + clean: - rm -f histograms.o histograms$(PYBIND_SUFFIX) opencv_pybind$(PYBIND_SUFFIX) opencv_tester + rm -f $(TARGETS) diff --git a/src/pybind_kernels/cpu/histograms.cc b/src/pybind_kernels/cpu/histograms.cc index cf1b115..73862ba 100644 --- a/src/pybind_kernels/cpu/histograms.cc +++ b/src/pybind_kernels/cpu/histograms.cc @@ -5,17 +5,11 @@ 
#include <stdio.h> #include <omp.h> #include <chrono> -#include <iostream> -#include <fstream> #include <tqdm.h> using namespace std; namespace py = pybind11; -typedef uint16_t voxel_type; -//typedef float field_type; -typedef uint16_t field_type; -typedef uint8_t mask_type; -typedef float gauss_type; +#include "datatypes.hh" #define INLINE __attribute__((always_inline)) inline @@ -302,48 +296,6 @@ pair<float,float> float_minmax(const py::array_t<float> np_field) { return make_pair(voxel_min,voxel_max); } -void load_slice(py::array_t<voxel_type> &np_data, string filename, - const tuple<uint64_t, uint64_t, uint64_t> offset, - const tuple<uint64_t, uint64_t, uint64_t> shape) { - auto data_info = np_data.request(); - voxel_type *data = static_cast<voxel_type*>(data_info.ptr); - ifstream file; - file.open(filename.c_str(), ios::binary); - if(!file.is_open()){ - fprintf(stderr,"load_slice: Error opening %s for reading.\n",filename.c_str()); - exit(-1); - } - auto [Nz, Ny, Nx] = shape; - auto [oz, oy, ox] = offset; - uint64_t flat_offset = (oz*Ny*Nx + oy*Nx + ox) * sizeof(voxel_type); - file.seekg(flat_offset, ios::beg); - file.read((char*) data, data_info.size * sizeof(voxel_type)); - file.close(); -} - -void write_slice(py::array_t<voxel_type> &np_data, uint64_t offset, string filename) { - auto data_info = np_data.request(); - const voxel_type *data = static_cast<const voxel_type*>(data_info.ptr); - ofstream file; - file.open(filename.c_str(), ios::binary | ios::in); - if (!file.is_open()) { - file.clear(); - file.open(filename.c_str(), ios::binary); - } - file.seekp(offset * sizeof(voxel_type), ios::beg); - file.write((char*) data, data_info.size * sizeof(voxel_type)); - file.close(); -} - -void append_slice(py::array_t<voxel_type> &np_data, string filename) { - auto data_info = np_data.request(); - const voxel_type *data = static_cast<const voxel_type*>(data_info.ptr); - ofstream file; - file.open(filename.c_str(), ios::binary | ios::app); - file.write((char*) data, 
data_info.size * sizeof(voxel_type)); - file.close(); -} - // On entry, np_*_bins are assumed to be pre allocated and zeroed. void axis_histogram_par_cpu(const py::array_t<voxel_type> np_voxels, const tuple<uint64_t,uint64_t,uint64_t> offset, @@ -1133,9 +1085,6 @@ void otsu( PYBIND11_MODULE(histograms, m) { m.doc() = "2D histogramming plugin"; // optional module docstring - m.def("load_slice", &load_slice); - m.def("append_slice", &append_slice); - m.def("write_slice", &write_slice); m.def("axis_histogram_seq_cpu", &axis_histogram_seq_cpu); m.def("axis_histogram_par_cpu", &axis_histogram_par_cpu); m.def("axis_histogram_par_gpu", &axis_histogram_par_gpu); diff --git a/src/pybind_kernels/cpu/io.cc b/src/pybind_kernels/cpu/io.cc new file mode 100644 index 0000000..41b56ec --- /dev/null +++ b/src/pybind_kernels/cpu/io.cc @@ -0,0 +1,22 @@ +#include <iostream> +#include <fstream> + +#include "io.hh" + +using namespace std; + +template <typename T> +void load_contiguous_slice(T *data, + const string filename, + const uint64_t offset, + const uint64_t size) { + throw runtime_error(string("Library doesn't have a parallel cpu implementation of ") + __FUNCTION__); +} + +template <typename T> +void write_contiguous_slice(const T *data, + const string filename, + const uint64_t offset, + const uint64_t size) { + throw runtime_error(string("Library doesn't have a parallel cpu implementation of ") + __FUNCTION__); +} diff --git a/src/pybind_kernels/cpu_seq/io.cc b/src/pybind_kernels/cpu_seq/io.cc new file mode 100644 index 0000000..01cf2f8 --- /dev/null +++ b/src/pybind_kernels/cpu_seq/io.cc @@ -0,0 +1,40 @@ +#include <iostream> +#include <fstream> + +#include "io.hh" + +using namespace std; + +template <typename T> +void load_contiguous_slice(T *data, + const string filename, + const uint64_t offset, + const uint64_t size) { + ifstream file; + file.open(filename.c_str(), ios::binary); + if (!file.is_open()) { + fprintf(stderr, "load_slice: Error opening %s for reading.\n", 
filename.c_str()); + exit(-1); + } + file.seekg(offset * sizeof(T), ios::beg); + file.read((char*) data, size * sizeof(T)); + file.close(); +} + +template <typename T> +void write_contiguous_slice(const T *data, + const string filename, + const uint64_t offset, + const uint64_t size) { + ofstream file; + file.open(filename.c_str(), ios::binary | ios::in); + if (!file.is_open()) { + file.clear(); + file.open(filename.c_str(), ios::binary); + } + file.seekp(offset * sizeof(T), ios::beg); + file.write((char*) data, size * sizeof(T)); + file.close(); +} + +// TODO non-contiguous diff --git a/src/pybind_kernels/gpu/io.cc b/src/pybind_kernels/gpu/io.cc new file mode 100644 index 0000000..4eb196a --- /dev/null +++ b/src/pybind_kernels/gpu/io.cc @@ -0,0 +1,22 @@ +#include <iostream> +#include <fstream> + +#include "io.hh" + +using namespace std; + +template <typename T> +void load_contiguous_slice(T *data, + const string filename, + const uint64_t offset, + const uint64_t size) { + throw runtime_error(string("Library doesn't have a gpu implementation of ") + __FUNCTION__); +} + +template <typename T> +void write_contiguous_slice(const T *data, + const string filename, + const uint64_t offset, + const uint64_t size) { + throw runtime_error(string("Library doesn't have a gpu implementation of ") + __FUNCTION__); +} diff --git a/src/pybind_kernels/include/datatypes.hh b/src/pybind_kernels/include/datatypes.hh index f2e121a..88a068d 100644 --- a/src/pybind_kernels/include/datatypes.hh +++ b/src/pybind_kernels/include/datatypes.hh @@ -2,6 +2,10 @@ #include <vector> typedef uint8_t mask_type; // TODO: Template + explicit instantiation +typedef uint16_t voxel_type; +//typedef float field_type; +typedef uint16_t field_type; +typedef float gauss_type; typedef double real_t; constexpr ssize_t acc_block_size = 1024 * 1024 * 1024/sizeof(mask_type); // 1 GB diff --git a/src/pybind_kernels/include/io.hh b/src/pybind_kernels/include/io.hh new file mode 100644 index 0000000..fae2cbf --- 
/dev/null +++ b/src/pybind_kernels/include/io.hh @@ -0,0 +1,12 @@ +#ifndef io_h +#define io_h + +#include <pybind11/pybind11.h> +#include <pybind11/numpy.h> + +template <typename T> +void load_contiguous_slice(T *data, const string filename, const uint64_t offset, const uint64_t size); +template <typename T> +void write_contiguous_slice(T *np_data, const string filename, const uint64_t offset, const uint64_t size); + +#endif \ No newline at end of file diff --git a/src/pybind_kernels/pybind/io-pybind.cc b/src/pybind_kernels/pybind/io-pybind.cc new file mode 100644 index 0000000..d7c370a --- /dev/null +++ b/src/pybind_kernels/pybind/io-pybind.cc @@ -0,0 +1,48 @@ +#include <pybind11/pybind11.h> +#include <pybind11/numpy.h> + +using namespace std; +namespace py = pybind11; + +#include "datatypes.hh" +#include "io.cc" + +template <typename T> +void load_slice(py::array_t<T> &np_data, const string filename, + const tuple<uint64_t, uint64_t, uint64_t> offset, + const tuple<uint64_t, uint64_t, uint64_t> shape) { + auto data_info = np_data.request(); + T *data = static_cast<T*>(data_info.ptr); + auto [Nz, Ny, Nx] = shape; + auto [oz, oy, ox] = offset; + uint64_t flat_offset = oz*Ny*Nx + oy*Nx + ox; + load_contiguous_slice<T>(data, filename, flat_offset, data_info.size); +} + +template <typename T> +void write_slice(const py::array_t<T> &np_data, + const string filename, + const tuple<uint64_t, uint64_t, uint64_t> offset, + const tuple<uint64_t, uint64_t, uint64_t> shape) { + auto data_info = np_data.request(); + const T *data = static_cast<const T*>(data_info.ptr); + auto [Nz, Ny, Nx] = shape; + auto [oz, oy, ox] = offset; + uint64_t flat_offset = oz*Ny*Nx + oy*Nx + ox; + write_contiguous_slice<T>(data, filename, flat_offset, data_info.size); +} + +PYBIND11_MODULE(histograms, m) { + m.doc() = "I/O functions for handling flat binary format files."; // optional module docstring + m.def("load_slice", &load_slice<mask_type>); + m.def("load_slice", &load_slice<voxel_type>); + 
m.def("load_slice", &load_slice<field_type>); + m.def("load_slice", &load_slice<gauss_type>); + m.def("load_slice", &load_slice<real_t>); + + m.def("write_slice", &write_slice<mask_type>); + m.def("write_slice", &write_slice<voxel_type>); + m.def("write_slice", &write_slice<field_type>); + m.def("write_slice", &write_slice<gauss_type>); + m.def("write_slice", &write_slice<real_t>); +} \ No newline at end of file From 616672820a642a86db49697e3c2de64ae2b4539e Mon Sep 17 00:00:00 2001 From: Carl Johnsen <cjjohnsen@nbi.ku.dk> Date: Sun, 12 Jun 2022 10:03:49 +0200 Subject: [PATCH 003/136] Moved morphology to its own files --- src/pybind_kernels/Makefile | 17 ++- src/pybind_kernels/cpu/histograms.cc | 116 ------------------ src/pybind_kernels/cpu/morphology.cc | 46 +++++++ src/pybind_kernels/cpu_seq/morphology.cc | 45 +++++++ src/pybind_kernels/gpu/morphology.cc | 52 ++++++++ src/pybind_kernels/include/datatypes.hh | 9 +- src/pybind_kernels/include/io.hh | 3 - src/pybind_kernels/include/morphology.hh | 14 +++ src/pybind_kernels/pybind/io-pybind.cc | 2 +- .../pybind/morphology-pybind.cc | 33 +++++ 10 files changed, 203 insertions(+), 134 deletions(-) create mode 100644 src/pybind_kernels/cpu/morphology.cc create mode 100644 src/pybind_kernels/cpu_seq/morphology.cc create mode 100644 src/pybind_kernels/gpu/morphology.cc create mode 100644 src/pybind_kernels/include/morphology.hh create mode 100644 src/pybind_kernels/pybind/morphology-pybind.cc diff --git a/src/pybind_kernels/Makefile b/src/pybind_kernels/Makefile index 0de8d95..59c9911 100644 --- a/src/pybind_kernels/Makefile +++ b/src/pybind_kernels/Makefile @@ -5,17 +5,17 @@ OPENCV_INCLUDE=$(shell pkg-config opencv4 --cflags) OPENCV_LIB=$(shell pkg-config opencv4 --libs) # Detect if OpenACC can be used -ifneq (, $(shell which nvc++)) -CXX = nvc++ -CXXFLAGS += -acc=gpu -Minfo=accel -tp=native -else -$(info OpenACC compiler nvc++ not found. 
Compiling without) -endif +#ifneq (, $(shell which nvc++)) +#CXX = nvc++ +#CXXFLAGS += -acc=gpu -Minfo=accel -tp=native +#else +#$(info OpenACC compiler nvc++ not found. Compiling without) +#endif CXXFLAGS += -I../contrib/cpptqdm/ -Iinclude PLATFORMS=cpu_seq cpu gpu -LIBS=io +LIBS=io morphology TARGETS = $(foreach PLATFORM, $(PLATFORMS), $(foreach LIB, $(LIBS), $(PLATFORM)/$(LIB)$(PYBIND_SUFFIX))) all: $(TARGETS) @@ -48,6 +48,3 @@ $(foreach PLATFORM, $(PLATFORMS), \ clean: rm -f $(TARGETS) - - - diff --git a/src/pybind_kernels/cpu/histograms.cc b/src/pybind_kernels/cpu/histograms.cc index 73862ba..b876840 100644 --- a/src/pybind_kernels/cpu/histograms.cc +++ b/src/pybind_kernels/cpu/histograms.cc @@ -55,118 +55,6 @@ template <typename T> void convolve1d(const py::array_t<T> np_kernel, } -template <typename Op, bool neutral> void morphology_3d_sphere_cpu( - const py::array_t<mask_type> &np_voxels, - const int64_t radius, - const py::array_t<mask_type> np_result -) { - auto - voxels_info = np_voxels.request(), - result_info = np_result.request(); - - int32_t Nz = voxels_info.shape[0], Ny = voxels_info.shape[1], Nx = voxels_info.shape[2]; - int64_t N[3] = {Nz, Ny, Nx}; - int64_t strides[3] = {Ny*Nx, Nx, 1}; - - const mask_type *voxels = static_cast<const mask_type*>(voxels_info.ptr); - mask_type *result = static_cast<mask_type*>(result_info.ptr); - - Op op; - - int64_t sqradius = radius * radius; - - #pragma omp parallel for collapse(3) - for (int64_t z = 0; z < N[0]; z++) { - for (int64_t y = 0; y < N[1]; y++) { - for (int64_t x = 0; x < N[2]; x++) { - // Compute boundaries - int64_t flat_index = z*strides[0] + y*strides[1] + x*strides[2]; - int64_t X[3] = {z, y, x}; - int64_t limits[6]; - for (int axis = 0; axis < 3; axis++) { - limits[(axis*2)] = -min(radius, X[axis]); - limits[(axis*2)+1] = min(radius, N[axis] - X[axis] - 1); - } - - // Apply the spherical kernel - bool value = neutral; - //#pragma omp simd collapse(3) reduction(op:value) - for (int64_t pz = 
limits[0]; pz <= limits[1]; pz++) { - for (int64_t py = limits[2]; py <= limits[3]; py++) { - for (int64_t px = limits[4]; px <= limits[5]; px++) { - // TODO exact match with ndimage - bool within = px*px + py*py + pz*pz <= sqradius; // sphere kernel - int64_t offset = pz*strides[0] + py*strides[1] + px*strides[2]; - value = within? op(value, voxels[flat_index+offset]) : value; - } - } - } - - // Store the results - result[flat_index] = value; - } - } - } -} - -template <typename Op, bool neutral> void morphology_3d_sphere_gpu( - const py::array_t<mask_type> &np_voxels, - const int64_t radius, - const py::array_t<mask_type> np_result) { -#ifdef _OPENACC - auto - voxels_info = np_voxels.request(), - result_info = np_result.request(); - - int32_t Nz = voxels_info.shape[0], Ny = voxels_info.shape[1], Nx = voxels_info.shape[2]; - int64_t N[3] = {Nz, Ny, Nx}; - int64_t strides[3] = {Ny*Nx, Nx, 1}; - - const mask_type *voxels = static_cast<const mask_type*>(voxels_info.ptr); - mask_type *result = static_cast<mask_type*>(result_info.ptr); - - Op op; - int64_t sqradius = radius * radius; - - #pragma acc data copyin(voxels[:Nz*Ny*Nx], N[:3], strides[:3], sqradius) copyout(result[:Nz*Ny*Nx]) - { - #pragma acc parallel loop collapse(3) - for (int64_t z = 0; z < N[0]; z++) { - for (int64_t y = 0; y < N[1]; y++) { - for (int64_t x = 0; x < N[2]; x++) { - // Compute boundaries - int64_t flat_index = z*strides[0] + y*strides[1] + x*strides[2]; - int64_t X[3] = {z, y, x}; - int64_t limits[6]; - for (int axis = 0; axis < 3; axis++) { - limits[(axis*2)] = -min(radius, X[axis]); - limits[(axis*2)+1] = min(radius, N[axis] - X[axis] - 1); - } - - // Apply the spherical kernel - bool value = neutral; - //#pragma omp simd collapse(3) reduction(op:value) - for (int64_t pz = limits[0]; pz <= limits[1]; pz++) { - for (int64_t py = limits[2]; py <= limits[3]; py++) { - for (int64_t px = limits[4]; px <= limits[5]; px++) { - bool within = px*px + py*py + pz*pz <= sqradius; // sphere kernel - 
int64_t offset = pz*strides[0] + py*strides[1] + px*strides[2]; - value = within? op(value, voxels[flat_index+offset]) : value; - } - } - } - - // Store the results - result[flat_index] = value; - } - } - } - } -#else - throw runtime_error("Library wasn't compiled with OpenACC."); -#endif -} - void gauss_filter_par_cpu(const py::array_t<mask_type> np_mask, const tuple<uint64_t, uint64_t, uint64_t> shape, const py::array_t<gauss_type> np_kernel, @@ -1094,9 +982,5 @@ PYBIND11_MODULE(histograms, m) { m.def("masked_minmax", &masked_minmax); m.def("float_minmax", &float_minmax); m.def("gauss_filter_par_cpu", &gauss_filter_par_cpu); - m.def("dilate_3d_sphere_cpu", &morphology_3d_sphere_cpu<std::bit_or<mask_type>, false>); - m.def("erode_3d_sphere_cpu", &morphology_3d_sphere_cpu<std::bit_and<mask_type>, true>); - m.def("dilate_3d_sphere_gpu", &morphology_3d_sphere_gpu<std::bit_or<mask_type>, false>); - m.def("erode_3d_sphere_gpu", &morphology_3d_sphere_gpu<std::bit_and<mask_type>, true>); m.def("otsu", &otsu); } diff --git a/src/pybind_kernels/cpu/morphology.cc b/src/pybind_kernels/cpu/morphology.cc new file mode 100644 index 0000000..d706fff --- /dev/null +++ b/src/pybind_kernels/cpu/morphology.cc @@ -0,0 +1,46 @@ +#include "morphology.hh" +#include "datatypes.hh" + +template <typename Op, bool neutral> +void morphology_3d_sphere( + const mask_type *voxels, + const int64_t radius, + const int64_t N[3], + const int64_t strides[3], + mask_type *result) { + Op op; + int64_t sqradius = radius * radius; + + #pragma omp parallel for collapse(3) + for (int64_t z = 0; z < N[0]; z++) { + for (int64_t y = 0; y < N[1]; y++) { + for (int64_t x = 0; x < N[2]; x++) { + // Compute boundaries + int64_t flat_index = z*strides[0] + y*strides[1] + x*strides[2]; + int64_t X[3] = {z, y, x}; + int64_t limits[6]; + for (int axis = 0; axis < 3; axis++) { + limits[(axis*2)] = -min(radius, X[axis]); + limits[(axis*2)+1] = min(radius, N[axis] - X[axis] - 1); + } + + // Apply the spherical kernel + 
bool value = neutral; + //#pragma omp simd collapse(3) reduction(op:value) + for (int64_t pz = limits[0]; pz <= limits[1]; pz++) { + for (int64_t py = limits[2]; py <= limits[3]; py++) { + for (int64_t px = limits[4]; px <= limits[5]; px++) { + // TODO exact match with ndimage + bool within = px*px + py*py + pz*pz <= sqradius; // sphere kernel + int64_t offset = pz*strides[0] + py*strides[1] + px*strides[2]; + value = within? op(value, voxels[flat_index+offset]) : value; + } + } + } + + // Store the results + result[flat_index] = value; + } + } + } +} \ No newline at end of file diff --git a/src/pybind_kernels/cpu_seq/morphology.cc b/src/pybind_kernels/cpu_seq/morphology.cc new file mode 100644 index 0000000..60cea00 --- /dev/null +++ b/src/pybind_kernels/cpu_seq/morphology.cc @@ -0,0 +1,45 @@ +#include "morphology.hh" +#include "datatypes.hh" + +template <typename Op, bool neutral> +void morphology_3d_sphere( + const mask_type *voxels, + const int64_t radius, + const int64_t N[3], + const int64_t strides[3], + mask_type *result) { + Op op; + int64_t sqradius = radius * radius; + + for (int64_t z = 0; z < N[0]; z++) { + for (int64_t y = 0; y < N[1]; y++) { + for (int64_t x = 0; x < N[2]; x++) { + // Compute boundaries + int64_t flat_index = z*strides[0] + y*strides[1] + x*strides[2]; + int64_t X[3] = {z, y, x}; + int64_t limits[6]; + for (int axis = 0; axis < 3; axis++) { + limits[(axis*2)] = -min(radius, X[axis]); + limits[(axis*2)+1] = min(radius, N[axis] - X[axis] - 1); + } + + // Apply the spherical kernel + bool value = neutral; + //#pragma omp simd collapse(3) reduction(op:value) + for (int64_t pz = limits[0]; pz <= limits[1]; pz++) { + for (int64_t py = limits[2]; py <= limits[3]; py++) { + for (int64_t px = limits[4]; px <= limits[5]; px++) { + // TODO exact match with ndimage + bool within = px*px + py*py + pz*pz <= sqradius; // sphere kernel + int64_t offset = pz*strides[0] + py*strides[1] + px*strides[2]; + value = within? 
op(value, voxels[flat_index+offset]) : value; + } + } + } + + // Store the results + result[flat_index] = value; + } + } + } +} \ No newline at end of file diff --git a/src/pybind_kernels/gpu/morphology.cc b/src/pybind_kernels/gpu/morphology.cc new file mode 100644 index 0000000..6230924 --- /dev/null +++ b/src/pybind_kernels/gpu/morphology.cc @@ -0,0 +1,52 @@ +#include "morphology.hh" +#include "datatypes.hh" + +template <typename Op, bool neutral> +void morphology_3d_sphere( + const mask_type *voxels, + const int64_t radius, + const int64_t N[3], + const int64_t strides[3], + mask_type *result) { +#ifdef _OPENACC + Op op; + int64_t sqradius = radius * radius; + + #pragma acc data copyin(voxels[:Nz*Ny*Nx], N[:3], strides[:3], sqradius) copyout(result[:Nz*Ny*Nx]) + { + #pragma acc parallel loop collapse(3) + for (int64_t z = 0; z < N[0]; z++) { + for (int64_t y = 0; y < N[1]; y++) { + for (int64_t x = 0; x < N[2]; x++) { + // Compute boundaries + int64_t flat_index = z*strides[0] + y*strides[1] + x*strides[2]; + int64_t X[3] = {z, y, x}; + int64_t limits[6]; + for (int axis = 0; axis < 3; axis++) { + limits[(axis*2)] = -min(radius, X[axis]); + limits[(axis*2)+1] = min(radius, N[axis] - X[axis] - 1); + } + + // Apply the spherical kernel + bool value = neutral; + //#pragma omp simd collapse(3) reduction(op:value) + for (int64_t pz = limits[0]; pz <= limits[1]; pz++) { + for (int64_t py = limits[2]; py <= limits[3]; py++) { + for (int64_t px = limits[4]; px <= limits[5]; px++) { + bool within = px*px + py*py + pz*pz <= sqradius; // sphere kernel + int64_t offset = pz*strides[0] + py*strides[1] + px*strides[2]; + value = within? 
op(value, voxels[flat_index+offset]) : value; + } + } + } + + // Store the results + result[flat_index] = value; + } + } + } + } +#else + throw runtime_error("Library wasn't compiled with OpenACC."); +#endif +} \ No newline at end of file diff --git a/src/pybind_kernels/include/datatypes.hh b/src/pybind_kernels/include/datatypes.hh index 88a068d..91c7490 100644 --- a/src/pybind_kernels/include/datatypes.hh +++ b/src/pybind_kernels/include/datatypes.hh @@ -1,3 +1,5 @@ +#ifndef datatypes_h +#define datatypes_h #include <array> #include <vector> @@ -19,7 +21,7 @@ template <typename T> struct input_ndarray { const vector<ssize_t> shape; input_ndarray(const T *data, const vector<ssize_t> &shape): data(data), shape(shape) {} - input_ndarray(const void *data, const vector<ssize_t> &shape): data(static_cast<const T*>(data)), shape(shape) {} + input_ndarray(const void *data, const vector<ssize_t> &shape): data(static_cast<const T*>(data)), shape(shape) {} }; template <typename T> struct output_ndarray { @@ -27,8 +29,7 @@ template <typename T> struct output_ndarray { const vector<ssize_t> shape; output_ndarray(T *data, const vector<ssize_t> &shape): data(data), shape(shape) {} - output_ndarray(void *data, const vector<ssize_t> &shape): data(static_cast<T*>(data)), shape(shape) {} + output_ndarray(void *data, const vector<ssize_t> &shape): data(static_cast<T*>(data)), shape(shape) {} }; - - +#endif \ No newline at end of file diff --git a/src/pybind_kernels/include/io.hh b/src/pybind_kernels/include/io.hh index fae2cbf..a28da76 100644 --- a/src/pybind_kernels/include/io.hh +++ b/src/pybind_kernels/include/io.hh @@ -1,9 +1,6 @@ #ifndef io_h #define io_h -#include <pybind11/pybind11.h> -#include <pybind11/numpy.h> - template <typename T> void load_contiguous_slice(T *data, const string filename, const uint64_t offset, const uint64_t size); template <typename T> diff --git a/src/pybind_kernels/include/morphology.hh b/src/pybind_kernels/include/morphology.hh new file mode 100644 
index 0000000..66a28e4 --- /dev/null +++ b/src/pybind_kernels/include/morphology.hh @@ -0,0 +1,14 @@ +#ifndef morphology_h +#define morphology_h + +#include "datatypes.hh" + +template <typename Op, bool neutral> +void morphology_3d_sphere( + const mask_type *voxels, + const int64_t radius, + const int64_t N[3], + const int64_t strides[3], + mask_type *result); + +#endif \ No newline at end of file diff --git a/src/pybind_kernels/pybind/io-pybind.cc b/src/pybind_kernels/pybind/io-pybind.cc index d7c370a..6d1c4e0 100644 --- a/src/pybind_kernels/pybind/io-pybind.cc +++ b/src/pybind_kernels/pybind/io-pybind.cc @@ -32,7 +32,7 @@ void write_slice(const py::array_t<T> &np_data, write_contiguous_slice<T>(data, filename, flat_offset, data_info.size); } -PYBIND11_MODULE(histograms, m) { +PYBIND11_MODULE(io, m) { m.doc() = "I/O functions for handling flat binary format files."; // optional module docstring m.def("load_slice", &load_slice<mask_type>); m.def("load_slice", &load_slice<voxel_type>); diff --git a/src/pybind_kernels/pybind/morphology-pybind.cc b/src/pybind_kernels/pybind/morphology-pybind.cc new file mode 100644 index 0000000..f9c7891 --- /dev/null +++ b/src/pybind_kernels/pybind/morphology-pybind.cc @@ -0,0 +1,33 @@ +#include <pybind11/pybind11.h> +#include <pybind11/numpy.h> + +using namespace std; +namespace py = pybind11; + +#include "morphology.cc" +#include "datatypes.hh" + +template <typename Op, bool neutral> +void morphology_3d_sphere_wrapper( + const py::array_t<mask_type> &np_voxels, + const int64_t radius, + py::array_t<mask_type> np_result) { + auto + voxels_info = np_voxels.request(), + result_info = np_result.request(); + + int32_t Nz = voxels_info.shape[0], Ny = voxels_info.shape[1], Nx = voxels_info.shape[2]; + int64_t N[3] = {Nz, Ny, Nx}; + int64_t strides[3] = {Ny*Nx, Nx, 1}; + + const mask_type *voxels = static_cast<const mask_type*>(voxels_info.ptr); + mask_type *result = static_cast<mask_type*>(result_info.ptr); + + morphology_3d_sphere<Op, 
neutral>(voxels, radius, N, strides, result); +} + +PYBIND11_MODULE(morphology, m) { + m.doc() = "Morphology operations."; // optional module docstring + m.def("dilate_3d_sphere", &morphology_3d_sphere_wrapper<std::bit_or<mask_type>, false>); + m.def("erode_3d_sphere", &morphology_3d_sphere_wrapper<std::bit_and<mask_type>, true>); +} \ No newline at end of file From 507725941e56e42d88a83df7331c2214c6b04425 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <cjjohnsen@nbi.ku.dk> Date: Mon, 13 Jun 2022 18:39:08 +0200 Subject: [PATCH 004/136] #16 Added unit test for io --- src/pybind_kernels/Makefile | 3 ++ src/pybind_kernels/cpu/morphology.cc | 2 +- src/pybind_kernels/cpu_seq/morphology.cc | 3 +- src/pybind_kernels/gpu/morphology.cc | 4 +- src/pybind_kernels/pybind/io-pybind.cc | 22 ++++---- src/pybind_kernels/test/test_io.py | 66 ++++++++++++++++++++++++ 6 files changed, 85 insertions(+), 15 deletions(-) create mode 100644 src/pybind_kernels/test/test_io.py diff --git a/src/pybind_kernels/Makefile b/src/pybind_kernels/Makefile index 59c9911..7612608 100644 --- a/src/pybind_kernels/Makefile +++ b/src/pybind_kernels/Makefile @@ -46,5 +46,8 @@ $(foreach PLATFORM, $(PLATFORMS), \ ) \ ) +test: all + python3 -m pytest test + clean: rm -f $(TARGETS) diff --git a/src/pybind_kernels/cpu/morphology.cc b/src/pybind_kernels/cpu/morphology.cc index d706fff..a180aa5 100644 --- a/src/pybind_kernels/cpu/morphology.cc +++ b/src/pybind_kernels/cpu/morphology.cc @@ -33,7 +33,7 @@ void morphology_3d_sphere( // TODO exact match with ndimage bool within = px*px + py*py + pz*pz <= sqradius; // sphere kernel int64_t offset = pz*strides[0] + py*strides[1] + px*strides[2]; - value = within? op(value, voxels[flat_index+offset]) : value; + value = within ? 
op(value, voxels[flat_index+offset]) : value; } } } diff --git a/src/pybind_kernels/cpu_seq/morphology.cc b/src/pybind_kernels/cpu_seq/morphology.cc index 60cea00..50c16bf 100644 --- a/src/pybind_kernels/cpu_seq/morphology.cc +++ b/src/pybind_kernels/cpu_seq/morphology.cc @@ -25,14 +25,13 @@ void morphology_3d_sphere( // Apply the spherical kernel bool value = neutral; - //#pragma omp simd collapse(3) reduction(op:value) for (int64_t pz = limits[0]; pz <= limits[1]; pz++) { for (int64_t py = limits[2]; py <= limits[3]; py++) { for (int64_t px = limits[4]; px <= limits[5]; px++) { // TODO exact match with ndimage bool within = px*px + py*py + pz*pz <= sqradius; // sphere kernel int64_t offset = pz*strides[0] + py*strides[1] + px*strides[2]; - value = within? op(value, voxels[flat_index+offset]) : value; + value = within ? op(value, voxels[flat_index+offset]) : value; } } } diff --git a/src/pybind_kernels/gpu/morphology.cc b/src/pybind_kernels/gpu/morphology.cc index 6230924..c42ec13 100644 --- a/src/pybind_kernels/gpu/morphology.cc +++ b/src/pybind_kernels/gpu/morphology.cc @@ -12,7 +12,7 @@ void morphology_3d_sphere( Op op; int64_t sqradius = radius * radius; - #pragma acc data copyin(voxels[:Nz*Ny*Nx], N[:3], strides[:3], sqradius) copyout(result[:Nz*Ny*Nx]) + #pragma acc data copyin(voxels[:N[0]*N[1]*N[2]], N[:3], strides[:3], sqradius) copyout(result[:N[0]*N[1]*N[2]]) { #pragma acc parallel loop collapse(3) for (int64_t z = 0; z < N[0]; z++) { @@ -35,7 +35,7 @@ void morphology_3d_sphere( for (int64_t px = limits[4]; px <= limits[5]; px++) { bool within = px*px + py*py + pz*pz <= sqradius; // sphere kernel int64_t offset = pz*strides[0] + py*strides[1] + px*strides[2]; - value = within? op(value, voxels[flat_index+offset]) : value; + value = within ? 
op(value, voxels[flat_index+offset]) : value; } } } diff --git a/src/pybind_kernels/pybind/io-pybind.cc b/src/pybind_kernels/pybind/io-pybind.cc index 6d1c4e0..496b990 100644 --- a/src/pybind_kernels/pybind/io-pybind.cc +++ b/src/pybind_kernels/pybind/io-pybind.cc @@ -34,15 +34,17 @@ void write_slice(const py::array_t<T> &np_data, PYBIND11_MODULE(io, m) { m.doc() = "I/O functions for handling flat binary format files."; // optional module docstring - m.def("load_slice", &load_slice<mask_type>); - m.def("load_slice", &load_slice<voxel_type>); - m.def("load_slice", &load_slice<field_type>); - m.def("load_slice", &load_slice<gauss_type>); - m.def("load_slice", &load_slice<real_t>); + m.def("load_slice", &load_slice<uint8_t>); + m.def("load_slice", &load_slice<uint16_t>); + m.def("load_slice", &load_slice<uint32_t>); + m.def("load_slice", &load_slice<uint64_t>); + m.def("load_slice", &load_slice<float>); + m.def("load_slice", &load_slice<double>); - m.def("write_slice", &write_slice<mask_type>); - m.def("write_slice", &write_slice<voxel_type>); - m.def("write_slice", &write_slice<field_type>); - m.def("write_slice", &write_slice<gauss_type>); - m.def("write_slice", &write_slice<real_t>); + m.def("write_slice", &write_slice<uint8_t>); + m.def("write_slice", &write_slice<uint16_t>); + m.def("write_slice", &write_slice<uint32_t>); + m.def("write_slice", &write_slice<uint64_t>); + m.def("write_slice", &write_slice<float>); + m.def("write_slice", &write_slice<double>); } \ No newline at end of file diff --git a/src/pybind_kernels/test/test_io.py b/src/pybind_kernels/test/test_io.py new file mode 100644 index 0000000..64df23f --- /dev/null +++ b/src/pybind_kernels/test/test_io.py @@ -0,0 +1,66 @@ +''' +Unittests for the I/O pybind kernels. +''' +import sys +sys.path.append(sys.path[0]+"/../") +import cpu_seq.io as io +import numpy as np +import tempfile +import os +import pytest + +# TODO np.bool doesn't work. 
It works when writing, but numpy doesn't recognize that the memory has been updated. It works fine if data_read is a np.uint8 array, even though an np.bool array has been written. +dtypes_to_test = [np.uint8, np.uint16, np.uint32, np.uint64, np.float32, np.float64] +tmp_folder = tempfile._get_default_tempdir() +tmp_filename = next(tempfile._get_candidate_names()) +tmp_file = f'{tmp_folder}/{tmp_filename}' +dim_size = 16 +dim_shape = (dim_size, dim_size, dim_size) +partial_factor = 4 + +def random(shape, dtype): + rnds = np.random.random(shape) * 100 + return rnds > .5 if dtype == np.bool else rnds.astype(dtype) + +@pytest.mark.parametrize("dtype", dtypes_to_test) +def test_dtype(dtype): + individual_tmp_file = f'{tmp_file}.{dtype.__name__}' + data = random(dim_shape, dtype) + data[0,0,1] = False + partial = dim_size // partial_factor + + # Write out a new file + io.write_slice(data, individual_tmp_file, (0,0,0), dim_shape) + assert os.path.getsize(individual_tmp_file) == data.nbytes + + # Read back and verify in chunks + read_data = np.zeros((partial, dim_size, dim_size), dtype=dtype) + for i in range(partial_factor): + io.load_slice(read_data, individual_tmp_file, (i*partial,0,0), read_data.shape) + assert np.allclose(data[i*partial:(i+1)*partial], read_data) + + # Append another layer + data = np.append(data, random((partial, dim_size, dim_size), dtype), axis=0) + io.write_slice(data[dim_size:], individual_tmp_file, (dim_size,0,0), data.shape) + assert os.path.getsize(individual_tmp_file) == data.nbytes + + # Read back and verify in chunks + for i in range(partial_factor+1): + io.load_slice(read_data, individual_tmp_file, (i*partial,0,0), read_data.shape) + assert np.allclose(data[i*partial:(i+1)*partial], read_data) + + # Overwrite one of the "middle" chunks + data[partial:2*partial] = random((partial, dim_size, dim_size), dtype) + io.write_slice(data[partial:partial*2], individual_tmp_file, (partial,0,0), data.shape) + + # Read back and verify in chunks + for i 
in range(partial_factor+1): + io.load_slice(read_data, individual_tmp_file, (i*partial,0,0), read_data.shape) + assert np.allclose(data[i*partial:(i+1)*partial], read_data) + + os.remove(individual_tmp_file) + +if __name__ == '__main__': + for dtype in dtypes_to_test: + print (f'Testing {dtype.__name__}') + test_dtype(dtype) \ No newline at end of file From c4c833aff41d27b5d47fd89aec11228d21d4125c Mon Sep 17 00:00:00 2001 From: Carl Johnsen <cjjohnsen@nbi.ku.dk> Date: Mon, 13 Jun 2022 20:05:23 +0200 Subject: [PATCH 005/136] #16 added unittest for morphology --- src/pybind_kernels/Makefile | 12 +++--- src/pybind_kernels/test/test_morphology.py | 50 ++++++++++++++++++++++ 2 files changed, 56 insertions(+), 6 deletions(-) create mode 100644 src/pybind_kernels/test/test_morphology.py diff --git a/src/pybind_kernels/Makefile b/src/pybind_kernels/Makefile index 7612608..dfd0d09 100644 --- a/src/pybind_kernels/Makefile +++ b/src/pybind_kernels/Makefile @@ -5,12 +5,12 @@ OPENCV_INCLUDE=$(shell pkg-config opencv4 --cflags) OPENCV_LIB=$(shell pkg-config opencv4 --libs) # Detect if OpenACC can be used -#ifneq (, $(shell which nvc++)) -#CXX = nvc++ -#CXXFLAGS += -acc=gpu -Minfo=accel -tp=native -#else -#$(info OpenACC compiler nvc++ not found. Compiling without) -#endif +ifneq (, $(shell which nvc++)) +CXX = nvc++ +CXXFLAGS += -acc=gpu -Minfo=accel -tp=native +else +$(info OpenACC compiler nvc++ not found. Compiling without) +endif CXXFLAGS += -I../contrib/cpptqdm/ -Iinclude diff --git a/src/pybind_kernels/test/test_morphology.py b/src/pybind_kernels/test/test_morphology.py new file mode 100644 index 0000000..cb562d5 --- /dev/null +++ b/src/pybind_kernels/test/test_morphology.py @@ -0,0 +1,50 @@ +''' +Unittests for the morphology pybind kernels. 
+''' +import sys +sys.path.append(sys.path[0]+"/../") +import cpu_seq.morphology as m_cpu_seq +import cpu.morphology as m_cpu +import gpu.morphology as m_gpu +import numpy as np +from scipy import ndimage as ndi +import pytest + +# Parameters +implant_dims = 32 +cross_width = 8 +# TODO if implant_dims doesn't divide by radius, it doesn't work. Except for 2, which also fails. +rs = [4, 8, 16] +impls = [m_cpu_seq, m_cpu, m_gpu] +funcs = [('dilate', ndi.binary_dilation), ('erode', ndi.binary_erosion)] + +def sphere(n): + xs = np.linspace(-1,1,n) + return (xs[:,np.newaxis,np.newaxis]**2 + xs[np.newaxis,:,np.newaxis]**2 + xs[np.newaxis,np.newaxis,:]**2) <= 1 + +@pytest.mark.parametrize('r', rs) +@pytest.mark.parametrize('m', impls) +@pytest.mark.parametrize('op,nd', funcs) +def test_morphology(r, m, op, nd): + implant_mask = np.zeros((implant_dims,implant_dims,implant_dims), dtype=np.uint8) + c = implant_dims // 2 + cross_start, cross_end = c - (cross_width // 2), c + (cross_width // 2) + + implant_mask[:,cross_start:cross_end,cross_start:cross_end] = True + implant_mask[cross_start:cross_end,:,cross_start:cross_end] = True + implant_mask[cross_start:cross_end,cross_start:cross_end,:] = True + + result = np.empty_like(implant_mask) + f = getattr(m, f'{op}_3d_sphere') + f(implant_mask, r, result) + + verification = nd(implant_mask, sphere((2*r)+1)) + + assert np.allclose(verification, result) + +if __name__ == '__main__': + for r in rs: + for m in impls: + for op, nd in funcs: + print (f'Testing the {m.__name__} implementation of {op}') + test_morphology(r, m, op, nd) \ No newline at end of file From c2a7b2f0d0ef737595c733680573c62a94d5a892 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Wed, 21 Dec 2022 11:29:33 +0100 Subject: [PATCH 006/136] Added mac as a target in pybind --- src/pybind_kernels/Makefile | 46 +++++++++++++++---------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/src/pybind_kernels/Makefile 
b/src/pybind_kernels/Makefile index dfd0d09..aeba001 100644 --- a/src/pybind_kernels/Makefile +++ b/src/pybind_kernels/Makefile @@ -1,40 +1,30 @@ -PYBIND_FLAGS += $(shell python3 -m pybind11 --include) -march=native -Wall -shared -fPIC -fopenmp -g -std=c++17 +# Define constants and collections +PYBIND_FLAGS += $(shell python3 -m pybind11 --include) -march=native -Wall -shared -fPIC -fopenmp -g -std=c++17 -O3 PYBIND_SUFFIX = $(shell python3-config --extension-suffix) - -OPENCV_INCLUDE=$(shell pkg-config opencv4 --cflags) -OPENCV_LIB=$(shell pkg-config opencv4 --libs) +#CXXFLAGS += -I../contrib/cpptqdm/ -Iinclude +CXXFLAGS += -Iinclude +PLATFORMS=cpu_seq cpu gpu +LIBS=io morphology +TARGETS = $(foreach PLATFORM, $(PLATFORMS), $(foreach LIB, $(LIBS), $(PLATFORM)/$(LIB)$(PYBIND_SUFFIX))) +CLEANUP = $(TARGETS) $(foreach PLATFORM, $(PLATFORMS), $(PLATFORM)/__pycache__) # Detect if OpenACC can be used ifneq (, $(shell which nvc++)) CXX = nvc++ CXXFLAGS += -acc=gpu -Minfo=accel -tp=native -else -$(info OpenACC compiler nvc++ not found. Compiling without) +else +$(info OpenACC compiler nvc++ not found. Compiling without.) 
endif -CXXFLAGS += -I../contrib/cpptqdm/ -Iinclude - -PLATFORMS=cpu_seq cpu gpu -LIBS=io morphology -TARGETS = $(foreach PLATFORM, $(PLATFORMS), $(foreach LIB, $(LIBS), $(PLATFORM)/$(LIB)$(PYBIND_SUFFIX))) +# Detect OS for OS specific changes +ifeq ($(shell uname -s), Darwin) # Mac OSX +CXX = g++-12 # Use homebrew gcc, as system gcc is an alias for clang +CXXFLAGS += -undefined dynamic_lookup # https://pybind11.readthedocs.io/en/stable/compiling.html#building-manually +CLEANUP += $(TARGETS) $(foreach TARGET, $(TARGETS), $(TARGET).dSYM) # These are also generated on Mac +endif all: $(TARGETS) -histograms$(PYBIND_SUFFIX): histograms.cc - $(CXX) $(CXXFLAGS) $(PYBIND_FLAGS) $< -o histograms$(PYBIND_SUFFIX) - -geometry$(PYBIND_SUFFIX): geometry-pybind.cc geometry.cc - $(CXX) $(CXXFLAGS) $(PYBIND_FLAGS) $< -o geometry$(PYBIND_SUFFIX) - -label$(PYBIND_SUFFIX): label.cc - $(CXX) $(CXXFLAGS) $(PYBIND_FLAGS) $< -o label$(PYBIND_SUFFIX) - -opencv_pybind$(PYBIND_SUFFIX): opencv_pybind.cc - $(CXX) $(CXXFLAGS) $(PYBIND_FLAGS) $(OPENCV_INCLUDE) $(OPENCV_LIB) $< -o opencv_pybind$(PYBIND_SUFFIX) - -opencv_tester: opencv_tester.cc - $(CXX) $(CXXFLAGS) $(OPENCV_INCLUDE) $(OPENCV_LIB) $< -o opencv_tester - define GEN_RULE $(PLATFORM)/$(LIB)$(PYBIND_SUFFIX): pybind/$(LIB)-pybind.cc $(PLATFORM)/$(LIB).cc $(CXX) $(CXXFLAGS) $(PYBIND_FLAGS) -I$(PLATFORM) $$< -o $(PLATFORM)/$(LIB)$(PYBIND_SUFFIX) @@ -47,7 +37,7 @@ $(foreach PLATFORM, $(PLATFORMS), \ ) test: all - python3 -m pytest test + python3 -m pytest -n auto test clean: - rm -f $(TARGETS) + rm -rf $(CLEANUP) test/__pycache__ .pytest_cache From f75f53681df05c8e9189466429427f96c003cd2a Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Wed, 21 Dec 2022 11:30:17 +0100 Subject: [PATCH 007/136] Added compiled files to gitignore --- .gitignore | 4 ++++ src/pybind_kernels/cpu/__init__.py | 0 2 files changed, 4 insertions(+) delete mode 100644 src/pybind_kernels/cpu/__init__.py diff --git a/.gitignore b/.gitignore 
index b647bd7..6e48464 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,7 @@ src/meow/meow_triggers/* src/meow/runner_data/* src/meow/runner_output/* src/meow/runner_processing/* + +# Compiled files +*.so +*.so.dSYM \ No newline at end of file diff --git a/src/pybind_kernels/cpu/__init__.py b/src/pybind_kernels/cpu/__init__.py deleted file mode 100644 index e69de29..0000000 From e10541416a0725618dd4ba6780c610cf259afad6 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Wed, 21 Dec 2022 11:31:33 +0100 Subject: [PATCH 008/136] Fixed verification error in erode near the borders --- src/pybind_kernels/test/test_morphology.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pybind_kernels/test/test_morphology.py b/src/pybind_kernels/test/test_morphology.py index cb562d5..a38f5ed 100644 --- a/src/pybind_kernels/test/test_morphology.py +++ b/src/pybind_kernels/test/test_morphology.py @@ -9,6 +9,7 @@ import numpy as np from scipy import ndimage as ndi import pytest +from functools import partial # Parameters implant_dims = 32 @@ -16,7 +17,7 @@ # TODO if implant_dims doesn't divide by radius, it doesn't work. Except for 2, which also fails. 
rs = [4, 8, 16] impls = [m_cpu_seq, m_cpu, m_gpu] -funcs = [('dilate', ndi.binary_dilation), ('erode', ndi.binary_erosion)] +funcs = [('dilate', ndi.binary_dilation), ('erode', partial(ndi.binary_erosion, border_value=1))] def sphere(n): xs = np.linspace(-1,1,n) @@ -26,7 +27,7 @@ def sphere(n): @pytest.mark.parametrize('m', impls) @pytest.mark.parametrize('op,nd', funcs) def test_morphology(r, m, op, nd): - implant_mask = np.zeros((implant_dims,implant_dims,implant_dims), dtype=np.uint8) + implant_mask = np.random.randint(0, 2, (implant_dims, implant_dims, implant_dims), dtype=np.uint8) c = implant_dims // 2 cross_start, cross_end = c - (cross_width // 2), c + (cross_width // 2) From 306f21fb9f185555b2151cb3a810ed2f4ea23388 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Wed, 21 Dec 2022 11:39:42 +0100 Subject: [PATCH 009/136] Remove numpy bool deprecation warning --- src/pybind_kernels/test/test_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pybind_kernels/test/test_io.py b/src/pybind_kernels/test/test_io.py index 64df23f..6b351db 100644 --- a/src/pybind_kernels/test/test_io.py +++ b/src/pybind_kernels/test/test_io.py @@ -20,7 +20,7 @@ def random(shape, dtype): rnds = np.random.random(shape) * 100 - return rnds > .5 if dtype == np.bool else rnds.astype(dtype) + return rnds > .5 if dtype == bool else rnds.astype(dtype) @pytest.mark.parametrize("dtype", dtypes_to_test) def test_dtype(dtype): From ea80d3b26492ef3071db69978e262c6933245fa7 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Tue, 7 Feb 2023 15:46:24 +0100 Subject: [PATCH 010/136] Added manual specification of root paths --- src/config/threadripper00/paths.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/config/threadripper00/paths.py b/src/config/threadripper00/paths.py index 199d562..00a8685 100644 --- a/src/config/threadripper00/paths.py +++ b/src/config/threadripper00/paths.py @@ -1,5 +1,8 @@ 
-hdf5_root = "/data/MAXIBONE/Goats/tomograms" -hdf5_root_fast = "/mnt/shared/MAXIBONE/Goats/tomograms" +data_root = "/data" +fast_root = "/data_fast" + +hdf5_root = f"{data_root}/MAXIBONE/Goats/tomograms" +hdf5_root_fast = f"{fast_root}/MAXIBONE/Goats/tomograms" binary_root = f"{hdf5_root_fast}/binary" esrf_data_local= f"{hdf5_root}/ESRF/" From 55ef3de24d602dca3d2898f13831f08d613d230e Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Tue, 7 Feb 2023 15:46:38 +0100 Subject: [PATCH 011/136] Remove IO test file, if exists, before running test --- src/pybind_kernels/test/test_io.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pybind_kernels/test/test_io.py b/src/pybind_kernels/test/test_io.py index 6b351db..3ece7e4 100644 --- a/src/pybind_kernels/test/test_io.py +++ b/src/pybind_kernels/test/test_io.py @@ -25,6 +25,8 @@ def random(shape, dtype): @pytest.mark.parametrize("dtype", dtypes_to_test) def test_dtype(dtype): individual_tmp_file = f'{tmp_file}.{dtype.__name__}' + if os.path.exists(individual_tmp_file): + os.remove(individual_tmp_file) data = random(dim_shape, dtype) data[0,0,1] = False partial = dim_size // partial_factor From 01a0d698549ecafdeb68454057a5462b8804d230 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Tue, 7 Feb 2023 15:47:02 +0100 Subject: [PATCH 012/136] Added option to specify python interpreter for pybind --- src/pybind_kernels/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pybind_kernels/Makefile b/src/pybind_kernels/Makefile index aeba001..a8f7f9a 100644 --- a/src/pybind_kernels/Makefile +++ b/src/pybind_kernels/Makefile @@ -1,6 +1,7 @@ +PYTHON = python3.11 # Define constants and collections -PYBIND_FLAGS += $(shell python3 -m pybind11 --include) -march=native -Wall -shared -fPIC -fopenmp -g -std=c++17 -O3 -PYBIND_SUFFIX = $(shell python3-config --extension-suffix) +PYBIND_FLAGS += $(shell $(PYTHON) -m pybind11 --include) -march=native -Wall 
-shared -fPIC -fopenmp -g -std=c++17 -O3 +PYBIND_SUFFIX = $(shell $(PYTHON)-config --extension-suffix) #CXXFLAGS += -I../contrib/cpptqdm/ -Iinclude CXXFLAGS += -Iinclude PLATFORMS=cpu_seq cpu gpu @@ -37,7 +38,7 @@ $(foreach PLATFORM, $(PLATFORMS), \ ) test: all - python3 -m pytest -n auto test + $(PYTHON) -m pytest -n auto test clean: rm -rf $(CLEANUP) test/__pycache__ .pytest_cache From a60cb2dc13e7efa00e615bb319551ac58f725b07 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Tue, 7 Feb 2023 15:47:12 +0100 Subject: [PATCH 013/136] Added notes on how to restructure --- src/struktur.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 src/struktur.md diff --git a/src/struktur.md b/src/struktur.md new file mode 100644 index 0000000..f8e10b1 --- /dev/null +++ b/src/struktur.md @@ -0,0 +1,42 @@ +src/ + __init__.py + config/ + constants.py + paths.py + threadripper00.json + lib/ + __init__.py + cpp/ + cpu/ + cpu_seq/ + gpu/ + best/ + include/ + py/ # TODO tænk over hvordan de vælger implementation -- gerne hvordan det trickler "nedad" + Istedet for at loade al data ind i ram og så køre blokvist over på GPU, så udnyt async yield til at lave en generator! + async memmap! + geometry/ + FoR_me.py + debug-explore/ + *.ipynb + processing_steps/ # kun cli ting der kører af sig selv (+rapport ting over hvad der skete) + 100-.py + 200- + pybind/ + *-pybind.cc + test/ + pybind-*.py + større-test(s).py + utils/ + io/ + histograms/ + alternative_processing_steps/ + doitall.sh + +sæt ci op som test lokalt > generer fil > github action tjekker om fil rapporten matcher git commit hash og melder korrekt test kørsel (eller noget i den dur!) + +under oprydning, hold til samme argument interface som de andre! (i.e. compute_ridges gør ikke ( ͡° ͜ʖ ͡°) ) + +gennemgå doitall og hiv de relevante ud i processing_steps. Dertil kør alt igennem! + +doitall skal også lave en rapport tex. 
(tænk applied ML small assignment rapporten) \ No newline at end of file From 91f75bc0e754d02203172c1d36595c1ce1b9737f Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 11:41:47 +0100 Subject: [PATCH 014/136] Ubuntu doesn't have python3.11 --- src/pybind_kernels/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pybind_kernels/Makefile b/src/pybind_kernels/Makefile index a8f7f9a..857ce2e 100644 --- a/src/pybind_kernels/Makefile +++ b/src/pybind_kernels/Makefile @@ -1,5 +1,5 @@ -PYTHON = python3.11 -# Define constants and collections +PYTHON = python3.10 +# Define constants and collections PYBIND_FLAGS += $(shell $(PYTHON) -m pybind11 --include) -march=native -Wall -shared -fPIC -fopenmp -g -std=c++17 -O3 PYBIND_SUFFIX = $(shell $(PYTHON)-config --extension-suffix) #CXXFLAGS += -I../contrib/cpptqdm/ -Iinclude @@ -13,7 +13,7 @@ CLEANUP = $(TARGETS) $(foreach PLATFORM, $(PLATFORMS), $(PLATFORM)/__pycache__) ifneq (, $(shell which nvc++)) CXX = nvc++ CXXFLAGS += -acc=gpu -Minfo=accel -tp=native -else +else $(info OpenACC compiler nvc++ not found. Compiling without.) 
endif From 7e491570d65703132c9d86bc7a613370e4571c84 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 11:42:06 +0100 Subject: [PATCH 015/136] Added lightweight benchmarking to morphology tests --- src/pybind_kernels/test/test_morphology.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/pybind_kernels/test/test_morphology.py b/src/pybind_kernels/test/test_morphology.py index a38f5ed..a28c8a6 100644 --- a/src/pybind_kernels/test/test_morphology.py +++ b/src/pybind_kernels/test/test_morphology.py @@ -10,6 +10,7 @@ from scipy import ndimage as ndi import pytest from functools import partial +import datetime # Parameters implant_dims = 32 @@ -38,14 +39,21 @@ def test_morphology(r, m, op, nd): result = np.empty_like(implant_mask) f = getattr(m, f'{op}_3d_sphere') f(implant_mask, r, result) + fsta = datetime.datetime.now() + f(implant_mask, r, result) + fend = datetime.datetime.now() + vsta = datetime.datetime.now() verification = nd(implant_mask, sphere((2*r)+1)) + vend = datetime.datetime.now() assert np.allclose(verification, result) + return fend - fsta, (vend - vsta) / (fend - fsta) + if __name__ == '__main__': + # TDOO move the data generation and ndi verification out to speed up running for r in rs: for m in impls: for op, nd in funcs: - print (f'Testing the {m.__name__} implementation of {op}') - test_morphology(r, m, op, nd) \ No newline at end of file + print (f'Testing the {m.__name__} implementation of {op}', test_morphology(r, m, op, nd)) From 0f586c016c8fe76826a80456aea4c8ea31694479 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 12:55:31 +0100 Subject: [PATCH 016/136] Started restructuring by moving all of the old scripts into its own folder --- .../analysis/cylinder_surface.py | 0 .../analysis/cylinder_surface2.py | 0 {src => pre-cleanup-src}/analysis/ellipsoids.py | 0 {src => pre-cleanup-src}/analysis/getthebone.py | 0 {src => 
pre-cleanup-src}/analysis/orientation.py | 0 {src => pre-cleanup-src}/analysis/osteocytes.py | 0 .../analysis/video1-segmentation.py | 0 .../analysis/video2-implant_contact.py | 0 .../bh-kernels/axes_histogram.c | 0 .../bh-kernels/centres-of-mass-3xn.c | 0 .../bh-kernels/centres-of-mass.c | 0 .../bh-kernels/collect-labeled.c | 0 .../bh-kernels/collect-nonzero.c | 0 .../bh-kernels/count-labeled.c | 0 {src => pre-cleanup-src}/bh-kernels/fft-cpu.c | 0 {src => pre-cleanup-src}/bh-kernels/fft-kernel.c | 0 .../bh-kernels/inertia-matrix.c | 0 .../bh-kernels/inertia-matrix.o | Bin {src => pre-cleanup-src}/bh-kernels/lookup-cpu.c | 0 .../bh-kernels/matrix3x3/eigenvalues.c | 0 .../bh-kernels/matrix3x3/eigenvalues.o | Bin .../bh-kernels/matrix3x3/eigenvalues.py | 0 .../bh-kernels/matrix3x3/eigenvectors.c | 0 {src => pre-cleanup-src}/bh-kernels/ndi_label.c | 0 {src => pre-cleanup-src}/bh-kernels/ndi_label.py | 0 .../bh-kernels/principal-axes.c | 0 {src => pre-cleanup-src}/bh-kernels/reduceat.c | 0 {src => pre-cleanup-src}/bh-kernels/reduceat.o | Bin {src => pre-cleanup-src}/bh-kernels/rfft-cpu.c | 0 .../bh-kernels/select_segments.c | 0 .../bh-kernels/select_segments.o | Bin .../bh-kernels/select_segments.py | 0 .../bh-kernels/sliding-kernel-opencl.c | 0 .../bh-kernels/sliding-kernel.c | 0 .../bh-kernels/sliding-kernel_2D.c | 0 .../bh-kernels/sparse_label.c | 0 {src => pre-cleanup-src}/config/__init__.py | 0 {src => pre-cleanup-src}/config/constants.py | 0 {src => pre-cleanup-src}/config/kakapo/paths.py | 0 {src => pre-cleanup-src}/config/nautilus/paths.py | 0 {src => pre-cleanup-src}/config/paths.py | 0 .../config/threadripper00/paths.py | 0 .../config/threadripper00/paths.py~ | 0 .../config/threadripper01/paths.py | 0 {src => pre-cleanup-src}/contrib/cpptqdm/LICENSE | 0 .../contrib/cpptqdm/README.md | 0 {src => pre-cleanup-src}/contrib/cpptqdm/tqdm.h | 0 {src => pre-cleanup-src}/convert-to-hdf5.py | 0 {src => pre-cleanup-src}/doitall.py | 0 .../experimental/histogram.cc | 0 
{src => pre-cleanup-src}/figures/fig_bic.py | 0 {src => pre-cleanup-src}/figures/figures.py | 0 {src => pre-cleanup-src}/figures/vedo_blood.py | 0 {src => pre-cleanup-src}/generate-Igauss.py | 0 .../generate-absorption-classes.py | 0 {src => pre-cleanup-src}/generate-byte-files.py | 0 {src => pre-cleanup-src}/generate_gauss_c.py | 0 {src => pre-cleanup-src}/helper_functions.py | 0 .../histogram_processing/compute_distributions.py | 0 .../histogram_processing/compute_histograms.py | 0 .../histogram_processing/compute_probabilities.py | 0 .../histogram_processing/compute_ridges.py | 0 .../histogram_processing/cubic2.py | 0 .../histogram_processing/distributions.py | 0 .../histogram_processing/material_correction.py | 0 .../optimize_distributions_flat.py | 0 .../histogram_processing/piecewise_cubic.py | 0 .../histogram_processing/piecewise_quadratic.py | 0 .../histogram_processing/pybind_kernels | 0 .../histogram_processing/test.py | 0 {src => pre-cleanup-src}/imaging/bitmaps.py | 0 {src => pre-cleanup-src}/imaging/clustering.py | 0 {src => pre-cleanup-src}/imaging/distributions.py | 0 {src => pre-cleanup-src}/imaging/sparse_labels.py | 0 {src => pre-cleanup-src}/imaging/sparse_ndi.py | 0 {src => pre-cleanup-src}/imaging/uk_ndi.py | 0 {src => pre-cleanup-src}/io_modules/blockmap.py | 0 .../io_modules/cache_esrf2013.py | 0 {src => pre-cleanup-src}/io_modules/esrf2011.py | 0 {src => pre-cleanup-src}/io_modules/esrf_read.py | 0 .../io_modules/h5-blockmap.cc | 0 {src => pre-cleanup-src}/io_modules/h5tomo.py | 0 .../io_modules/write_video.py | 0 {src => pre-cleanup-src}/limbo/datasources.py | 0 .../limbo/rescale-everything.py | 0 {src => pre-cleanup-src}/limbo/volm.py | 0 {src => pre-cleanup-src}/meow/config | 0 {src => pre-cleanup-src}/meow/meow_variables.py | 0 .../meow/notebooks/00_generate_byte_data.ipynb | 0 .../meow/notebooks/01_volume_matcher.ipynb | 0 .../meow/notebooks/02_generate_scales.ipynb | 0 .../meow/notebooks/03_implant_analysis.ipynb | 0 
.../notebooks/04_generate_implant_diffusion.ipynb | 0 .../meow/notebooks/05_generate_implant_edt.ipynb | 0 .../meow/notebooks/06_compute_histograms.ipynb | 0 .../meow/notebooks/07_compute_ridges.ipynb | 0 .../meow/notebooks/08_compute_probabilities.ipynb | 0 .../meow/notebooks/09_compute_segmentation.ipynb | 0 .../meow/notebooks/10_compute_bone_area.ipynb | 0 .../11_repeat_histogram_with_constraints.ipynb | 0 {src => pre-cleanup-src}/meow/notebooks/config | 0 {src => pre-cleanup-src}/meow/run_workflow.py | 0 .../meow/update_live_runner.py | 0 .../obsolete/generate-histograms-axes.py | 0 .../obsolete/generate-radial-histograms.py | 0 .../obsolete/generate-y-histograms.py | 0 .../preprocess/generate-implant-diffusion.py | 0 .../preprocess/generate-implant-edt.py | 0 {src => pre-cleanup-src}/preprocess/resample.py | 0 .../preprocess/rescale-cupy-bin.py | 0 .../preprocess/rescale-cupy.py | 0 .../pybind_kernels/cpu/geometry.cc | 0 .../pybind_kernels/cpu/histograms.cc | 0 .../pybind_kernels/cpu/label.cc | 0 .../pybind_kernels/include/parallel.hh | 0 .../pybind_kernels/pybind/geometry-pybind.cc | 0 {src => pre-cleanup-src}/scripts/bin2npy.py | 0 .../scripts/closing_mask.ipynb | 0 {src => pre-cleanup-src}/scripts/closing_mask.py | 0 {src => pre-cleanup-src}/scripts/config | 0 .../scripts/display_partial_segment.py | 0 .../scripts/generate-byte-hdf5.py | 0 .../scripts/generate-scales.py | 0 .../scripts/generate_gimp_probabilities.py | 0 .../scripts/generate_otsu_probabilities.py | 0 {src => pre-cleanup-src}/scripts/h5tobin.py | 0 {src => pre-cleanup-src}/scripts/otsu.ipynb | 0 .../scripts/segment_from_distributions.py | 0 .../scripts/volume_matcher.py | 0 .../segmentation/airandbone-fn.py | 0 .../segmentation/airandbone.py | 0 {src => pre-cleanup-src}/segmentation/bone.py | 0 .../segmentation/hiresboneregion.py | 0 .../segmentation/implant-FoR.py | 0 .../segmentation/implant-data.py | 0 .../segmentation/segment-air-cc.py | 0 .../segmentation/segment-blood-cc.py | 0 
.../segmentation/segment-blood-cc2.py | 0 .../segmentation/segment-implant-cc.py | 0 .../segmentation/segment-implant.py | 0 {src => pre-cleanup-src}/struktur.md | 0 {src => pre-cleanup-src}/test.py | 0 src/Makefile | 14 -------------- 143 files changed, 14 deletions(-) rename {src => pre-cleanup-src}/analysis/cylinder_surface.py (100%) rename {src => pre-cleanup-src}/analysis/cylinder_surface2.py (100%) rename {src => pre-cleanup-src}/analysis/ellipsoids.py (100%) rename {src => pre-cleanup-src}/analysis/getthebone.py (100%) rename {src => pre-cleanup-src}/analysis/orientation.py (100%) rename {src => pre-cleanup-src}/analysis/osteocytes.py (100%) rename {src => pre-cleanup-src}/analysis/video1-segmentation.py (100%) rename {src => pre-cleanup-src}/analysis/video2-implant_contact.py (100%) rename {src => pre-cleanup-src}/bh-kernels/axes_histogram.c (100%) rename {src => pre-cleanup-src}/bh-kernels/centres-of-mass-3xn.c (100%) rename {src => pre-cleanup-src}/bh-kernels/centres-of-mass.c (100%) rename {src => pre-cleanup-src}/bh-kernels/collect-labeled.c (100%) rename {src => pre-cleanup-src}/bh-kernels/collect-nonzero.c (100%) rename {src => pre-cleanup-src}/bh-kernels/count-labeled.c (100%) rename {src => pre-cleanup-src}/bh-kernels/fft-cpu.c (100%) rename {src => pre-cleanup-src}/bh-kernels/fft-kernel.c (100%) rename {src => pre-cleanup-src}/bh-kernels/inertia-matrix.c (100%) rename {src => pre-cleanup-src}/bh-kernels/inertia-matrix.o (100%) rename {src => pre-cleanup-src}/bh-kernels/lookup-cpu.c (100%) rename {src => pre-cleanup-src}/bh-kernels/matrix3x3/eigenvalues.c (100%) rename {src => pre-cleanup-src}/bh-kernels/matrix3x3/eigenvalues.o (100%) rename {src => pre-cleanup-src}/bh-kernels/matrix3x3/eigenvalues.py (100%) rename {src => pre-cleanup-src}/bh-kernels/matrix3x3/eigenvectors.c (100%) rename {src => pre-cleanup-src}/bh-kernels/ndi_label.c (100%) rename {src => pre-cleanup-src}/bh-kernels/ndi_label.py (100%) rename {src => 
pre-cleanup-src}/bh-kernels/principal-axes.c (100%) rename {src => pre-cleanup-src}/bh-kernels/reduceat.c (100%) rename {src => pre-cleanup-src}/bh-kernels/reduceat.o (100%) rename {src => pre-cleanup-src}/bh-kernels/rfft-cpu.c (100%) rename {src => pre-cleanup-src}/bh-kernels/select_segments.c (100%) rename {src => pre-cleanup-src}/bh-kernels/select_segments.o (100%) rename {src => pre-cleanup-src}/bh-kernels/select_segments.py (100%) rename {src => pre-cleanup-src}/bh-kernels/sliding-kernel-opencl.c (100%) rename {src => pre-cleanup-src}/bh-kernels/sliding-kernel.c (100%) rename {src => pre-cleanup-src}/bh-kernels/sliding-kernel_2D.c (100%) rename {src => pre-cleanup-src}/bh-kernels/sparse_label.c (100%) rename {src => pre-cleanup-src}/config/__init__.py (100%) rename {src => pre-cleanup-src}/config/constants.py (100%) rename {src => pre-cleanup-src}/config/kakapo/paths.py (100%) rename {src => pre-cleanup-src}/config/nautilus/paths.py (100%) rename {src => pre-cleanup-src}/config/paths.py (100%) rename {src => pre-cleanup-src}/config/threadripper00/paths.py (100%) rename {src => pre-cleanup-src}/config/threadripper00/paths.py~ (100%) rename {src => pre-cleanup-src}/config/threadripper01/paths.py (100%) rename {src => pre-cleanup-src}/contrib/cpptqdm/LICENSE (100%) rename {src => pre-cleanup-src}/contrib/cpptqdm/README.md (100%) rename {src => pre-cleanup-src}/contrib/cpptqdm/tqdm.h (100%) rename {src => pre-cleanup-src}/convert-to-hdf5.py (100%) rename {src => pre-cleanup-src}/doitall.py (100%) rename {src => pre-cleanup-src}/experimental/histogram.cc (100%) rename {src => pre-cleanup-src}/figures/fig_bic.py (100%) rename {src => pre-cleanup-src}/figures/figures.py (100%) rename {src => pre-cleanup-src}/figures/vedo_blood.py (100%) rename {src => pre-cleanup-src}/generate-Igauss.py (100%) rename {src => pre-cleanup-src}/generate-absorption-classes.py (100%) rename {src => pre-cleanup-src}/generate-byte-files.py (100%) rename {src => 
pre-cleanup-src}/generate_gauss_c.py (100%) rename {src => pre-cleanup-src}/helper_functions.py (100%) rename {src => pre-cleanup-src}/histogram_processing/compute_distributions.py (100%) rename {src => pre-cleanup-src}/histogram_processing/compute_histograms.py (100%) rename {src => pre-cleanup-src}/histogram_processing/compute_probabilities.py (100%) rename {src => pre-cleanup-src}/histogram_processing/compute_ridges.py (100%) rename {src => pre-cleanup-src}/histogram_processing/cubic2.py (100%) rename {src => pre-cleanup-src}/histogram_processing/distributions.py (100%) rename {src => pre-cleanup-src}/histogram_processing/material_correction.py (100%) rename {src => pre-cleanup-src}/histogram_processing/optimize_distributions_flat.py (100%) rename {src => pre-cleanup-src}/histogram_processing/piecewise_cubic.py (100%) rename {src => pre-cleanup-src}/histogram_processing/piecewise_quadratic.py (100%) rename {src => pre-cleanup-src}/histogram_processing/pybind_kernels (100%) rename {src => pre-cleanup-src}/histogram_processing/test.py (100%) rename {src => pre-cleanup-src}/imaging/bitmaps.py (100%) rename {src => pre-cleanup-src}/imaging/clustering.py (100%) rename {src => pre-cleanup-src}/imaging/distributions.py (100%) rename {src => pre-cleanup-src}/imaging/sparse_labels.py (100%) rename {src => pre-cleanup-src}/imaging/sparse_ndi.py (100%) rename {src => pre-cleanup-src}/imaging/uk_ndi.py (100%) rename {src => pre-cleanup-src}/io_modules/blockmap.py (100%) rename {src => pre-cleanup-src}/io_modules/cache_esrf2013.py (100%) rename {src => pre-cleanup-src}/io_modules/esrf2011.py (100%) rename {src => pre-cleanup-src}/io_modules/esrf_read.py (100%) rename {src => pre-cleanup-src}/io_modules/h5-blockmap.cc (100%) rename {src => pre-cleanup-src}/io_modules/h5tomo.py (100%) rename {src => pre-cleanup-src}/io_modules/write_video.py (100%) rename {src => pre-cleanup-src}/limbo/datasources.py (100%) rename {src => pre-cleanup-src}/limbo/rescale-everything.py (100%) 
rename {src => pre-cleanup-src}/limbo/volm.py (100%) rename {src => pre-cleanup-src}/meow/config (100%) rename {src => pre-cleanup-src}/meow/meow_variables.py (100%) rename {src => pre-cleanup-src}/meow/notebooks/00_generate_byte_data.ipynb (100%) rename {src => pre-cleanup-src}/meow/notebooks/01_volume_matcher.ipynb (100%) rename {src => pre-cleanup-src}/meow/notebooks/02_generate_scales.ipynb (100%) rename {src => pre-cleanup-src}/meow/notebooks/03_implant_analysis.ipynb (100%) rename {src => pre-cleanup-src}/meow/notebooks/04_generate_implant_diffusion.ipynb (100%) rename {src => pre-cleanup-src}/meow/notebooks/05_generate_implant_edt.ipynb (100%) rename {src => pre-cleanup-src}/meow/notebooks/06_compute_histograms.ipynb (100%) rename {src => pre-cleanup-src}/meow/notebooks/07_compute_ridges.ipynb (100%) rename {src => pre-cleanup-src}/meow/notebooks/08_compute_probabilities.ipynb (100%) rename {src => pre-cleanup-src}/meow/notebooks/09_compute_segmentation.ipynb (100%) rename {src => pre-cleanup-src}/meow/notebooks/10_compute_bone_area.ipynb (100%) rename {src => pre-cleanup-src}/meow/notebooks/11_repeat_histogram_with_constraints.ipynb (100%) rename {src => pre-cleanup-src}/meow/notebooks/config (100%) rename {src => pre-cleanup-src}/meow/run_workflow.py (100%) rename {src => pre-cleanup-src}/meow/update_live_runner.py (100%) rename {src => pre-cleanup-src}/obsolete/generate-histograms-axes.py (100%) rename {src => pre-cleanup-src}/obsolete/generate-radial-histograms.py (100%) rename {src => pre-cleanup-src}/obsolete/generate-y-histograms.py (100%) rename {src => pre-cleanup-src}/preprocess/generate-implant-diffusion.py (100%) rename {src => pre-cleanup-src}/preprocess/generate-implant-edt.py (100%) rename {src => pre-cleanup-src}/preprocess/resample.py (100%) rename {src => pre-cleanup-src}/preprocess/rescale-cupy-bin.py (100%) rename {src => pre-cleanup-src}/preprocess/rescale-cupy.py (100%) rename {src => pre-cleanup-src}/pybind_kernels/cpu/geometry.cc 
(100%) rename {src => pre-cleanup-src}/pybind_kernels/cpu/histograms.cc (100%) rename {src => pre-cleanup-src}/pybind_kernels/cpu/label.cc (100%) rename {src => pre-cleanup-src}/pybind_kernels/include/parallel.hh (100%) rename {src => pre-cleanup-src}/pybind_kernels/pybind/geometry-pybind.cc (100%) rename {src => pre-cleanup-src}/scripts/bin2npy.py (100%) rename {src => pre-cleanup-src}/scripts/closing_mask.ipynb (100%) rename {src => pre-cleanup-src}/scripts/closing_mask.py (100%) rename {src => pre-cleanup-src}/scripts/config (100%) rename {src => pre-cleanup-src}/scripts/display_partial_segment.py (100%) rename {src => pre-cleanup-src}/scripts/generate-byte-hdf5.py (100%) rename {src => pre-cleanup-src}/scripts/generate-scales.py (100%) rename {src => pre-cleanup-src}/scripts/generate_gimp_probabilities.py (100%) rename {src => pre-cleanup-src}/scripts/generate_otsu_probabilities.py (100%) rename {src => pre-cleanup-src}/scripts/h5tobin.py (100%) rename {src => pre-cleanup-src}/scripts/otsu.ipynb (100%) rename {src => pre-cleanup-src}/scripts/segment_from_distributions.py (100%) rename {src => pre-cleanup-src}/scripts/volume_matcher.py (100%) rename {src => pre-cleanup-src}/segmentation/airandbone-fn.py (100%) rename {src => pre-cleanup-src}/segmentation/airandbone.py (100%) rename {src => pre-cleanup-src}/segmentation/bone.py (100%) rename {src => pre-cleanup-src}/segmentation/hiresboneregion.py (100%) rename {src => pre-cleanup-src}/segmentation/implant-FoR.py (100%) rename {src => pre-cleanup-src}/segmentation/implant-data.py (100%) rename {src => pre-cleanup-src}/segmentation/segment-air-cc.py (100%) rename {src => pre-cleanup-src}/segmentation/segment-blood-cc.py (100%) rename {src => pre-cleanup-src}/segmentation/segment-blood-cc2.py (100%) rename {src => pre-cleanup-src}/segmentation/segment-implant-cc.py (100%) rename {src => pre-cleanup-src}/segmentation/segment-implant.py (100%) rename {src => pre-cleanup-src}/struktur.md (100%) rename {src => 
pre-cleanup-src}/test.py (100%) delete mode 100644 src/Makefile diff --git a/src/analysis/cylinder_surface.py b/pre-cleanup-src/analysis/cylinder_surface.py similarity index 100% rename from src/analysis/cylinder_surface.py rename to pre-cleanup-src/analysis/cylinder_surface.py diff --git a/src/analysis/cylinder_surface2.py b/pre-cleanup-src/analysis/cylinder_surface2.py similarity index 100% rename from src/analysis/cylinder_surface2.py rename to pre-cleanup-src/analysis/cylinder_surface2.py diff --git a/src/analysis/ellipsoids.py b/pre-cleanup-src/analysis/ellipsoids.py similarity index 100% rename from src/analysis/ellipsoids.py rename to pre-cleanup-src/analysis/ellipsoids.py diff --git a/src/analysis/getthebone.py b/pre-cleanup-src/analysis/getthebone.py similarity index 100% rename from src/analysis/getthebone.py rename to pre-cleanup-src/analysis/getthebone.py diff --git a/src/analysis/orientation.py b/pre-cleanup-src/analysis/orientation.py similarity index 100% rename from src/analysis/orientation.py rename to pre-cleanup-src/analysis/orientation.py diff --git a/src/analysis/osteocytes.py b/pre-cleanup-src/analysis/osteocytes.py similarity index 100% rename from src/analysis/osteocytes.py rename to pre-cleanup-src/analysis/osteocytes.py diff --git a/src/analysis/video1-segmentation.py b/pre-cleanup-src/analysis/video1-segmentation.py similarity index 100% rename from src/analysis/video1-segmentation.py rename to pre-cleanup-src/analysis/video1-segmentation.py diff --git a/src/analysis/video2-implant_contact.py b/pre-cleanup-src/analysis/video2-implant_contact.py similarity index 100% rename from src/analysis/video2-implant_contact.py rename to pre-cleanup-src/analysis/video2-implant_contact.py diff --git a/src/bh-kernels/axes_histogram.c b/pre-cleanup-src/bh-kernels/axes_histogram.c similarity index 100% rename from src/bh-kernels/axes_histogram.c rename to pre-cleanup-src/bh-kernels/axes_histogram.c diff --git a/src/bh-kernels/centres-of-mass-3xn.c 
b/pre-cleanup-src/bh-kernels/centres-of-mass-3xn.c similarity index 100% rename from src/bh-kernels/centres-of-mass-3xn.c rename to pre-cleanup-src/bh-kernels/centres-of-mass-3xn.c diff --git a/src/bh-kernels/centres-of-mass.c b/pre-cleanup-src/bh-kernels/centres-of-mass.c similarity index 100% rename from src/bh-kernels/centres-of-mass.c rename to pre-cleanup-src/bh-kernels/centres-of-mass.c diff --git a/src/bh-kernels/collect-labeled.c b/pre-cleanup-src/bh-kernels/collect-labeled.c similarity index 100% rename from src/bh-kernels/collect-labeled.c rename to pre-cleanup-src/bh-kernels/collect-labeled.c diff --git a/src/bh-kernels/collect-nonzero.c b/pre-cleanup-src/bh-kernels/collect-nonzero.c similarity index 100% rename from src/bh-kernels/collect-nonzero.c rename to pre-cleanup-src/bh-kernels/collect-nonzero.c diff --git a/src/bh-kernels/count-labeled.c b/pre-cleanup-src/bh-kernels/count-labeled.c similarity index 100% rename from src/bh-kernels/count-labeled.c rename to pre-cleanup-src/bh-kernels/count-labeled.c diff --git a/src/bh-kernels/fft-cpu.c b/pre-cleanup-src/bh-kernels/fft-cpu.c similarity index 100% rename from src/bh-kernels/fft-cpu.c rename to pre-cleanup-src/bh-kernels/fft-cpu.c diff --git a/src/bh-kernels/fft-kernel.c b/pre-cleanup-src/bh-kernels/fft-kernel.c similarity index 100% rename from src/bh-kernels/fft-kernel.c rename to pre-cleanup-src/bh-kernels/fft-kernel.c diff --git a/src/bh-kernels/inertia-matrix.c b/pre-cleanup-src/bh-kernels/inertia-matrix.c similarity index 100% rename from src/bh-kernels/inertia-matrix.c rename to pre-cleanup-src/bh-kernels/inertia-matrix.c diff --git a/src/bh-kernels/inertia-matrix.o b/pre-cleanup-src/bh-kernels/inertia-matrix.o similarity index 100% rename from src/bh-kernels/inertia-matrix.o rename to pre-cleanup-src/bh-kernels/inertia-matrix.o diff --git a/src/bh-kernels/lookup-cpu.c b/pre-cleanup-src/bh-kernels/lookup-cpu.c similarity index 100% rename from src/bh-kernels/lookup-cpu.c rename to 
pre-cleanup-src/bh-kernels/lookup-cpu.c diff --git a/src/bh-kernels/matrix3x3/eigenvalues.c b/pre-cleanup-src/bh-kernels/matrix3x3/eigenvalues.c similarity index 100% rename from src/bh-kernels/matrix3x3/eigenvalues.c rename to pre-cleanup-src/bh-kernels/matrix3x3/eigenvalues.c diff --git a/src/bh-kernels/matrix3x3/eigenvalues.o b/pre-cleanup-src/bh-kernels/matrix3x3/eigenvalues.o similarity index 100% rename from src/bh-kernels/matrix3x3/eigenvalues.o rename to pre-cleanup-src/bh-kernels/matrix3x3/eigenvalues.o diff --git a/src/bh-kernels/matrix3x3/eigenvalues.py b/pre-cleanup-src/bh-kernels/matrix3x3/eigenvalues.py similarity index 100% rename from src/bh-kernels/matrix3x3/eigenvalues.py rename to pre-cleanup-src/bh-kernels/matrix3x3/eigenvalues.py diff --git a/src/bh-kernels/matrix3x3/eigenvectors.c b/pre-cleanup-src/bh-kernels/matrix3x3/eigenvectors.c similarity index 100% rename from src/bh-kernels/matrix3x3/eigenvectors.c rename to pre-cleanup-src/bh-kernels/matrix3x3/eigenvectors.c diff --git a/src/bh-kernels/ndi_label.c b/pre-cleanup-src/bh-kernels/ndi_label.c similarity index 100% rename from src/bh-kernels/ndi_label.c rename to pre-cleanup-src/bh-kernels/ndi_label.c diff --git a/src/bh-kernels/ndi_label.py b/pre-cleanup-src/bh-kernels/ndi_label.py similarity index 100% rename from src/bh-kernels/ndi_label.py rename to pre-cleanup-src/bh-kernels/ndi_label.py diff --git a/src/bh-kernels/principal-axes.c b/pre-cleanup-src/bh-kernels/principal-axes.c similarity index 100% rename from src/bh-kernels/principal-axes.c rename to pre-cleanup-src/bh-kernels/principal-axes.c diff --git a/src/bh-kernels/reduceat.c b/pre-cleanup-src/bh-kernels/reduceat.c similarity index 100% rename from src/bh-kernels/reduceat.c rename to pre-cleanup-src/bh-kernels/reduceat.c diff --git a/src/bh-kernels/reduceat.o b/pre-cleanup-src/bh-kernels/reduceat.o similarity index 100% rename from src/bh-kernels/reduceat.o rename to pre-cleanup-src/bh-kernels/reduceat.o diff --git 
a/src/bh-kernels/rfft-cpu.c b/pre-cleanup-src/bh-kernels/rfft-cpu.c similarity index 100% rename from src/bh-kernels/rfft-cpu.c rename to pre-cleanup-src/bh-kernels/rfft-cpu.c diff --git a/src/bh-kernels/select_segments.c b/pre-cleanup-src/bh-kernels/select_segments.c similarity index 100% rename from src/bh-kernels/select_segments.c rename to pre-cleanup-src/bh-kernels/select_segments.c diff --git a/src/bh-kernels/select_segments.o b/pre-cleanup-src/bh-kernels/select_segments.o similarity index 100% rename from src/bh-kernels/select_segments.o rename to pre-cleanup-src/bh-kernels/select_segments.o diff --git a/src/bh-kernels/select_segments.py b/pre-cleanup-src/bh-kernels/select_segments.py similarity index 100% rename from src/bh-kernels/select_segments.py rename to pre-cleanup-src/bh-kernels/select_segments.py diff --git a/src/bh-kernels/sliding-kernel-opencl.c b/pre-cleanup-src/bh-kernels/sliding-kernel-opencl.c similarity index 100% rename from src/bh-kernels/sliding-kernel-opencl.c rename to pre-cleanup-src/bh-kernels/sliding-kernel-opencl.c diff --git a/src/bh-kernels/sliding-kernel.c b/pre-cleanup-src/bh-kernels/sliding-kernel.c similarity index 100% rename from src/bh-kernels/sliding-kernel.c rename to pre-cleanup-src/bh-kernels/sliding-kernel.c diff --git a/src/bh-kernels/sliding-kernel_2D.c b/pre-cleanup-src/bh-kernels/sliding-kernel_2D.c similarity index 100% rename from src/bh-kernels/sliding-kernel_2D.c rename to pre-cleanup-src/bh-kernels/sliding-kernel_2D.c diff --git a/src/bh-kernels/sparse_label.c b/pre-cleanup-src/bh-kernels/sparse_label.c similarity index 100% rename from src/bh-kernels/sparse_label.c rename to pre-cleanup-src/bh-kernels/sparse_label.c diff --git a/src/config/__init__.py b/pre-cleanup-src/config/__init__.py similarity index 100% rename from src/config/__init__.py rename to pre-cleanup-src/config/__init__.py diff --git a/src/config/constants.py b/pre-cleanup-src/config/constants.py similarity index 100% rename from 
src/config/constants.py rename to pre-cleanup-src/config/constants.py diff --git a/src/config/kakapo/paths.py b/pre-cleanup-src/config/kakapo/paths.py similarity index 100% rename from src/config/kakapo/paths.py rename to pre-cleanup-src/config/kakapo/paths.py diff --git a/src/config/nautilus/paths.py b/pre-cleanup-src/config/nautilus/paths.py similarity index 100% rename from src/config/nautilus/paths.py rename to pre-cleanup-src/config/nautilus/paths.py diff --git a/src/config/paths.py b/pre-cleanup-src/config/paths.py similarity index 100% rename from src/config/paths.py rename to pre-cleanup-src/config/paths.py diff --git a/src/config/threadripper00/paths.py b/pre-cleanup-src/config/threadripper00/paths.py similarity index 100% rename from src/config/threadripper00/paths.py rename to pre-cleanup-src/config/threadripper00/paths.py diff --git a/src/config/threadripper00/paths.py~ b/pre-cleanup-src/config/threadripper00/paths.py~ similarity index 100% rename from src/config/threadripper00/paths.py~ rename to pre-cleanup-src/config/threadripper00/paths.py~ diff --git a/src/config/threadripper01/paths.py b/pre-cleanup-src/config/threadripper01/paths.py similarity index 100% rename from src/config/threadripper01/paths.py rename to pre-cleanup-src/config/threadripper01/paths.py diff --git a/src/contrib/cpptqdm/LICENSE b/pre-cleanup-src/contrib/cpptqdm/LICENSE similarity index 100% rename from src/contrib/cpptqdm/LICENSE rename to pre-cleanup-src/contrib/cpptqdm/LICENSE diff --git a/src/contrib/cpptqdm/README.md b/pre-cleanup-src/contrib/cpptqdm/README.md similarity index 100% rename from src/contrib/cpptqdm/README.md rename to pre-cleanup-src/contrib/cpptqdm/README.md diff --git a/src/contrib/cpptqdm/tqdm.h b/pre-cleanup-src/contrib/cpptqdm/tqdm.h similarity index 100% rename from src/contrib/cpptqdm/tqdm.h rename to pre-cleanup-src/contrib/cpptqdm/tqdm.h diff --git a/src/convert-to-hdf5.py b/pre-cleanup-src/convert-to-hdf5.py similarity index 100% rename from 
src/convert-to-hdf5.py rename to pre-cleanup-src/convert-to-hdf5.py diff --git a/src/doitall.py b/pre-cleanup-src/doitall.py similarity index 100% rename from src/doitall.py rename to pre-cleanup-src/doitall.py diff --git a/src/experimental/histogram.cc b/pre-cleanup-src/experimental/histogram.cc similarity index 100% rename from src/experimental/histogram.cc rename to pre-cleanup-src/experimental/histogram.cc diff --git a/src/figures/fig_bic.py b/pre-cleanup-src/figures/fig_bic.py similarity index 100% rename from src/figures/fig_bic.py rename to pre-cleanup-src/figures/fig_bic.py diff --git a/src/figures/figures.py b/pre-cleanup-src/figures/figures.py similarity index 100% rename from src/figures/figures.py rename to pre-cleanup-src/figures/figures.py diff --git a/src/figures/vedo_blood.py b/pre-cleanup-src/figures/vedo_blood.py similarity index 100% rename from src/figures/vedo_blood.py rename to pre-cleanup-src/figures/vedo_blood.py diff --git a/src/generate-Igauss.py b/pre-cleanup-src/generate-Igauss.py similarity index 100% rename from src/generate-Igauss.py rename to pre-cleanup-src/generate-Igauss.py diff --git a/src/generate-absorption-classes.py b/pre-cleanup-src/generate-absorption-classes.py similarity index 100% rename from src/generate-absorption-classes.py rename to pre-cleanup-src/generate-absorption-classes.py diff --git a/src/generate-byte-files.py b/pre-cleanup-src/generate-byte-files.py similarity index 100% rename from src/generate-byte-files.py rename to pre-cleanup-src/generate-byte-files.py diff --git a/src/generate_gauss_c.py b/pre-cleanup-src/generate_gauss_c.py similarity index 100% rename from src/generate_gauss_c.py rename to pre-cleanup-src/generate_gauss_c.py diff --git a/src/helper_functions.py b/pre-cleanup-src/helper_functions.py similarity index 100% rename from src/helper_functions.py rename to pre-cleanup-src/helper_functions.py diff --git a/src/histogram_processing/compute_distributions.py 
b/pre-cleanup-src/histogram_processing/compute_distributions.py similarity index 100% rename from src/histogram_processing/compute_distributions.py rename to pre-cleanup-src/histogram_processing/compute_distributions.py diff --git a/src/histogram_processing/compute_histograms.py b/pre-cleanup-src/histogram_processing/compute_histograms.py similarity index 100% rename from src/histogram_processing/compute_histograms.py rename to pre-cleanup-src/histogram_processing/compute_histograms.py diff --git a/src/histogram_processing/compute_probabilities.py b/pre-cleanup-src/histogram_processing/compute_probabilities.py similarity index 100% rename from src/histogram_processing/compute_probabilities.py rename to pre-cleanup-src/histogram_processing/compute_probabilities.py diff --git a/src/histogram_processing/compute_ridges.py b/pre-cleanup-src/histogram_processing/compute_ridges.py similarity index 100% rename from src/histogram_processing/compute_ridges.py rename to pre-cleanup-src/histogram_processing/compute_ridges.py diff --git a/src/histogram_processing/cubic2.py b/pre-cleanup-src/histogram_processing/cubic2.py similarity index 100% rename from src/histogram_processing/cubic2.py rename to pre-cleanup-src/histogram_processing/cubic2.py diff --git a/src/histogram_processing/distributions.py b/pre-cleanup-src/histogram_processing/distributions.py similarity index 100% rename from src/histogram_processing/distributions.py rename to pre-cleanup-src/histogram_processing/distributions.py diff --git a/src/histogram_processing/material_correction.py b/pre-cleanup-src/histogram_processing/material_correction.py similarity index 100% rename from src/histogram_processing/material_correction.py rename to pre-cleanup-src/histogram_processing/material_correction.py diff --git a/src/histogram_processing/optimize_distributions_flat.py b/pre-cleanup-src/histogram_processing/optimize_distributions_flat.py similarity index 100% rename from 
src/histogram_processing/optimize_distributions_flat.py rename to pre-cleanup-src/histogram_processing/optimize_distributions_flat.py diff --git a/src/histogram_processing/piecewise_cubic.py b/pre-cleanup-src/histogram_processing/piecewise_cubic.py similarity index 100% rename from src/histogram_processing/piecewise_cubic.py rename to pre-cleanup-src/histogram_processing/piecewise_cubic.py diff --git a/src/histogram_processing/piecewise_quadratic.py b/pre-cleanup-src/histogram_processing/piecewise_quadratic.py similarity index 100% rename from src/histogram_processing/piecewise_quadratic.py rename to pre-cleanup-src/histogram_processing/piecewise_quadratic.py diff --git a/src/histogram_processing/pybind_kernels b/pre-cleanup-src/histogram_processing/pybind_kernels similarity index 100% rename from src/histogram_processing/pybind_kernels rename to pre-cleanup-src/histogram_processing/pybind_kernels diff --git a/src/histogram_processing/test.py b/pre-cleanup-src/histogram_processing/test.py similarity index 100% rename from src/histogram_processing/test.py rename to pre-cleanup-src/histogram_processing/test.py diff --git a/src/imaging/bitmaps.py b/pre-cleanup-src/imaging/bitmaps.py similarity index 100% rename from src/imaging/bitmaps.py rename to pre-cleanup-src/imaging/bitmaps.py diff --git a/src/imaging/clustering.py b/pre-cleanup-src/imaging/clustering.py similarity index 100% rename from src/imaging/clustering.py rename to pre-cleanup-src/imaging/clustering.py diff --git a/src/imaging/distributions.py b/pre-cleanup-src/imaging/distributions.py similarity index 100% rename from src/imaging/distributions.py rename to pre-cleanup-src/imaging/distributions.py diff --git a/src/imaging/sparse_labels.py b/pre-cleanup-src/imaging/sparse_labels.py similarity index 100% rename from src/imaging/sparse_labels.py rename to pre-cleanup-src/imaging/sparse_labels.py diff --git a/src/imaging/sparse_ndi.py b/pre-cleanup-src/imaging/sparse_ndi.py similarity index 100% rename from 
src/imaging/sparse_ndi.py rename to pre-cleanup-src/imaging/sparse_ndi.py diff --git a/src/imaging/uk_ndi.py b/pre-cleanup-src/imaging/uk_ndi.py similarity index 100% rename from src/imaging/uk_ndi.py rename to pre-cleanup-src/imaging/uk_ndi.py diff --git a/src/io_modules/blockmap.py b/pre-cleanup-src/io_modules/blockmap.py similarity index 100% rename from src/io_modules/blockmap.py rename to pre-cleanup-src/io_modules/blockmap.py diff --git a/src/io_modules/cache_esrf2013.py b/pre-cleanup-src/io_modules/cache_esrf2013.py similarity index 100% rename from src/io_modules/cache_esrf2013.py rename to pre-cleanup-src/io_modules/cache_esrf2013.py diff --git a/src/io_modules/esrf2011.py b/pre-cleanup-src/io_modules/esrf2011.py similarity index 100% rename from src/io_modules/esrf2011.py rename to pre-cleanup-src/io_modules/esrf2011.py diff --git a/src/io_modules/esrf_read.py b/pre-cleanup-src/io_modules/esrf_read.py similarity index 100% rename from src/io_modules/esrf_read.py rename to pre-cleanup-src/io_modules/esrf_read.py diff --git a/src/io_modules/h5-blockmap.cc b/pre-cleanup-src/io_modules/h5-blockmap.cc similarity index 100% rename from src/io_modules/h5-blockmap.cc rename to pre-cleanup-src/io_modules/h5-blockmap.cc diff --git a/src/io_modules/h5tomo.py b/pre-cleanup-src/io_modules/h5tomo.py similarity index 100% rename from src/io_modules/h5tomo.py rename to pre-cleanup-src/io_modules/h5tomo.py diff --git a/src/io_modules/write_video.py b/pre-cleanup-src/io_modules/write_video.py similarity index 100% rename from src/io_modules/write_video.py rename to pre-cleanup-src/io_modules/write_video.py diff --git a/src/limbo/datasources.py b/pre-cleanup-src/limbo/datasources.py similarity index 100% rename from src/limbo/datasources.py rename to pre-cleanup-src/limbo/datasources.py diff --git a/src/limbo/rescale-everything.py b/pre-cleanup-src/limbo/rescale-everything.py similarity index 100% rename from src/limbo/rescale-everything.py rename to 
pre-cleanup-src/limbo/rescale-everything.py diff --git a/src/limbo/volm.py b/pre-cleanup-src/limbo/volm.py similarity index 100% rename from src/limbo/volm.py rename to pre-cleanup-src/limbo/volm.py diff --git a/src/meow/config b/pre-cleanup-src/meow/config similarity index 100% rename from src/meow/config rename to pre-cleanup-src/meow/config diff --git a/src/meow/meow_variables.py b/pre-cleanup-src/meow/meow_variables.py similarity index 100% rename from src/meow/meow_variables.py rename to pre-cleanup-src/meow/meow_variables.py diff --git a/src/meow/notebooks/00_generate_byte_data.ipynb b/pre-cleanup-src/meow/notebooks/00_generate_byte_data.ipynb similarity index 100% rename from src/meow/notebooks/00_generate_byte_data.ipynb rename to pre-cleanup-src/meow/notebooks/00_generate_byte_data.ipynb diff --git a/src/meow/notebooks/01_volume_matcher.ipynb b/pre-cleanup-src/meow/notebooks/01_volume_matcher.ipynb similarity index 100% rename from src/meow/notebooks/01_volume_matcher.ipynb rename to pre-cleanup-src/meow/notebooks/01_volume_matcher.ipynb diff --git a/src/meow/notebooks/02_generate_scales.ipynb b/pre-cleanup-src/meow/notebooks/02_generate_scales.ipynb similarity index 100% rename from src/meow/notebooks/02_generate_scales.ipynb rename to pre-cleanup-src/meow/notebooks/02_generate_scales.ipynb diff --git a/src/meow/notebooks/03_implant_analysis.ipynb b/pre-cleanup-src/meow/notebooks/03_implant_analysis.ipynb similarity index 100% rename from src/meow/notebooks/03_implant_analysis.ipynb rename to pre-cleanup-src/meow/notebooks/03_implant_analysis.ipynb diff --git a/src/meow/notebooks/04_generate_implant_diffusion.ipynb b/pre-cleanup-src/meow/notebooks/04_generate_implant_diffusion.ipynb similarity index 100% rename from src/meow/notebooks/04_generate_implant_diffusion.ipynb rename to pre-cleanup-src/meow/notebooks/04_generate_implant_diffusion.ipynb diff --git a/src/meow/notebooks/05_generate_implant_edt.ipynb 
b/pre-cleanup-src/meow/notebooks/05_generate_implant_edt.ipynb similarity index 100% rename from src/meow/notebooks/05_generate_implant_edt.ipynb rename to pre-cleanup-src/meow/notebooks/05_generate_implant_edt.ipynb diff --git a/src/meow/notebooks/06_compute_histograms.ipynb b/pre-cleanup-src/meow/notebooks/06_compute_histograms.ipynb similarity index 100% rename from src/meow/notebooks/06_compute_histograms.ipynb rename to pre-cleanup-src/meow/notebooks/06_compute_histograms.ipynb diff --git a/src/meow/notebooks/07_compute_ridges.ipynb b/pre-cleanup-src/meow/notebooks/07_compute_ridges.ipynb similarity index 100% rename from src/meow/notebooks/07_compute_ridges.ipynb rename to pre-cleanup-src/meow/notebooks/07_compute_ridges.ipynb diff --git a/src/meow/notebooks/08_compute_probabilities.ipynb b/pre-cleanup-src/meow/notebooks/08_compute_probabilities.ipynb similarity index 100% rename from src/meow/notebooks/08_compute_probabilities.ipynb rename to pre-cleanup-src/meow/notebooks/08_compute_probabilities.ipynb diff --git a/src/meow/notebooks/09_compute_segmentation.ipynb b/pre-cleanup-src/meow/notebooks/09_compute_segmentation.ipynb similarity index 100% rename from src/meow/notebooks/09_compute_segmentation.ipynb rename to pre-cleanup-src/meow/notebooks/09_compute_segmentation.ipynb diff --git a/src/meow/notebooks/10_compute_bone_area.ipynb b/pre-cleanup-src/meow/notebooks/10_compute_bone_area.ipynb similarity index 100% rename from src/meow/notebooks/10_compute_bone_area.ipynb rename to pre-cleanup-src/meow/notebooks/10_compute_bone_area.ipynb diff --git a/src/meow/notebooks/11_repeat_histogram_with_constraints.ipynb b/pre-cleanup-src/meow/notebooks/11_repeat_histogram_with_constraints.ipynb similarity index 100% rename from src/meow/notebooks/11_repeat_histogram_with_constraints.ipynb rename to pre-cleanup-src/meow/notebooks/11_repeat_histogram_with_constraints.ipynb diff --git a/src/meow/notebooks/config b/pre-cleanup-src/meow/notebooks/config similarity index 
100% rename from src/meow/notebooks/config rename to pre-cleanup-src/meow/notebooks/config diff --git a/src/meow/run_workflow.py b/pre-cleanup-src/meow/run_workflow.py similarity index 100% rename from src/meow/run_workflow.py rename to pre-cleanup-src/meow/run_workflow.py diff --git a/src/meow/update_live_runner.py b/pre-cleanup-src/meow/update_live_runner.py similarity index 100% rename from src/meow/update_live_runner.py rename to pre-cleanup-src/meow/update_live_runner.py diff --git a/src/obsolete/generate-histograms-axes.py b/pre-cleanup-src/obsolete/generate-histograms-axes.py similarity index 100% rename from src/obsolete/generate-histograms-axes.py rename to pre-cleanup-src/obsolete/generate-histograms-axes.py diff --git a/src/obsolete/generate-radial-histograms.py b/pre-cleanup-src/obsolete/generate-radial-histograms.py similarity index 100% rename from src/obsolete/generate-radial-histograms.py rename to pre-cleanup-src/obsolete/generate-radial-histograms.py diff --git a/src/obsolete/generate-y-histograms.py b/pre-cleanup-src/obsolete/generate-y-histograms.py similarity index 100% rename from src/obsolete/generate-y-histograms.py rename to pre-cleanup-src/obsolete/generate-y-histograms.py diff --git a/src/preprocess/generate-implant-diffusion.py b/pre-cleanup-src/preprocess/generate-implant-diffusion.py similarity index 100% rename from src/preprocess/generate-implant-diffusion.py rename to pre-cleanup-src/preprocess/generate-implant-diffusion.py diff --git a/src/preprocess/generate-implant-edt.py b/pre-cleanup-src/preprocess/generate-implant-edt.py similarity index 100% rename from src/preprocess/generate-implant-edt.py rename to pre-cleanup-src/preprocess/generate-implant-edt.py diff --git a/src/preprocess/resample.py b/pre-cleanup-src/preprocess/resample.py similarity index 100% rename from src/preprocess/resample.py rename to pre-cleanup-src/preprocess/resample.py diff --git a/src/preprocess/rescale-cupy-bin.py 
b/pre-cleanup-src/preprocess/rescale-cupy-bin.py similarity index 100% rename from src/preprocess/rescale-cupy-bin.py rename to pre-cleanup-src/preprocess/rescale-cupy-bin.py diff --git a/src/preprocess/rescale-cupy.py b/pre-cleanup-src/preprocess/rescale-cupy.py similarity index 100% rename from src/preprocess/rescale-cupy.py rename to pre-cleanup-src/preprocess/rescale-cupy.py diff --git a/src/pybind_kernels/cpu/geometry.cc b/pre-cleanup-src/pybind_kernels/cpu/geometry.cc similarity index 100% rename from src/pybind_kernels/cpu/geometry.cc rename to pre-cleanup-src/pybind_kernels/cpu/geometry.cc diff --git a/src/pybind_kernels/cpu/histograms.cc b/pre-cleanup-src/pybind_kernels/cpu/histograms.cc similarity index 100% rename from src/pybind_kernels/cpu/histograms.cc rename to pre-cleanup-src/pybind_kernels/cpu/histograms.cc diff --git a/src/pybind_kernels/cpu/label.cc b/pre-cleanup-src/pybind_kernels/cpu/label.cc similarity index 100% rename from src/pybind_kernels/cpu/label.cc rename to pre-cleanup-src/pybind_kernels/cpu/label.cc diff --git a/src/pybind_kernels/include/parallel.hh b/pre-cleanup-src/pybind_kernels/include/parallel.hh similarity index 100% rename from src/pybind_kernels/include/parallel.hh rename to pre-cleanup-src/pybind_kernels/include/parallel.hh diff --git a/src/pybind_kernels/pybind/geometry-pybind.cc b/pre-cleanup-src/pybind_kernels/pybind/geometry-pybind.cc similarity index 100% rename from src/pybind_kernels/pybind/geometry-pybind.cc rename to pre-cleanup-src/pybind_kernels/pybind/geometry-pybind.cc diff --git a/src/scripts/bin2npy.py b/pre-cleanup-src/scripts/bin2npy.py similarity index 100% rename from src/scripts/bin2npy.py rename to pre-cleanup-src/scripts/bin2npy.py diff --git a/src/scripts/closing_mask.ipynb b/pre-cleanup-src/scripts/closing_mask.ipynb similarity index 100% rename from src/scripts/closing_mask.ipynb rename to pre-cleanup-src/scripts/closing_mask.ipynb diff --git a/src/scripts/closing_mask.py 
b/pre-cleanup-src/scripts/closing_mask.py similarity index 100% rename from src/scripts/closing_mask.py rename to pre-cleanup-src/scripts/closing_mask.py diff --git a/src/scripts/config b/pre-cleanup-src/scripts/config similarity index 100% rename from src/scripts/config rename to pre-cleanup-src/scripts/config diff --git a/src/scripts/display_partial_segment.py b/pre-cleanup-src/scripts/display_partial_segment.py similarity index 100% rename from src/scripts/display_partial_segment.py rename to pre-cleanup-src/scripts/display_partial_segment.py diff --git a/src/scripts/generate-byte-hdf5.py b/pre-cleanup-src/scripts/generate-byte-hdf5.py similarity index 100% rename from src/scripts/generate-byte-hdf5.py rename to pre-cleanup-src/scripts/generate-byte-hdf5.py diff --git a/src/scripts/generate-scales.py b/pre-cleanup-src/scripts/generate-scales.py similarity index 100% rename from src/scripts/generate-scales.py rename to pre-cleanup-src/scripts/generate-scales.py diff --git a/src/scripts/generate_gimp_probabilities.py b/pre-cleanup-src/scripts/generate_gimp_probabilities.py similarity index 100% rename from src/scripts/generate_gimp_probabilities.py rename to pre-cleanup-src/scripts/generate_gimp_probabilities.py diff --git a/src/scripts/generate_otsu_probabilities.py b/pre-cleanup-src/scripts/generate_otsu_probabilities.py similarity index 100% rename from src/scripts/generate_otsu_probabilities.py rename to pre-cleanup-src/scripts/generate_otsu_probabilities.py diff --git a/src/scripts/h5tobin.py b/pre-cleanup-src/scripts/h5tobin.py similarity index 100% rename from src/scripts/h5tobin.py rename to pre-cleanup-src/scripts/h5tobin.py diff --git a/src/scripts/otsu.ipynb b/pre-cleanup-src/scripts/otsu.ipynb similarity index 100% rename from src/scripts/otsu.ipynb rename to pre-cleanup-src/scripts/otsu.ipynb diff --git a/src/scripts/segment_from_distributions.py b/pre-cleanup-src/scripts/segment_from_distributions.py similarity index 100% rename from 
src/scripts/segment_from_distributions.py rename to pre-cleanup-src/scripts/segment_from_distributions.py diff --git a/src/scripts/volume_matcher.py b/pre-cleanup-src/scripts/volume_matcher.py similarity index 100% rename from src/scripts/volume_matcher.py rename to pre-cleanup-src/scripts/volume_matcher.py diff --git a/src/segmentation/airandbone-fn.py b/pre-cleanup-src/segmentation/airandbone-fn.py similarity index 100% rename from src/segmentation/airandbone-fn.py rename to pre-cleanup-src/segmentation/airandbone-fn.py diff --git a/src/segmentation/airandbone.py b/pre-cleanup-src/segmentation/airandbone.py similarity index 100% rename from src/segmentation/airandbone.py rename to pre-cleanup-src/segmentation/airandbone.py diff --git a/src/segmentation/bone.py b/pre-cleanup-src/segmentation/bone.py similarity index 100% rename from src/segmentation/bone.py rename to pre-cleanup-src/segmentation/bone.py diff --git a/src/segmentation/hiresboneregion.py b/pre-cleanup-src/segmentation/hiresboneregion.py similarity index 100% rename from src/segmentation/hiresboneregion.py rename to pre-cleanup-src/segmentation/hiresboneregion.py diff --git a/src/segmentation/implant-FoR.py b/pre-cleanup-src/segmentation/implant-FoR.py similarity index 100% rename from src/segmentation/implant-FoR.py rename to pre-cleanup-src/segmentation/implant-FoR.py diff --git a/src/segmentation/implant-data.py b/pre-cleanup-src/segmentation/implant-data.py similarity index 100% rename from src/segmentation/implant-data.py rename to pre-cleanup-src/segmentation/implant-data.py diff --git a/src/segmentation/segment-air-cc.py b/pre-cleanup-src/segmentation/segment-air-cc.py similarity index 100% rename from src/segmentation/segment-air-cc.py rename to pre-cleanup-src/segmentation/segment-air-cc.py diff --git a/src/segmentation/segment-blood-cc.py b/pre-cleanup-src/segmentation/segment-blood-cc.py similarity index 100% rename from src/segmentation/segment-blood-cc.py rename to 
pre-cleanup-src/segmentation/segment-blood-cc.py diff --git a/src/segmentation/segment-blood-cc2.py b/pre-cleanup-src/segmentation/segment-blood-cc2.py similarity index 100% rename from src/segmentation/segment-blood-cc2.py rename to pre-cleanup-src/segmentation/segment-blood-cc2.py diff --git a/src/segmentation/segment-implant-cc.py b/pre-cleanup-src/segmentation/segment-implant-cc.py similarity index 100% rename from src/segmentation/segment-implant-cc.py rename to pre-cleanup-src/segmentation/segment-implant-cc.py diff --git a/src/segmentation/segment-implant.py b/pre-cleanup-src/segmentation/segment-implant.py similarity index 100% rename from src/segmentation/segment-implant.py rename to pre-cleanup-src/segmentation/segment-implant.py diff --git a/src/struktur.md b/pre-cleanup-src/struktur.md similarity index 100% rename from src/struktur.md rename to pre-cleanup-src/struktur.md diff --git a/src/test.py b/pre-cleanup-src/test.py similarity index 100% rename from src/test.py rename to pre-cleanup-src/test.py diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index bc449e1..0000000 --- a/src/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -CXX=icpc -CXXFLAGS += -std=c++17 -CXXFLAGS += $(shell pkg-config --cflags fmt) -CXXFLAGS += $(shell pkg-config --cflags hdf5) -LIBS += $(shell pkg-config --libs fmt) -LIBS += $(shell pkg-config --libs hdf5) - -%.o: %.cc - $(CXX) $(CXXFLAGS) -c $< - -h5-blockmap: h5-blockmap.o - echo $(LIBS) - $(CXX) $(CXXFLAGS) $< $(LIBS) -o $@ - From e738c63a6e1079695bf3e81fe14c6331c571495e Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 12:56:09 +0100 Subject: [PATCH 017/136] Moved structure notes to github issues --- pre-cleanup-src/struktur.md | 42 ------------------------------------- 1 file changed, 42 deletions(-) delete mode 100644 pre-cleanup-src/struktur.md diff --git a/pre-cleanup-src/struktur.md b/pre-cleanup-src/struktur.md deleted file mode 100644 index f8e10b1..0000000 --- 
a/pre-cleanup-src/struktur.md +++ /dev/null @@ -1,42 +0,0 @@ -src/ - __init__.py - config/ - constants.py - paths.py - threadripper00.json - lib/ - __init__.py - cpp/ - cpu/ - cpu_seq/ - gpu/ - best/ - include/ - py/ # TODO tænk over hvordan de vælger implementation -- gerne hvordan det trickler "nedad" - Istedet for at loade al data ind i ram og så køre blokvist over på GPU, så udnyt async yield til at lave en generator! - async memmap! - geometry/ - FoR_me.py - debug-explore/ - *.ipynb - processing_steps/ # kun cli ting der kører af sig selv (+rapport ting over hvad der skete) - 100-.py - 200- - pybind/ - *-pybind.cc - test/ - pybind-*.py - større-test(s).py - utils/ - io/ - histograms/ - alternative_processing_steps/ - doitall.sh - -sæt ci op som test lokalt > generer fil > github action tjekker om fil rapporten matcher git commit hash og melder korrekt test kørsel (eller noget i den dur!) - -under oprydning, hold til samme argument interface som de andre! (i.e. compute_ridges gør ikke ( ͡° ͜ʖ ͡°) ) - -gennemgå doitall og hiv de relevante ud i processing_steps. Dertil kør alt igennem! - -doitall skal også lave en rapport tex. 
(tænk applied ML small assignment rapporten) \ No newline at end of file From 8bd436761d5da89a4957b942cf4366c96bbb3be7 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 14:46:47 +0100 Subject: [PATCH 018/136] #25 Moved current updated files to match new structure --- .gitignore | 1 - pre-cleanup-src/Makefile | 14 ++++++++++++++ src/{pybind_kernels => }/Makefile | 17 ++++++++--------- src/{pybind_kernels => lib/cpp}/cpu/io.cc | 0 .../cpp}/cpu/morphology.cc | 0 src/{pybind_kernels => lib/cpp}/cpu_seq/io.cc | 0 .../cpp}/cpu_seq/morphology.cc | 0 src/{pybind_kernels => lib/cpp}/gpu/io.cc | 0 .../cpp}/gpu/morphology.cc | 0 .../cpp}/include/datatypes.hh | 0 src/{pybind_kernels => lib/cpp}/include/io.hh | 0 .../cpp}/include/morphology.hh | 0 src/{pybind_kernels => }/pybind/io-pybind.cc | 0 .../pybind/morphology-pybind.cc | 0 src/pybind_kernels/Readme.md | 1 - src/pybind_kernels/__init__.py | 0 src/{pybind_kernels => }/test/test_io.py | 2 +- .../test/test_morphology.py | 2 +- 18 files changed, 24 insertions(+), 13 deletions(-) create mode 100644 pre-cleanup-src/Makefile rename src/{pybind_kernels => }/Makefile (69%) rename src/{pybind_kernels => lib/cpp}/cpu/io.cc (100%) rename src/{pybind_kernels => lib/cpp}/cpu/morphology.cc (100%) rename src/{pybind_kernels => lib/cpp}/cpu_seq/io.cc (100%) rename src/{pybind_kernels => lib/cpp}/cpu_seq/morphology.cc (100%) rename src/{pybind_kernels => lib/cpp}/gpu/io.cc (100%) rename src/{pybind_kernels => lib/cpp}/gpu/morphology.cc (100%) rename src/{pybind_kernels => lib/cpp}/include/datatypes.hh (100%) rename src/{pybind_kernels => lib/cpp}/include/io.hh (100%) rename src/{pybind_kernels => lib/cpp}/include/morphology.hh (100%) rename src/{pybind_kernels => }/pybind/io-pybind.cc (100%) rename src/{pybind_kernels => }/pybind/morphology-pybind.cc (100%) delete mode 100644 src/pybind_kernels/Readme.md delete mode 100644 src/pybind_kernels/__init__.py rename src/{pybind_kernels => 
}/test/test_io.py (98%) rename src/{pybind_kernels => }/test/test_morphology.py (97%) diff --git a/.gitignore b/.gitignore index 6e48464..5743ee6 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,6 @@ CMakeCache.txt CMakeFiles CMakeScripts Testing -Makefile cmake_install.cmake install_manifest.txt compile_commands.json diff --git a/pre-cleanup-src/Makefile b/pre-cleanup-src/Makefile new file mode 100644 index 0000000..bc449e1 --- /dev/null +++ b/pre-cleanup-src/Makefile @@ -0,0 +1,14 @@ +CXX=icpc +CXXFLAGS += -std=c++17 +CXXFLAGS += $(shell pkg-config --cflags fmt) +CXXFLAGS += $(shell pkg-config --cflags hdf5) +LIBS += $(shell pkg-config --libs fmt) +LIBS += $(shell pkg-config --libs hdf5) + +%.o: %.cc + $(CXX) $(CXXFLAGS) -c $< + +h5-blockmap: h5-blockmap.o + echo $(LIBS) + $(CXX) $(CXXFLAGS) $< $(LIBS) -o $@ + diff --git a/src/pybind_kernels/Makefile b/src/Makefile similarity index 69% rename from src/pybind_kernels/Makefile rename to src/Makefile index 18cd3fb..2a00b42 100644 --- a/src/pybind_kernels/Makefile +++ b/src/Makefile @@ -1,13 +1,14 @@ -PYTHON = python3.10 # Define constants and collections +PYTHON = python3.10 PYBIND_FLAGS += $(shell $(PYTHON) -m pybind11 --include) -march=native -Wall -shared -fPIC -fopenmp -g -std=c++17 -O3 PYBIND_SUFFIX = $(shell $(PYTHON)-config --extension-suffix) +CPP_FOLDER=lib/cpp #CXXFLAGS += -I../contrib/cpptqdm/ -Iinclude -CXXFLAGS += -Iinclude +CXXFLAGS += -I$(CPP_FOLDER)/include PLATFORMS=cpu_seq cpu gpu LIBS=io morphology -TARGETS = $(foreach PLATFORM, $(PLATFORMS), $(foreach LIB, $(LIBS), $(PLATFORM)/$(LIB)$(PYBIND_SUFFIX))) -CLEANUP = $(TARGETS) $(foreach PLATFORM, $(PLATFORMS), $(PLATFORM)/__pycache__) +TARGETS = $(foreach PLATFORM, $(PLATFORMS), $(foreach LIB, $(LIBS), $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX))) +CLEANUP = $(TARGETS) $(foreach PLATFORM, $(PLATFORMS), $(CPP_FOLDER)/$(PLATFORM)/__pycache__) # Detect if OpenACC can be used ifneq (, $(shell which nvc++)) @@ -24,13 +25,11 @@ CXXFLAGS += 
-undefined dynamic_lookup # https://pybind11.readthedocs.io/en/stabl CLEANUP += $(TARGETS) $(foreach TARGET, $(TARGETS), $(TARGET).dSYM) # These are also generated on Mac endif -CXXFLAGS += -I../contrib/cpptqdm/ - all: $(TARGETS) define GEN_RULE -$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX): pybind/$(LIB)-pybind.cc $(PLATFORM)/$(LIB).cc - $(CXX) $(CXXFLAGS) $(PYBIND_FLAGS) -I$(PLATFORM) $$< -o $(PLATFORM)/$(LIB)$(PYBIND_SUFFIX) +$(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX): pybind/$(LIB)-pybind.cc $(CPP_FOLDER)/$(PLATFORM)/$(LIB).cc + $(CXX) $(CXXFLAGS) $(PYBIND_FLAGS) -I$(CPP_FOLDER)/$(PLATFORM) $$< -o $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX) endef $(foreach PLATFORM, $(PLATFORMS), \ @@ -43,4 +42,4 @@ test: all $(PYTHON) -m pytest -n auto test clean: - rm -rf $(CLEANUP) test/__pycache__ .pytest_cache **/*.so \ No newline at end of file + rm -rf $(CLEANUP) __pycache__ test/__pycache__ .pytest_cache lib/cpp/**/*.so \ No newline at end of file diff --git a/src/pybind_kernels/cpu/io.cc b/src/lib/cpp/cpu/io.cc similarity index 100% rename from src/pybind_kernels/cpu/io.cc rename to src/lib/cpp/cpu/io.cc diff --git a/src/pybind_kernels/cpu/morphology.cc b/src/lib/cpp/cpu/morphology.cc similarity index 100% rename from src/pybind_kernels/cpu/morphology.cc rename to src/lib/cpp/cpu/morphology.cc diff --git a/src/pybind_kernels/cpu_seq/io.cc b/src/lib/cpp/cpu_seq/io.cc similarity index 100% rename from src/pybind_kernels/cpu_seq/io.cc rename to src/lib/cpp/cpu_seq/io.cc diff --git a/src/pybind_kernels/cpu_seq/morphology.cc b/src/lib/cpp/cpu_seq/morphology.cc similarity index 100% rename from src/pybind_kernels/cpu_seq/morphology.cc rename to src/lib/cpp/cpu_seq/morphology.cc diff --git a/src/pybind_kernels/gpu/io.cc b/src/lib/cpp/gpu/io.cc similarity index 100% rename from src/pybind_kernels/gpu/io.cc rename to src/lib/cpp/gpu/io.cc diff --git a/src/pybind_kernels/gpu/morphology.cc b/src/lib/cpp/gpu/morphology.cc similarity index 100% rename from 
src/pybind_kernels/gpu/morphology.cc rename to src/lib/cpp/gpu/morphology.cc diff --git a/src/pybind_kernels/include/datatypes.hh b/src/lib/cpp/include/datatypes.hh similarity index 100% rename from src/pybind_kernels/include/datatypes.hh rename to src/lib/cpp/include/datatypes.hh diff --git a/src/pybind_kernels/include/io.hh b/src/lib/cpp/include/io.hh similarity index 100% rename from src/pybind_kernels/include/io.hh rename to src/lib/cpp/include/io.hh diff --git a/src/pybind_kernels/include/morphology.hh b/src/lib/cpp/include/morphology.hh similarity index 100% rename from src/pybind_kernels/include/morphology.hh rename to src/lib/cpp/include/morphology.hh diff --git a/src/pybind_kernels/pybind/io-pybind.cc b/src/pybind/io-pybind.cc similarity index 100% rename from src/pybind_kernels/pybind/io-pybind.cc rename to src/pybind/io-pybind.cc diff --git a/src/pybind_kernels/pybind/morphology-pybind.cc b/src/pybind/morphology-pybind.cc similarity index 100% rename from src/pybind_kernels/pybind/morphology-pybind.cc rename to src/pybind/morphology-pybind.cc diff --git a/src/pybind_kernels/Readme.md b/src/pybind_kernels/Readme.md deleted file mode 100644 index 27d0412..0000000 --- a/src/pybind_kernels/Readme.md +++ /dev/null @@ -1 +0,0 @@ -# TODO :) \ No newline at end of file diff --git a/src/pybind_kernels/__init__.py b/src/pybind_kernels/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/pybind_kernels/test/test_io.py b/src/test/test_io.py similarity index 98% rename from src/pybind_kernels/test/test_io.py rename to src/test/test_io.py index 3ece7e4..12ed56d 100644 --- a/src/pybind_kernels/test/test_io.py +++ b/src/test/test_io.py @@ -2,7 +2,7 @@ Unittests for the I/O pybind kernels. 
''' import sys -sys.path.append(sys.path[0]+"/../") +sys.path.append(sys.path[0]+"/../lib/cpp") import cpu_seq.io as io import numpy as np import tempfile diff --git a/src/pybind_kernels/test/test_morphology.py b/src/test/test_morphology.py similarity index 97% rename from src/pybind_kernels/test/test_morphology.py rename to src/test/test_morphology.py index a28c8a6..a608d0d 100644 --- a/src/pybind_kernels/test/test_morphology.py +++ b/src/test/test_morphology.py @@ -2,7 +2,7 @@ Unittests for the morphology pybind kernels. ''' import sys -sys.path.append(sys.path[0]+"/../") +sys.path.append(sys.path[0]+"/../lib/cpp") import cpu_seq.morphology as m_cpu_seq import cpu.morphology as m_cpu import gpu.morphology as m_gpu From e93880b7417777ebbfbc0f41fd450a63f340228f Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 15:05:27 +0100 Subject: [PATCH 019/136] #16 Removed pytest warning --- src/test/test_morphology.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/test_morphology.py b/src/test/test_morphology.py index a608d0d..d99ccae 100644 --- a/src/test/test_morphology.py +++ b/src/test/test_morphology.py @@ -49,11 +49,11 @@ def test_morphology(r, m, op, nd): assert np.allclose(verification, result) - return fend - fsta, (vend - vsta) / (fend - fsta) + print (f'Testing the {m.__name__} implementation of {op}. 
Ran in {fend - fsta}, which is {(vend - vsta) / (fend - fsta)} times better than ndi') if __name__ == '__main__': # TDOO move the data generation and ndi verification out to speed up running for r in rs: for m in impls: for op, nd in funcs: - print (f'Testing the {m.__name__} implementation of {op}', test_morphology(r, m, op, nd)) + test_morphology(r, m, op, nd) From bb412d960bbb3aac334ee48979df7af89b547420 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 15:39:36 +0100 Subject: [PATCH 020/136] #25 moved processing step 1 into new structure, along side its dependencies. --- src/config/paths.py | 12 ++++++++++++ .../paths.py => src/lib/py/helpers.py | 15 +-------------- .../processing_steps/100_cache_esrf2013.py | 8 ++++---- 3 files changed, 17 insertions(+), 18 deletions(-) create mode 100644 src/config/paths.py rename pre-cleanup-src/config/threadripper00/paths.py => src/lib/py/helpers.py (65%) rename pre-cleanup-src/io_modules/cache_esrf2013.py => src/processing_steps/100_cache_esrf2013.py (83%) diff --git a/src/config/paths.py b/src/config/paths.py new file mode 100644 index 0000000..e1ac537 --- /dev/null +++ b/src/config/paths.py @@ -0,0 +1,12 @@ +data_root = "/data" +fast_root = "/data_fast" + +hdf5_root = f"{data_root}/MAXIBONE/Goats/tomograms" +hdf5_root_fast = f"{fast_root}/MAXIBONE/Goats/tomograms" +binary_root = f"{hdf5_root}/binary" +binary_root_fast = f"{hdf5_root_fast}/binary" + +esrf_data_local = f"{hdf5_root}/ESRF/" +esrf_data_sftp = "/XNS/XrayImaging/MiG/manjula.esci.nbi.dk.2_localhost/" +esrf_implants_root = f"{esrf_data_local}/esrf_dental_implants_april_2013/" +esrf_granules_root = f"{esrf_data_local}/esrf_dental_granules_july_2012/" \ No newline at end of file diff --git a/pre-cleanup-src/config/threadripper00/paths.py b/src/lib/py/helpers.py similarity index 65% rename from pre-cleanup-src/config/threadripper00/paths.py rename to src/lib/py/helpers.py index 00a8685..f76df16 100644 --- 
a/pre-cleanup-src/config/threadripper00/paths.py +++ b/src/lib/py/helpers.py @@ -1,17 +1,5 @@ -data_root = "/data" -fast_root = "/data_fast" - -hdf5_root = f"{data_root}/MAXIBONE/Goats/tomograms" -hdf5_root_fast = f"{fast_root}/MAXIBONE/Goats/tomograms" -binary_root = f"{hdf5_root_fast}/binary" - -esrf_data_local= f"{hdf5_root}/ESRF/" -esrf_data_sftp = "/XNS/XrayImaging/MiG/manjula.esci.nbi.dk.2_localhost/" -esrf_implants_root = f"{esrf_data_local}/esrf_dental_implants_april_2013/" -esrf_granules_root = f"{esrf_data_local}/esrf_dental_granules_july_2012/" - -# TODO: Hvorhen skal det her hen? import sys + def commandline_args(defaults): keys = list(defaults.keys()) @@ -40,4 +28,3 @@ def commandline_args(defaults): args.append(type(default)(sys.argv[i+1])) return args - diff --git a/pre-cleanup-src/io_modules/cache_esrf2013.py b/src/processing_steps/100_cache_esrf2013.py similarity index 83% rename from pre-cleanup-src/io_modules/cache_esrf2013.py rename to src/processing_steps/100_cache_esrf2013.py index 73dae5e..f2b12c6 100644 --- a/pre-cleanup-src/io_modules/cache_esrf2013.py +++ b/src/processing_steps/100_cache_esrf2013.py @@ -1,10 +1,11 @@ import os, sys, pathlib, tqdm, fabric sys.path.append(sys.path[0]+"/../") -from config.paths import commandline_args, esrf_data_sftp, esrf_data_local +from lib.py.helpers import commandline_args +from config.paths import esrf_data_sftp, esrf_data_local if __name__ == "__main__": - sample, experiment = commandline_args({"sample":"<required>", - "experiment":"esrf_dental_implants_april_2013"}) + sample, experiment = commandline_args({"sample" : "<required>", + "experiment" : "esrf_dental_implants_april_2013"}) index_dir = f"{esrf_data_local}/{experiment}/index/"; with open(f"{index_dir}/{sample}.txt") as f: @@ -32,4 +33,3 @@ sftp.get(f,f) connection.close() - From 89f29fa326561bf276bc83d4f77ff2377a933051 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 15:40:36 +0100 Subject: [PATCH 
021/136] #29 Added verbose to cache_esrf --- src/processing_steps/100_cache_esrf2013.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/processing_steps/100_cache_esrf2013.py b/src/processing_steps/100_cache_esrf2013.py index f2b12c6..c744dc7 100644 --- a/src/processing_steps/100_cache_esrf2013.py +++ b/src/processing_steps/100_cache_esrf2013.py @@ -4,8 +4,9 @@ from config.paths import esrf_data_sftp, esrf_data_local if __name__ == "__main__": - sample, experiment = commandline_args({"sample" : "<required>", - "experiment" : "esrf_dental_implants_april_2013"}) + sample, experiment, verbose = commandline_args({"sample" : "<required>", + "experiment" : "esrf_dental_implants_april_2013", + "verbose" : 1}) index_dir = f"{esrf_data_local}/{experiment}/index/"; with open(f"{index_dir}/{sample}.txt") as f: @@ -16,19 +17,19 @@ volume_dir = os.path.dirname(volume_xml) local_directory = f"{esrf_data_local}/{experiment}/{volume_dir}" sftp_directory = f"{esrf_data_sftp}/{experiment}/{volume_dir}" - print(f"Local: Creating directory {local_directory}") + if verbose >= 1: print(f"Local: Creating directory {local_directory}") pathlib.Path(local_directory).mkdir(parents=True, exist_ok=True) with fabric.Connection('erda') as connection: - print("Connected to ERDA") + if verbose >= 1: print("Connected to ERDA") with connection.sftp() as sftp: - print(f"SFTP: Attempting to chdir to {sftp_directory}") + if verbose >= 1: print(f"SFTP: Attempting to chdir to {sftp_directory}") sftp.chdir(sftp_directory) - print(f"SFTP: Reading directory contents") + if verbose >= 1: print(f"SFTP: Reading directory contents") files = sftp.listdir() - print(f"Local: Attempting to chdir to {local_directory}") + if verbose >= 1: print(f"Local: Attempting to chdir to {local_directory}") os.chdir(local_directory) - print("SFTP: Downloading subvolume contents") + if verbose >= 1: print("SFTP: Downloading subvolume contents") for f in tqdm.tqdm(files): sftp.get(f,f) From 
f0cdf30e4e64f419890b3c793bbead30bae8aa08 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 15:41:00 +0100 Subject: [PATCH 022/136] #25 Started adding pip dependencies --- src/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/requirements.txt diff --git a/src/requirements.txt b/src/requirements.txt new file mode 100644 index 0000000..b4d4689 --- /dev/null +++ b/src/requirements.txt @@ -0,0 +1,2 @@ +fabric==3.0.0 +tqdm==4.64.1 \ No newline at end of file From 62dc2d65f02bddc984ac83e33d23210bdf7e5754 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 15:51:41 +0100 Subject: [PATCH 023/136] #25 Moved the generate byte hdf 5 script --- .../processing_steps/200_generate_byte_hdf5.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pre-cleanup-src/scripts/generate-byte-hdf5.py => src/processing_steps/200_generate_byte_hdf5.py (100%) diff --git a/pre-cleanup-src/scripts/generate-byte-hdf5.py b/src/processing_steps/200_generate_byte_hdf5.py similarity index 100% rename from pre-cleanup-src/scripts/generate-byte-hdf5.py rename to src/processing_steps/200_generate_byte_hdf5.py From 5d9155fa3654d3b03014569bdd4cf3ee8cf4725c Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 15:52:38 +0100 Subject: [PATCH 024/136] #25 Fixed the import errors of generate_byte_hdf5. 
This included moving teh esrf_read helper script to lib/py/ --- .../io_modules => src/lib/py}/esrf_read.py | 0 .../200_generate_byte_hdf5.py | 19 +++++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) rename {pre-cleanup-src/io_modules => src/lib/py}/esrf_read.py (100%) diff --git a/pre-cleanup-src/io_modules/esrf_read.py b/src/lib/py/esrf_read.py similarity index 100% rename from pre-cleanup-src/io_modules/esrf_read.py rename to src/lib/py/esrf_read.py diff --git a/src/processing_steps/200_generate_byte_hdf5.py b/src/processing_steps/200_generate_byte_hdf5.py index fa64136..a3df603 100755 --- a/src/processing_steps/200_generate_byte_hdf5.py +++ b/src/processing_steps/200_generate_byte_hdf5.py @@ -7,16 +7,19 @@ import h5py, sys, os.path, pathlib, tqdm sys.path.append(sys.path[0]+"/../") import bohrium as bh # TODO: Get rid of Bohrium dependence without losing too much performance -from io_modules.esrf_read import * +from lib.py.esrf_read import * import numpy as np, matplotlib.pyplot as plt from config.paths import * +from lib.py.helpers import commandline_args from PIL import Image NA = np.newaxis -sample, chunk_length, use_bohrium, xml_root = commandline_args({"sample":"<required>","chunk_length":256, - "use_bohrium":True,"xml_root":esrf_implants_root}) +sample, chunk_length, use_bohrium, xml_root = commandline_args({"sample" : "<required>", + "chunk_length" : 256, + "use_bohrium" : True, + "xml_root" : esrf_implants_root}) print(f"data_root={xml_root}") @@ -97,11 +100,11 @@ def normalize(A,value_range,nbits=16,dtype=np.uint16): h5tomo_lsb = h5file_lsb['voxels'] def cylinder_mask(Ny,Nx): - ys = bh.linspace(-1,1,Ny) - xs = bh.linspace(-1,1,Nx) + ys = np.linspace(-1,1,Ny) + xs = np.linspace(-1,1,Nx) return (xs[NA,:]**2 + ys[:,NA]**2) < 1 -mask = bh.array(cylinder_mask(Ny,Nx)) +mask = np.array(cylinder_mask(Ny,Nx)) for i in tqdm.tqdm(range(len(subvolume_metadata))): subvolume_info = subvolume_metadata[i]; @@ -115,7 +118,7 @@ def cylinder_mask(Ny,Nx): 
# print(f"Writing {subvolume_info['experiment']}") # h5tomo[z_offset:z_offset+nz] = tomo[:,sy:ey,sx:ex]; # del tomo - chunk = bh.zeros((chunk_length,Ny,Nx),dtype=np.uint16); + chunk = np.zeros((chunk_length,Ny,Nx),dtype=np.uint16); for z in range(0,nz,chunk_length): chunk_end = min(z+chunk_length,nz); @@ -148,7 +151,7 @@ def cylinder_mask(Ny,Nx): chunk_lsb = chunk_lsb.copy2numpy() print("chunk_lsb.copy2numpy().max: ", chunk_lsb.max()) h5tomo_lsb[z_offset+z:z_offset+chunk_end] = chunk_lsb[:] - bh.flush() + np.flush() z_offset += nz; From 5d91765e7f9c07589a7fc02aa94dc8195ca33470 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 15:54:58 +0100 Subject: [PATCH 025/136] #29 Added verbose to generate_byte_hdf5 --- .../200_generate_byte_hdf5.py | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/processing_steps/200_generate_byte_hdf5.py b/src/processing_steps/200_generate_byte_hdf5.py index a3df603..559bc85 100755 --- a/src/processing_steps/200_generate_byte_hdf5.py +++ b/src/processing_steps/200_generate_byte_hdf5.py @@ -16,13 +16,14 @@ NA = np.newaxis -sample, chunk_length, use_bohrium, xml_root = commandline_args({"sample" : "<required>", - "chunk_length" : 256, - "use_bohrium" : True, - "xml_root" : esrf_implants_root}) +sample, chunk_length, use_bohrium, xml_root, verbose = commandline_args({"sample" : "<required>", + "chunk_length" : 256, + "use_bohrium" : True, + "xml_root" : esrf_implants_root, + "verbose" : 1}) -print(f"data_root={xml_root}") +if verbose >= 1: print(f"data_root={xml_root}") # Normalize, such that 1,...,2^(nbits)-1 correspond to vmin,...,vmax # 0 corresponds to a masked value @@ -43,10 +44,10 @@ def normalize(A,value_range,nbits=16,dtype=np.uint16): (Nz,Ny,Nx) = (np.sum(subvolume_dimensions[:,0]), np.min(subvolume_dimensions[:,1]&~31), np.min(subvolume_dimensions[:,2]&~31)) for i in range(len(subvolume_metadata)): - print(f"{i} 
{sample}/{subvolume_metadata[i]['experiment']}: {subvolume_range[i]}") -print((global_vmin, global_vmax), (Nz,Ny,Nx)) -print(subvolume_dimensions) -print(subvolume_range) + if verbose >= 1: print(f"{i} {sample}/{subvolume_metadata[i]['experiment']}: {subvolume_range[i]}") +if verbose >= 1: print((global_vmin, global_vmax), (Nz,Ny,Nx)) +if verbose >= 1: print(subvolume_dimensions) +if verbose >= 1: print(subvolume_range) #import re @@ -69,7 +70,7 @@ def normalize(A,value_range,nbits=16,dtype=np.uint16): outdir = os.path.dirname(lsb_filename) pathlib.Path(outdir).mkdir(parents=True, exist_ok=True) -print(f"Writing {msb_filename} and {lsb_filename}") +if verbose >= 1: print(f"Writing {msb_filename} and {lsb_filename}") h5file_msb = h5py.File(msb_filename,"w"); h5file_lsb = h5py.File(lsb_filename,"w"); @@ -111,11 +112,11 @@ def cylinder_mask(Ny,Nx): (nz,ny,nx) = subvolume_dimensions[i]; (sy,sx) = ((ny-Ny)//2+((ny-Ny)%2), (nx-Nx)//2+((nx-Nx)%2)) (ey,ex) = (ny-(ny-Ny)//2, nx-(nx-Nx)//2) - print((sy,ey),(sx,ex)) + if verbose >= 1: print((sy,ey),(sx,ex)) - # print(f"Loading {subvolume_info['experiment']}") + # if verbose >= 1: print(f"Loading {subvolume_info['experiment']}") # tomo = normalize(esrf_full_tomogram_bh(subvolume_info), (global_vmin,global_vmax)); - # print(f"Writing {subvolume_info['experiment']}") + # if verbose >= 1: print(f"Writing {subvolume_info['experiment']}") # h5tomo[z_offset:z_offset+nz] = tomo[:,sy:ey,sx:ex]; # del tomo chunk = np.zeros((chunk_length,Ny,Nx),dtype=np.uint16); @@ -123,14 +124,14 @@ def cylinder_mask(Ny,Nx): chunk_end = min(z+chunk_length,nz); region = [[sx,sy,z],[ex,ey,chunk_end]] - print(f"Reading chunk {z+z_offset}:{chunk_end+z_offset} ({i}-{z}), region={region}"); + if verbose >= 1: print(f"Reading chunk {z+z_offset}:{chunk_end+z_offset} ({i}-{z}), region={region}"); slab_data = esrf_edfrange_to_bh(subvolume_info,region) - print(f"Chunk shape: {slab_data.shape}") - print("Max value before masking:", slab_data.max()) + if verbose >= 
1: print(f"Chunk shape: {slab_data.shape}") + if verbose >= 1: print("Max value before masking:", slab_data.max()) slab_data *= mask[NA,:,:] - print("Max value after masking:", slab_data.max()) + if verbose >= 1: print("Max value after masking:", slab_data.max()) chunk[:chunk_end-z] = normalize(slab_data,(global_vmin,global_vmax)) - print("Max value after normalizing:", chunk.max()) + if verbose >= 1: print("Max value after normalizing:", chunk.max()) # for j in range(0,chunk_end-z): # slice_meta, slice_data = esrf_edf_n_to_npy(subvolume_info,z+j); @@ -138,18 +139,18 @@ def cylinder_mask(Ny,Nx): # chunk[j] = normalize(slice_data[sy:ey,sx:ex],(global_vmin,global_vmax)) * mask - print(f"Writing {sample} MSB slice {z+z_offset}:{chunk_end+z_offset} ({i}-{z})"); + if verbose >= 1: print(f"Writing {sample} MSB slice {z+z_offset}:{chunk_end+z_offset} ({i}-{z})"); chunk_msb = ((chunk[:chunk_end-z]>>8)&0xff).astype(np.uint8) - print("chunk_msb.max: ", chunk_msb.max()) + if verbose >= 1: print("chunk_msb.max: ", chunk_msb.max()) chunk_msb = chunk_msb.copy2numpy() - print("chunk_msb.copy2numpy().max: ", chunk_msb.max()) + if verbose >= 1: print("chunk_msb.copy2numpy().max: ", chunk_msb.max()) h5tomo_msb[z_offset+z:z_offset+chunk_end] = chunk_msb[:] - print(f"Writing {sample} LSB slice {z+z_offset}:{chunk_end+z_offset} ({i}-{z})"); + if verbose >= 1: print(f"Writing {sample} LSB slice {z+z_offset}:{chunk_end+z_offset} ({i}-{z})"); chunk_lsb = (chunk[:chunk_end-z]&0xff).astype(np.uint8) - print("chunk_lsb.max: ", chunk_lsb.max()) + if verbose >= 1: print("chunk_lsb.max: ", chunk_lsb.max()) chunk_lsb = chunk_lsb.copy2numpy() - print("chunk_lsb.copy2numpy().max: ", chunk_lsb.max()) + if verbose >= 1: print("chunk_lsb.copy2numpy().max: ", chunk_lsb.max()) h5tomo_lsb[z_offset+z:z_offset+chunk_end] = chunk_lsb[:] np.flush() From e567284ab4db94b1b997bf32391d7e25e12f948d Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 15:58:09 +0100 Subject: 
[PATCH 026/136] #25 Moved volume matcher and added jax as a dependency --- .../processing_steps/300_volume_matcher.py | 4 +++- src/requirements.txt | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) rename pre-cleanup-src/scripts/volume_matcher.py => src/processing_steps/300_volume_matcher.py (98%) diff --git a/pre-cleanup-src/scripts/volume_matcher.py b/src/processing_steps/300_volume_matcher.py similarity index 98% rename from pre-cleanup-src/scripts/volume_matcher.py rename to src/processing_steps/300_volume_matcher.py index 2e26ac5..cf8c602 100755 --- a/pre-cleanup-src/scripts/volume_matcher.py +++ b/src/processing_steps/300_volume_matcher.py @@ -12,7 +12,9 @@ import jax.numpy as jp import h5py, jax, sys from PIL import Image -from config.paths import hdf5_root, commandline_args +sys.path.append(sys.path[0]+"/../") +from config.paths import hdf5_root +from lib.py.helpers import commandline_args volume_matched_dir = f"{hdf5_root}/processed/volume_matched" diff --git a/src/requirements.txt b/src/requirements.txt index b4d4689..945718f 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,2 +1,3 @@ fabric==3.0.0 -tqdm==4.64.1 \ No newline at end of file +tqdm==4.64.1 +jax==0.4.3 \ No newline at end of file From 20c7ff3452319ce462e2bb3a6e12fee21323c305 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 16:00:22 +0100 Subject: [PATCH 027/136] #29 Added verbose to volume matcher --- src/processing_steps/300_volume_matcher.py | 31 +++++++++++++--------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/processing_steps/300_volume_matcher.py b/src/processing_steps/300_volume_matcher.py index cf8c602..336660c 100755 --- a/src/processing_steps/300_volume_matcher.py +++ b/src/processing_steps/300_volume_matcher.py @@ -16,6 +16,7 @@ from config.paths import hdf5_root from lib.py.helpers import commandline_args +verbose = 1 volume_matched_dir = f"{hdf5_root}/processed/volume_matched" def 
match_region(voxels_top, voxels_bot, overlap, max_shift): @@ -44,25 +45,25 @@ def match_all_regions(voxels,crossings,write_image_checks=True): errors = np.zeros(len(crossings),dtype=np.float32) match_region_jit = jax.jit(match_region,static_argnums=(2,3)); - print(f"Crossings at z-indices: {crossings}") + if verbose >= 1: print(f"Crossings at z-indices: {crossings}") for i in range(len(crossings)): crossing = crossings[i] - print(f"Processing crossing at z={crossing}:") - print(f"Reading top region: voxels[{crossing-max_shift}:{crossing}]") + if verbose >= 1: print(f"Processing crossing at z={crossing}:") + if verbose >= 1: print(f"Reading top region: voxels[{crossing-max_shift}:{crossing}]") top_voxels = jp.array(voxels[crossing-max_shift:crossing]).astype(jp.float32) - print(f"Reading bottom region: voxels[{crossing}:{crossing+max_shift}]") + if verbose >= 1: print(f"Reading bottom region: voxels[{crossing}:{crossing+max_shift}]") bot_voxels = jp.array(voxels[crossing:crossing+max_shift]).astype(jp.float32) - print(f"Matching regions (Shapes: {bot_voxels.shape} {top_voxels.shape})") + if verbose >= 1: print(f"Matching regions (Shapes: {bot_voxels.shape} {top_voxels.shape})") shift, error = match_region_jit(top_voxels,bot_voxels,overlap,max_shift) shifts[i] = shift errors[i] = error - print(f"Optimal shift is {shift} with error {error} per voxel") + if verbose >= 1: print(f"Optimal shift is {shift} with error {error} per voxel") if(write_image_checks): image_dir = f"{volume_matched_dir}/verification" pathlib.Path(image_dir).mkdir(parents=True, exist_ok=True) - print(f"Writing images of matched slices to {image_dir} to check correctness.") + if verbose >= 1: print(f"Writing images of matched slices to {image_dir} to check correctness.") merged_zy_slice = np.concatenate([top_voxels[:,:,Nx//2],bot_voxels[shift:,:,Nx//2]]) # merged_zy_slice = np.array(merged_voxels[:,:,Nx//2]) @@ -84,11 +85,11 @@ def write_matched(voxels_in, voxels_out, crossings, shifts): cum_shifts 
= [0]+list(np.cumsum(shifts)) crossings = list(crossings) + [voxels_in.shape[0]] - print(f"Cumulative shifts: {cum_shifts}") - print(f"Duplicating subvolume 0: 0:{crossings[0]}") + if verbose >= 1: print(f"Cumulative shifts: {cum_shifts}") + if verbose >= 1: print(f"Duplicating subvolume 0: 0:{crossings[0]}") voxels_out[:crossings[0]] = voxels_in[:crossings[0]]; for i in range(len(crossings)-1): - print(f"Duplicating unmatched part of subvolume {i+1}: voxels_out[{crossings[i]-cum_shifts[i]}:{crossings[i+1]-cum_shifts[i]-shifts[i]}] = voxels_in[{crossings[i]+shifts[i]}:{crossings[i+1]}];") + if verbose >= 1: print(f"Duplicating unmatched part of subvolume {i+1}: voxels_out[{crossings[i]-cum_shifts[i]}:{crossings[i+1]-cum_shifts[i]-shifts[i]}] = voxels_in[{crossings[i]+shifts[i]}:{crossings[i+1]}];") voxels_out[crossings[i]-cum_shifts[i]:crossings[i+1]-cum_shifts[i]-shifts[i]] = voxels_in[crossings[i]+shifts[i]:crossings[i+1]]; @@ -108,7 +109,11 @@ def write_matched_hdf5(h5_filename_in, h5_filename_out, crossings, shifts, compr if __name__ == "__main__": - sample, overlap, max_shift, generate_h5 = commandline_args({"sample":"<required>","overlap":10,"max_shift":150,"generate_h5":False}) + sample, overlap, max_shift, generate_h5, verbose = commandline_args({"sample" : "<required>", + "overlap" : 10, + "max_shift" : 150, + "generate_h5" : False, + "verbose" : 1}) input_h5name = f"{hdf5_root}/hdf5-byte/msb/{sample}.h5" output_h5name = f"{volume_matched_dir}/1x/{sample}.h5" @@ -122,7 +127,7 @@ def write_matched_hdf5(h5_filename_in, h5_filename_out, crossings, shifts, compr (Nz,Ny,Nx) = h5file['voxels'].shape crossings = np.cumsum(subvolume_dimensions[:-1,0]).astype(int) - print(f"Matching all regions for sample {sample} at crossings {crossings}.") + if verbose >= 1: print(f"Matching all regions for sample {sample} at crossings {crossings}.") shifts, errors = match_all_regions(voxels,crossings) np.save(f"{volume_matched_dir}/{sample}-shifts.npy",shifts) @@ -134,5 +139,5 
@@ def write_matched_hdf5(h5_filename_in, h5_filename_out, crossings, shifts, compr h5file.close() - print(f"Copying over volume from {input_h5name} shifted by {shifts} to {output_h5name}") + if verbose >= 1: print(f"Copying over volume from {input_h5name} shifted by {shifts} to {output_h5name}") if(generate_h5): write_matched_hdf5(input_h5name, output_h5name, crossings, shifts) From 369e3acd773b4e409e5d8e44f09c245571d75b77 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 17:21:57 +0100 Subject: [PATCH 028/136] #25 Moved h5tobin --- .../scripts/h5tobin.py => src/processing_steps/400_h5tobin.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pre-cleanup-src/scripts/h5tobin.py => src/processing_steps/400_h5tobin.py (100%) diff --git a/pre-cleanup-src/scripts/h5tobin.py b/src/processing_steps/400_h5tobin.py similarity index 100% rename from pre-cleanup-src/scripts/h5tobin.py rename to src/processing_steps/400_h5tobin.py From 3510a35c3ac7f6709120da5d9ad65460f591a536 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 17:29:50 +0100 Subject: [PATCH 029/136] #25 Moved helper functions --- pre-cleanup-src/helper_functions.py | 138 -------------------------- src/lib/py/helpers.py | 145 ++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 138 deletions(-) delete mode 100644 pre-cleanup-src/helper_functions.py diff --git a/pre-cleanup-src/helper_functions.py b/pre-cleanup-src/helper_functions.py deleted file mode 100644 index 80d30c9..0000000 --- a/pre-cleanup-src/helper_functions.py +++ /dev/null @@ -1,138 +0,0 @@ -#!/usr/bin/env python3 -import os, h5py, pathlib, numpy as np, pybind_kernels.histograms as histograms, matplotlib.pyplot as plt, tqdm -from config.paths import hdf5_root, binary_root -from numpy import newaxis as NA - -def update_hdf5(filename,group_name,datasets={},attributes={},dimensions=None, - compression=None,chunk_shape=None): - - output_dir = 
os.path.dirname(filename) - pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) - f = h5py.File(filename,'a') - - if((group_name is not None) and (group_name != "/")): - g = f.require_group(group_name) - else: - g = f - - for k in datasets: - v = datasets[k] - if(k in g): del g[k] - g.create_dataset(k,shape=v.shape,dtype=v.dtype, - compression=compression, chunks=chunk_shape,maxshape=None) - g[k][:] = v[:] - - if dimensions is not None: - try: - dims = dimensions[k] - for i, description in enumerate(dims): - g[k].dims[i] = description - except: - pass - - for k in attributes: - v = attributes[k] - g.attrs[k] = v - - f.close() - - -#TODO: Use this for masks, no compression and no chunking default for small metadata datasets -def update_hdf5_mask(filename,group_name,datasets={},attributes={},dimensions=None, - compression="lzf",chunk_shape=(64,64,64)): - update_hdf5(filename,group_name,datasets,attributes,dimensions,compression,chunk_shape) - - -def h5meta_info_volume_matched(sample): - with h5py.File(f"{hdf5_root}/hdf5-byte/msb/{sample}.h5","r") as h5meta: - vm_shifts = h5meta["volume_matching_shifts"][:] - Nz, Ny, Nx = h5meta['voxels'].shape - Nz -= np.sum(vm_shifts) - - subvolume_dimensions = h5meta['subvolume_dimensions'][:] - subvolume_nzs = subvolume_dimensions[:,0] - np.append(vm_shifts,0) - voxel_size = h5meta["voxels"].attrs["voxelsize"] - - return ((Nz,Ny,Nx), subvolume_nzs, voxel_size) - -def block_info(h5meta_filename,block_size=0, n_blocks=0,z_offset=0): - print(f"Opening {h5meta_filename}") - with h5py.File(h5meta_filename, 'r') as h5meta: - vm_shifts = h5meta["volume_matching_shifts"][:] - Nz, Ny, Nx = h5meta['voxels'].shape - Nz -= np.sum(vm_shifts) - Nr = int(np.sqrt((Nx//2)**2 + (Ny//2)**2))+1 - - - subvolume_dimensions = h5meta['subvolume_dimensions'][:] - subvolume_nzs = subvolume_dimensions[:,0] - np.append(vm_shifts,0) - - if block_size == 0: - # If block_size is 0, let each block be exactly a full subvolume - blocks_are_subvolumes = 
True - - # Do either n_blocks subvolumes, or if n_blocks == 0: all remaining after offset - if n_blocks == 0: - n_blocks = len(subvolume_nzs)-z_offset - - else: - blocks_are_subvolumes = False - if n_blocks == 0: - n_blocks = Nz // block_size + (Nz % block_size > 0) - - - return {'dimensions':(Nz,Ny,Nx,Nr), - 'voxel_size':h5meta["voxels"].attrs["voxelsize"], - 'n_blocks': n_blocks, - 'block_size': block_size, - 'blocks_are_subvolumes': blocks_are_subvolumes, - 'subvolume_dimensions': subvolume_dimensions, - 'subvolume_nzs': subvolume_nzs, - 'subvolume_starts': np.concatenate([[0],np.cumsum(subvolume_nzs)[:-1]] - ) - } - - -def load_block(sample, offset, block_size, mask_name, mask_scale, field_names): - ''' - Loads a block of data from disk into memory. - ''' - Nfields = len(field_names) - - h5meta = h5py.File(f'{hdf5_root}/hdf5-byte/msb/{sample}.h5', 'r') - Nz, Ny, Nx = h5meta['voxels'].shape - Nz -= np.sum(h5meta["volume_matching_shifts"][:]) - h5meta.close() -# print(block_size,Nz,offset) - block_size = min(block_size, Nz-offset) - - voxels = np.zeros((block_size,Ny,Nx), dtype=np.uint16) - fields = np.zeros((Nfields,block_size//2,Ny//2,Nx//2), dtype=np.uint16) - - if mask_name is not None: - for i in tqdm.tqdm(range(1),f"Loading {mask_name} mask from {hdf5_root}/masks/{mask_scale}x/{sample}.h5", leave=True): - with h5py.File(f"{hdf5_root}/masks/{mask_scale}x/{sample}.h5","r") as h5mask: - mask = h5mask[mask_name]["mask"][offset//mask_scale:offset//mask_scale + block_size//mask_scale] - - #TODO: Make voxel & field scale command line parameters - for i in tqdm.tqdm(range(1),f"Loading {voxels.shape} voxels from {binary_root}/voxels/1x/{sample}.uint16", leave=True): - histograms.load_slice(voxels, f'{binary_root}/voxels/1x/{sample}.uint16', (offset, 0, 0), (Nz, Ny, Nx)) # TODO: Don't use 3 different methods for load/store - - for i in tqdm.tqdm(range(Nfields),f"Loading {binary_root}/fields/implant-{field_names}/2x/{sample}.npy",leave=True): - fi = 
np.load(f"{binary_root}/fields/implant-{field_names[i]}/2x/{sample}.npy", mmap_mode='r') - fields[i,:] = fi[offset//2:offset//2 + block_size//2] - - if mask_name is not None: - nz, ny, nx = (block_size//mask_scale), Ny//mask_scale, Nx//mask_scale - mask_1x = np.broadcast_to(mask[:,NA,:,NA,:,NA],(nz,mask_scale, ny,mask_scale, nx,mask_scale)) - mask_1x = mask_1x.reshape(nz*mask_scale,ny*mask_scale,nx*mask_scale) - voxels[:nz*mask_scale] *= mask_1x # block_size may not be divisible by mask_scale - voxels[nz*mask_scale:] *= mask_1x[-1][NA,...] # Remainder gets last line of mask - -# plt.imshow(voxels[:,voxels.shape[1]//2,:]); plt.show() -# plt.imshow(fields[0,:,fields[0].shape[1]//2,:]); plt.show() - return voxels, fields - -def row_normalize(A,r): - na = np.newaxis - return A/(r[:,na]+(r==0)[:,na]) diff --git a/src/lib/py/helpers.py b/src/lib/py/helpers.py index f76df16..b76ec6a 100644 --- a/src/lib/py/helpers.py +++ b/src/lib/py/helpers.py @@ -1,4 +1,13 @@ import sys +sys.path.append(sys.path[0]+"/../") + +from config.paths import binary_root, hdf5_root +import h5py +from lib.cpp.cpu.io import load_slice +import os +import numpy as np +import pathlib +import tqdm def commandline_args(defaults): keys = list(defaults.keys()) @@ -28,3 +37,139 @@ def commandline_args(defaults): args.append(type(default)(sys.argv[i+1])) return args + + +def update_hdf5(filename,group_name,datasets={},attributes={},dimensions=None, + compression=None,chunk_shape=None): + + output_dir = os.path.dirname(filename) + pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) + f = h5py.File(filename,'a') + + if((group_name is not None) and (group_name != "/")): + g = f.require_group(group_name) + else: + g = f + + for k in datasets: + v = datasets[k] + if(k in g): del g[k] + g.create_dataset(k,shape=v.shape,dtype=v.dtype, + compression=compression, chunks=chunk_shape,maxshape=None) + g[k][:] = v[:] + + if dimensions is not None: + try: + dims = dimensions[k] + for i, description in 
enumerate(dims): + g[k].dims[i] = description + except: + pass + + for k in attributes: + v = attributes[k] + g.attrs[k] = v + + f.close() + + +#TODO: Use this for masks, no compression and no chunking default for small metadata datasets +def update_hdf5_mask(filename,group_name,datasets={},attributes={},dimensions=None, + compression="lzf",chunk_shape=(64,64,64)): + update_hdf5(filename,group_name,datasets,attributes,dimensions,compression,chunk_shape) + + +def h5meta_info_volume_matched(sample): + with h5py.File(f"{hdf5_root}/hdf5-byte/msb/{sample}.h5","r") as h5meta: + vm_shifts = h5meta["volume_matching_shifts"][:] + Nz, Ny, Nx = h5meta['voxels'].shape + Nz -= np.sum(vm_shifts) + + subvolume_dimensions = h5meta['subvolume_dimensions'][:] + subvolume_nzs = subvolume_dimensions[:,0] - np.append(vm_shifts,0) + voxel_size = h5meta["voxels"].attrs["voxelsize"] + + return ((Nz,Ny,Nx), subvolume_nzs, voxel_size) + +def block_info(h5meta_filename,block_size=0, n_blocks=0,z_offset=0): + print(f"Opening {h5meta_filename}") + with h5py.File(h5meta_filename, 'r') as h5meta: + vm_shifts = h5meta["volume_matching_shifts"][:] + Nz, Ny, Nx = h5meta['voxels'].shape + Nz -= np.sum(vm_shifts) + Nr = int(np.sqrt((Nx//2)**2 + (Ny//2)**2))+1 + + + subvolume_dimensions = h5meta['subvolume_dimensions'][:] + subvolume_nzs = subvolume_dimensions[:,0] - np.append(vm_shifts,0) + + if block_size == 0: + # If block_size is 0, let each block be exactly a full subvolume + blocks_are_subvolumes = True + + # Do either n_blocks subvolumes, or if n_blocks == 0: all remaining after offset + if n_blocks == 0: + n_blocks = len(subvolume_nzs)-z_offset + + else: + blocks_are_subvolumes = False + if n_blocks == 0: + n_blocks = Nz // block_size + (Nz % block_size > 0) + + + return {'dimensions':(Nz,Ny,Nx,Nr), + 'voxel_size':h5meta["voxels"].attrs["voxelsize"], + 'n_blocks': n_blocks, + 'block_size': block_size, + 'blocks_are_subvolumes': blocks_are_subvolumes, + 'subvolume_dimensions': 
subvolume_dimensions, + 'subvolume_nzs': subvolume_nzs, + 'subvolume_starts': np.concatenate([[0],np.cumsum(subvolume_nzs)[:-1]] + ) + } + + +def load_block(sample, offset, block_size, mask_name, mask_scale, field_names): + ''' + Loads a block of data from disk into memory. + ''' + NA = np.newaxis + Nfields = len(field_names) + + h5meta = h5py.File(f'{hdf5_root}/hdf5-byte/msb/{sample}.h5', 'r') + Nz, Ny, Nx = h5meta['voxels'].shape + Nz -= np.sum(h5meta["volume_matching_shifts"][:]) + h5meta.close() +# print(block_size,Nz,offset) + block_size = min(block_size, Nz-offset) + + voxels = np.zeros((block_size,Ny,Nx), dtype=np.uint16) + fields = np.zeros((Nfields,block_size//2,Ny//2,Nx//2), dtype=np.uint16) + + if mask_name is not None: + for i in tqdm.tqdm(range(1),f"Loading {mask_name} mask from {hdf5_root}/masks/{mask_scale}x/{sample}.h5", leave=True): + with h5py.File(f"{hdf5_root}/masks/{mask_scale}x/{sample}.h5","r") as h5mask: + mask = h5mask[mask_name]["mask"][offset//mask_scale:offset//mask_scale + block_size//mask_scale] + + #TODO: Make voxel & field scale command line parameters + for i in tqdm.tqdm(range(1),f"Loading {voxels.shape} voxels from {binary_root}/voxels/1x/{sample}.uint16", leave=True): + load_slice(voxels, f'{binary_root}/voxels/1x/{sample}.uint16', (offset, 0, 0), (Nz, Ny, Nx)) # TODO: Don't use 3 different methods for load/store + + for i in tqdm.tqdm(range(Nfields),f"Loading {binary_root}/fields/implant-{field_names}/2x/{sample}.npy",leave=True): + fi = np.load(f"{binary_root}/fields/implant-{field_names[i]}/2x/{sample}.npy", mmap_mode='r') + fields[i,:] = fi[offset//2:offset//2 + block_size//2] + + if mask_name is not None: + nz, ny, nx = (block_size//mask_scale), Ny//mask_scale, Nx//mask_scale + mask_1x = np.broadcast_to(mask[:,NA,:,NA,:,NA],(nz,mask_scale, ny,mask_scale, nx,mask_scale)) + mask_1x = mask_1x.reshape(nz*mask_scale,ny*mask_scale,nx*mask_scale) + voxels[:nz*mask_scale] *= mask_1x # block_size may not be divisible by mask_scale + 
voxels[nz*mask_scale:] *= mask_1x[-1][NA,...] # Remainder gets last line of mask + +# plt.imshow(voxels[:,voxels.shape[1]//2,:]); plt.show() +# plt.imshow(fields[0,:,fields[0].shape[1]//2,:]); plt.show() + return voxels, fields + +def row_normalize(A,r): + na = np.newaxis + return A/(r[:,na]+(r==0)[:,na]) From 25d0427afa31b2dca78cea8fae61d764c89771ff Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 17:30:04 +0100 Subject: [PATCH 030/136] Updated h5tobin to use new structure --- src/processing_steps/400_h5tobin.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/processing_steps/400_h5tobin.py b/src/processing_steps/400_h5tobin.py index f9ad99f..0457f58 100755 --- a/src/processing_steps/400_h5tobin.py +++ b/src/processing_steps/400_h5tobin.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 import sys, pathlib, h5py, numpy as np sys.path.append(sys.path[0]+"/../") -import pybind_kernels.histograms as histograms -from config.paths import hdf5_root, binary_root, commandline_args +from config.paths import hdf5_root, binary_root from tqdm import tqdm -from helper_functions import update_hdf5 +from lib.cpp.cpu.io import write_slice +from lib.py.helpers import commandline_args, update_hdf5 slice_all = slice(None) @@ -65,7 +65,7 @@ def h5tobin(sample,region=(slice_all,slice_all,slice_all),shift_volume_match=1): subvolume_msb = dmsb[input_zstarts[i]:input_zends[i],y_range,x_range].astype(np.uint16) subvolume_lsb = dlsb[input_zstarts[i]:input_zends[i],y_range,x_range].astype(np.uint16) - histograms.write_slice((subvolume_msb << 8) | subvolume_lsb, output_zstarts[i]*Ny*Nx, outfile) + write_slice((subvolume_msb << 8) | subvolume_lsb, output_zstarts[i]*Ny*Nx, outfile) del subvolume_msb del subvolume_lsb From 578054527cd2ab52756c130ef3907649113993c0 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 17:31:43 +0100 Subject: [PATCH 031/136] #29 Added verbose to h5tobin --- 
src/processing_steps/400_h5tobin.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/processing_steps/400_h5tobin.py b/src/processing_steps/400_h5tobin.py index 0457f58..53b63ed 100755 --- a/src/processing_steps/400_h5tobin.py +++ b/src/processing_steps/400_h5tobin.py @@ -7,6 +7,7 @@ from lib.py.helpers import commandline_args, update_hdf5 slice_all = slice(None) +verbose = 1 def slice_length(s,n): start = s.start if s.start is not None else 0 @@ -37,23 +38,23 @@ def h5tobin(sample,region=(slice_all,slice_all,slice_all),shift_volume_match=1): input_zstarts = np.concatenate([[0], np.cumsum(Nzs[:-1])]).astype(int) input_zends = (np.cumsum(Nzs) - np.concatenate([vm_shifts,[0]])).astype(int) - print(f'HDF5 voxel data:') - print(f'subvolume_dims =\n{subvolume_dims}') - print(f'Nzs = {Nzs}') - print(f'vm_shifts = {vm_shifts}') - print(f'input_zstarts = {input_zstarts}') - print(f'input_zends = {input_zends}') + if verbose >= 1: print(f'HDF5 voxel data:') + if verbose >= 1: print(f'subvolume_dims =\n{subvolume_dims}') + if verbose >= 1: print(f'Nzs = {Nzs}') + if verbose >= 1: print(f'vm_shifts = {vm_shifts}') + if verbose >= 1: print(f'input_zstarts = {input_zstarts}') + if verbose >= 1: print(f'input_zends = {input_zends}') output_zstarts = np.concatenate([[0], np.cumsum(Nzs[:-1]) - np.cumsum(vm_shifts)]).astype(int) output_zends = np.concatenate([output_zstarts[1:], [output_zstarts[-1]+Nzs[-1]]]).astype(int) - print(f'output_zstarts = {output_zstarts}') - print(f'output_zends = {output_zends}') + if verbose >= 1: print(f'output_zstarts = {output_zstarts}') + if verbose >= 1: print(f'output_zends = {output_zends}') assert((input_zends - input_zstarts == output_zends - output_zstarts).all()) - print(f'Shape to extract:\n{region}') + if verbose >= 1: print(f'Shape to extract:\n{region}') nzs = input_zends - input_zstarts # Actual number of z-slices per subvolume after vm-correction - print(f"Volume matched subvolume nzs 
= {nzs}") + if verbose >= 1: print(f"Volume matched subvolume nzs = {nzs}") # TODO: z_range is ignored # TODO: Store metadata about region range in json # TODO: Come up with appropriate "file format" scheme @@ -81,9 +82,10 @@ def h5tobin(sample,region=(slice_all,slice_all,slice_all),shift_volume_match=1): if __name__ == "__main__": - sample, y_cutoff, shift_volume_match = commandline_args({"sample":"<required>", - "y_cutoff": 0, - "shift_volume_match":1}) + sample, y_cutoff, shift_volume_match, verbose = commandline_args({"sample" : "<required>", + "y_cutoff" : 0, + "shift_volume_match" : 1, + "verbose" : 1}) region = (slice_all,slice(y_cutoff,None), slice_all) h5tobin(sample,region,shift_volume_match) From b7fde73a72422f96c98686aa9a36c5aec47084ec Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 17:32:48 +0100 Subject: [PATCH 032/136] #25 Moved rescale_cupy_bin --- .../processing_steps/500_rescale_cupy_bin.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pre-cleanup-src/preprocess/rescale-cupy-bin.py => src/processing_steps/500_rescale_cupy_bin.py (100%) diff --git a/pre-cleanup-src/preprocess/rescale-cupy-bin.py b/src/processing_steps/500_rescale_cupy_bin.py similarity index 100% rename from pre-cleanup-src/preprocess/rescale-cupy-bin.py rename to src/processing_steps/500_rescale_cupy_bin.py From c42fc29b2fe41792ecd71795591ea84ef3d14b47 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 17:46:09 +0100 Subject: [PATCH 033/136] #25 Moved rescale_cupy_bin dependencies --- {pre-cleanup-src/preprocess => src/lib/py}/resample.py | 0 src/processing_steps/500_rescale_cupy_bin.py | 6 +++--- src/requirements.txt | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) rename {pre-cleanup-src/preprocess => src/lib/py}/resample.py (100%) diff --git a/pre-cleanup-src/preprocess/resample.py b/src/lib/py/resample.py similarity index 100% rename from 
pre-cleanup-src/preprocess/resample.py rename to src/lib/py/resample.py diff --git a/src/processing_steps/500_rescale_cupy_bin.py b/src/processing_steps/500_rescale_cupy_bin.py index 87257a4..6a40b0a 100644 --- a/src/processing_steps/500_rescale_cupy_bin.py +++ b/src/processing_steps/500_rescale_cupy_bin.py @@ -3,9 +3,9 @@ import numpy as np import cupy as cp #import numpy as cp -from resample import downsample2x, downsample3x -from config.paths import commandline_args, hdf5_root, binary_root -from pybind_kernels.histograms import load_slice, write_slice # Rename and place under io_modules +from lib.py.helpers import commandline_args +from lib.py.resample import downsample2x, downsample3x +from config.paths import hdf5_root, binary_root mempool = cp.get_default_memory_pool() pinned_mempool = cp.get_default_pinned_memory_pool() diff --git a/src/requirements.txt b/src/requirements.txt index 945718f..18bdb83 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,3 +1,4 @@ +cupy-cuda11x==11.5.0 fabric==3.0.0 -tqdm==4.64.1 -jax==0.4.3 \ No newline at end of file +jax==0.4.3 +tqdm==4.64.1 \ No newline at end of file From bf63dc0030dc037547c9884272f0dd56ead61eba Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Feb 2023 17:47:30 +0100 Subject: [PATCH 034/136] #29 added verbose to rescale_cupy_bin --- src/processing_steps/500_rescale_cupy_bin.py | 32 +++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/processing_steps/500_rescale_cupy_bin.py b/src/processing_steps/500_rescale_cupy_bin.py index 6a40b0a..f7d5f47 100644 --- a/src/processing_steps/500_rescale_cupy_bin.py +++ b/src/processing_steps/500_rescale_cupy_bin.py @@ -12,10 +12,12 @@ mempool.free_all_blocks() pinned_mempool.free_all_blocks() - if __name__ == "__main__": - sample, image, chunk_size, dtype = commandline_args({"sample":"<required>", "image": "voxels", - "chunk_size":32*2, "dtype":"uint16"}) + sample, image, chunk_size, dtype, 
verbose = commandline_args({"sample" : "<required>", + "image" : "voxels", + "chunk_size" : 32*2, + "dtype" : "uint16", + "verbose" : 1}) scales = [2,4,8,16,32]; # Can do 6, 9, 12, 24, 27, etc. as well, but we currently don't. See old rescaly-cupy.py T = np.dtype(dtype) @@ -23,10 +25,10 @@ input_meta = f'{hdf5_root}/hdf5-byte/msb/{sample}.h5' input_bin = f"{binary_root}/{image}/1x/{sample}.{dtype}" output_root = f"{binary_root}/{image}" - print(f"Generating power-of-twos rescalings for sample {sample}") - print(f"Input metadata from {input_meta}") - print(f"Input flat binary {dtype} data from {input_bin}") - print(f"Output flat binary {dtype} data to {output_root}/[1,2,4,8,16,32]x/{sample}.{dtype}") + if verbose >= 1: print(f"Generating power-of-twos rescalings for sample {sample}") + if verbose >= 1: print(f"Input metadata from {input_meta}") + if verbose >= 1: print(f"Input flat binary {dtype} data from {input_bin}") + if verbose >= 1: print(f"Output flat binary {dtype} data to {output_root}/[1,2,4,8,16,32]x/{sample}.{dtype}") meta_h5 = h5py.File(input_meta, 'r') full_Nz, Ny, Nx = meta_h5['voxels'].shape @@ -34,13 +36,13 @@ Nz = full_Nz - np.sum(shifts) meta_h5.close() - print(f"Downscaling from 1x {(Nz,Ny,Nx)} to 2x {(Nz//2,Ny//2,Nx//2)}") + if verbose >= 1: print(f"Downscaling from 1x {(Nz,Ny,Nx)} to 2x {(Nz//2,Ny//2,Nx//2)}") if(chunk_size % 32): - print(f"Chunk size {chunk_size} is invalid: must be divisible by 32.") + if verbose >= 1: print(f"Chunk size {chunk_size} is invalid: must be divisible by 32.") sys.exit(-1) -# print(f"Used GPU memory: {mempool.used_bytes()//1000000}MB out of {mempool.total_bytes()/1000000}MB. {pinned_mempool.n_free_blocks()} free pinned blocks.") +# if verbose >= 1: print(f"Used GPU memory: {mempool.used_bytes()//1000000}MB out of {mempool.total_bytes()/1000000}MB. {pinned_mempool.n_free_blocks()} free pinned blocks.") -# print(f"Used GPU memory: {mempool.used_bytes()//1000000}MB out of {mempool.total_bytes()/1000000}MB. 
{pinned_mempool.n_free_blocks()} free pinned blocks.") +# if verbose >= 1: print(f"Used GPU memory: {mempool.used_bytes()//1000000}MB out of {mempool.total_bytes()/1000000}MB. {pinned_mempool.n_free_blocks()} free pinned blocks.") # TODO: Just iterate now we do powers of two voxels2x = np.empty((Nz//2,Ny//2,Nx//2),dtype=T) @@ -61,10 +63,10 @@ try: voxels1x_chunk = cp.fromfile(input_bin, dtype=T, count=chunk_items, offset=z*Ny*Nx*T.itemsize).reshape(zend-z,Ny,Nx) except: - print(f"Read failed. chunk_items = {chunk_items} = {(zend-z)*Ny*Nx}, z = {z}, zend-z = {zend-z}") + if verbose >= 1: print(f"Read failed. chunk_items = {chunk_items} = {(zend-z)*Ny*Nx}, z = {z}, zend-z = {zend-z}") sys.exit(-1) -# print(f"Used GPU memory: {mempool.used_bytes()//1000000}MB out of {mempool.total_bytes()/1000000}MB. {pinned_mempool.n_free_blocks()} free pinned blocks.") +# if verbose >= 1: print(f"Used GPU memory: {mempool.used_bytes()//1000000}MB out of {mempool.total_bytes()/1000000}MB. {pinned_mempool.n_free_blocks()} free pinned blocks.") voxels2x_chunk = downsample2x(voxels1x_chunk) del voxels1x_chunk voxels4x_chunk = downsample2x(voxels2x_chunk) @@ -90,10 +92,10 @@ del voxels16x_chunk del voxels32x_chunk - print(f"Allocating {(Nz//2,Ny//2,Nx//2)}={Nz//2*Ny//2*Nx//2} {dtype} for voxels2x on GPU") + if verbose >= 1: print(f"Allocating {(Nz//2,Ny//2,Nx//2)}={Nz//2*Ny//2*Nx//2} {dtype} for voxels2x on GPU") for i in tqdm.tqdm(range(len(scales)),f"{sample}: Downscaling to all smaller scales: {scales[2:]}"): output_dir = f"{output_root}/{scales[i]}x/" pathlib.Path(f"{output_dir}").mkdir(parents=True, exist_ok=True) - print(f"Writing out scale {scales[i]}x {(voxels[i].shape)} to {output_dir}/{sample}.uint16") + if verbose >= 1: print(f"Writing out scale {scales[i]}x {(voxels[i].shape)} to {output_dir}/{sample}.uint16") voxels[i].tofile(f"{output_dir}/{sample}.uint16") From 3bcb916d7c7fab6f235debddc15acf1e7620ed7a Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> 
Date: Fri, 10 Feb 2023 09:15:06 +0100 Subject: [PATCH 035/136] #25 Moved segment_implant_cc --- .../processing_steps/600_segment_implant_cc.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pre-cleanup-src/segmentation/segment-implant-cc.py => src/processing_steps/600_segment_implant_cc.py (100%) diff --git a/pre-cleanup-src/segmentation/segment-implant-cc.py b/src/processing_steps/600_segment_implant_cc.py similarity index 100% rename from pre-cleanup-src/segmentation/segment-implant-cc.py rename to src/processing_steps/600_segment_implant_cc.py From 1c794ef3ede80337b6577e24bc21aabe496a3630 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 09:17:56 +0100 Subject: [PATCH 036/136] #25 Fixed some of the dependencies of segment_implant_cc --- {pre-cleanup-src => src}/config/constants.py | 0 src/processing_steps/600_segment_implant_cc.py | 8 ++++---- 2 files changed, 4 insertions(+), 4 deletions(-) rename {pre-cleanup-src => src}/config/constants.py (100%) diff --git a/pre-cleanup-src/config/constants.py b/src/config/constants.py similarity index 100% rename from pre-cleanup-src/config/constants.py rename to src/config/constants.py diff --git a/src/processing_steps/600_segment_implant_cc.py b/src/processing_steps/600_segment_implant_cc.py index 47a9a29..df9d20d 100644 --- a/src/processing_steps/600_segment_implant_cc.py +++ b/src/processing_steps/600_segment_implant_cc.py @@ -1,10 +1,10 @@ import h5py, sys, os.path, pathlib, numpy as np, scipy.ndimage as ndi, tqdm, matplotlib.pyplot as plt sys.path.append(sys.path[0]+"/../") from config.constants import * -from config.paths import hdf5_root, binary_root, commandline_args -from pybind_kernels.geometry import center_of_mass, inertia_matrix, integrate_axes, sample_plane -from pybind_kernels.histograms import load_slice -from helper_functions import update_hdf5, update_hdf5_mask +from config.paths import hdf5_root, binary_root +from lib.py.helpers import 
commandline_args, update_hdf5, update_hdf5_mask +from lib.cpp.cpu_seq.geometry import center_of_mass, inertia_matrix, integrate_axes, sample_plane +from lib.cpp.cpu.io import load_slice NA = np.newaxis From 2183b6b53627df62991684416cbb627afe0e9020 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 09:18:43 +0100 Subject: [PATCH 037/136] #29 Added verbose to segment_implant_cc --- src/processing_steps/600_segment_implant_cc.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/processing_steps/600_segment_implant_cc.py b/src/processing_steps/600_segment_implant_cc.py index df9d20d..1f09060 100644 --- a/src/processing_steps/600_segment_implant_cc.py +++ b/src/processing_steps/600_segment_implant_cc.py @@ -8,7 +8,10 @@ NA = np.newaxis -sample, scale, chunk_size = commandline_args({"sample":"<required>","scale":8, "chunk_size":256}) +sample, scale, chunk_size, verbose = commandline_args({"sample" : "<required>", + "scale" : 8, + "chunk_size" : 256, + "verbose" : 1}) # Load metadata. TODO: Clean up, make automatic function. 
meta_filename = f"{hdf5_root}/hdf5-byte/msb/{sample}.h5" @@ -24,7 +27,7 @@ values = np.linspace(global_vmin,global_vmax,2**16) implant_threshold_u16 = np.argmin(np.abs(values-implant_threshold)) -print(f"Reading metadata from {meta_filename}.\n"+ +if verbose >= 1: print(f"Reading metadata from {meta_filename}.\n"+ f"volume_matching_shifts = {vm_shifts}\n"+ f"full_Nz,Ny,Nx = {full_Nz,Ny,Nx}\n"+ f"Nz = {Nz}\n"+ @@ -44,9 +47,9 @@ noisy_implant[z:z+chunk_length] = voxel_chunk[:chunk_length] >= implant_threshold_u16 -print(f"Computing connected components") +if verbose >= 1: print(f"Computing connected components") label, n_features = ndi.label(noisy_implant) -print(f"Counting component volumes") +if verbose >= 1: print(f"Counting component volumes") bincnts = np.bincount(label[label>0],minlength=n_features+1) largest_cc_ix = np.argmax(bincnts) @@ -54,7 +57,7 @@ output_dir = f"{hdf5_root}/masks/{scale}x/" pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) -print(f"Writing largest connected component to {output_dir}/{sample}.h5") +if verbose >= 1: print(f"Writing largest connected component to {output_dir}/{sample}.h5") update_hdf5_mask(f"{output_dir}/{sample}.h5", group_name="implant", From a2a4c264a01826407c3e07f21208aa6558df242a Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 09:20:00 +0100 Subject: [PATCH 038/136] #25 Moved implant_FoR --- .../implant-FoR.py => src/processing_steps/700_implant_FoR.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pre-cleanup-src/segmentation/implant-FoR.py => src/processing_steps/700_implant_FoR.py (100%) diff --git a/pre-cleanup-src/segmentation/implant-FoR.py b/src/processing_steps/700_implant_FoR.py similarity index 100% rename from pre-cleanup-src/segmentation/implant-FoR.py rename to src/processing_steps/700_implant_FoR.py From 556b3d68e17c9357e2863c42d6bdb8524368b330 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 
09:24:27 +0100 Subject: [PATCH 039/136] #25 Fixed some of the dependencies of implant_FoR --- src/processing_steps/700_implant_FoR.py | 8 ++++---- src/requirements.txt | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/processing_steps/700_implant_FoR.py b/src/processing_steps/700_implant_FoR.py index 5df0d2b..21512be 100644 --- a/src/processing_steps/700_implant_FoR.py +++ b/src/processing_steps/700_implant_FoR.py @@ -1,14 +1,14 @@ import h5py, sys, os.path, pathlib, numpy as np, numpy.linalg as la, tqdm sys.path.append(sys.path[0]+"/../") from config.constants import * -from config.paths import hdf5_root, binary_root, commandline_args -from pybind_kernels.geometry import center_of_mass, inertia_matrix, integrate_axes, sample_plane -from pybind_kernels.histograms import load_slice, erode_3d_sphere_gpu as erode_3d, dilate_3d_sphere_gpu as dilate_3d +from config.paths import hdf5_root, binary_root +from lib.cpp.cpu_seq.geometry import center_of_mass, inertia_matrix, integrate_axes, sample_plane +from lib.cpp.gpu.morphology import erode_3d_sphere as erode_3d, dilate_3d_sphere as dilate_3d import matplotlib.pyplot as plt from matplotlib.colors import colorConverter import scipy as sp, scipy.ndimage as ndi, scipy.interpolate as interpolate, scipy.signal as signal import vedo, vedo.pointcloud as pc -from helper_functions import update_hdf5, update_hdf5_mask +from lib.py.helpers import update_hdf5, update_hdf5_mask, commandline_args from numpy import array, newaxis as NA # Hvor skal disse hen? 
diff --git a/src/requirements.txt b/src/requirements.txt index 18bdb83..98617bb 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,4 +1,5 @@ cupy-cuda11x==11.5.0 fabric==3.0.0 jax==0.4.3 -tqdm==4.64.1 \ No newline at end of file +tqdm==4.64.1 +vedo==2023.4.3 \ No newline at end of file From 2d9ad72840c2b68f2027bc7190795f88a50b5921 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 09:27:54 +0100 Subject: [PATCH 040/136] #29 Added verbose to implant_FoR --- src/processing_steps/700_implant_FoR.py | 70 +++++++++++++------------ 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/src/processing_steps/700_implant_FoR.py b/src/processing_steps/700_implant_FoR.py index 21512be..0ed5ae1 100644 --- a/src/processing_steps/700_implant_FoR.py +++ b/src/processing_steps/700_implant_FoR.py @@ -11,6 +11,8 @@ from lib.py.helpers import update_hdf5, update_hdf5_mask, commandline_args from numpy import array, newaxis as NA +verbose = 1 + # Hvor skal disse hen? 
def circle_center(p0,p1,p2): m1, m2 = (p0+p1)/2, (p0+p2)/2 # Midpoints @@ -56,12 +58,12 @@ def open_3d(image, r): def coordinate_image(shape): Nz,Ny,Nx = shape - print(f"Broadcasting coordinates for {shape} image") + if verbose >= 1: print(f"Broadcasting coordinates for {shape} image") zs, ys, xs = np.broadcast_to(np.arange(Nz)[:,NA,NA],shape),\ np.broadcast_to(np.arange(Ny)[NA,:,NA],shape),\ np.broadcast_to(np.arange(Nx)[NA,NA,:],shape); zyxs = np.stack([zs,ys,xs],axis=-1) - print(f"Done") + if verbose >= 1: print(f"Done") return zyxs @@ -105,7 +107,7 @@ def homogeneous_transform(xs, M): hxs[...,:3] = xs; hxs[..., 3] = 1 - print(hxs.shape, M.shape) + if verbose >= 1: print(hxs.shape, M.shape) return hxs @ M.T @@ -273,24 +275,26 @@ def figure_FoR_voxels(name,voxels,debug=True): if __name__ == "__main__": - sample, scale, debug = commandline_args({"sample":"<required>","scale":8,"debug":1}) + sample, scale, verbose = commandline_args({"sample" : "<required>", + "scale" : 8, + "verbose" : 1}) if(scale<8): - print(f"Selected scale is {scale}x: This should not be run at high resolution, use scale>=8.") + if verbose >= 1: print(f"Selected scale is {scale}x: This should not be run at high resolution, use scale>=8.") #sys.exit(-1) ## STEP 0: LOAD MASKS, VOXELS, AND METADATA image_output_dir = f"{hdf5_root}/processed/implant-FoR/{sample}/" - print(f"Storing all debug-images to {image_output_dir}") + if verbose >= 1: print(f"Storing all debug-images to {image_output_dir}") pathlib.Path(image_output_dir).mkdir(parents=True, exist_ok=True) - print(f"Loading {scale}x implant mask from {hdf5_root}/masks/{scale}x/{sample}.h5") + if verbose >= 1: print(f"Loading {scale}x implant mask from {hdf5_root}/masks/{scale}x/{sample}.h5") implant_file = h5py.File(f"{hdf5_root}/masks/{scale}x/{sample}.h5",'r') implant = implant_file["implant/mask"][:] voxel_size = implant_file["implant"].attrs["voxel_size"] implant_file.close() - print(f"Loading {scale}x voxels from 
{binary_root}/voxels/{scale}x/{sample}.uint16") + if verbose >= 1: print(f"Loading {scale}x voxels from {binary_root}/voxels/{scale}x/{sample}.uint16") voxels = np.fromfile(f"{binary_root}/voxels/{scale}x/{sample}.uint16",dtype=np.uint16).reshape(implant.shape) nz,ny,nx = implant.shape @@ -298,7 +302,7 @@ def figure_FoR_voxels(name,voxels,debug=True): ### STEP 1: COMPUTE IMPLANT PRINCIPAL AXES FRAME OF REFERENCE ## STEP1A: DIAGONALIZE MOMENT OF INTERTIA MATRIX TO GET PRINCIPAL AXES cm = np.array(center_of_mass(implant)) # in downsampled-voxel index coordinates - print(f"Center of mass is: {cm}") + if verbose >= 1: print(f"Center of mass is: {cm}") IM = np.array(inertia_matrix(implant,cm)).reshape(3,3) ls,E = la.eigh(IM) @@ -320,7 +324,7 @@ def figure_FoR_voxels(name,voxels,debug=True): UVW = E.T u_vec,v_vec,w_vec = UVW - figure_FoR_UVW(debug) + figure_FoR_UVW(verbose >= 2) ### STEP 2: COMPUTE PHANTOM SCREW GEOMETRY # @@ -397,7 +401,7 @@ def UVW2xyz(p): implant_length_voxels = implant_length/voxel_size implant_radius_voxels = implant_radius/voxel_size - figure_FoR_cylinder(debug) + figure_FoR_cylinder(verbose >= 2) ### 3: In the cylinder coordinates, find radii and angle ranges to fill in the "holes" in the implant and make it solid ### (More robust than closing operations, as we don't want to effect the screw threads). 
@@ -412,7 +416,7 @@ def UVW2xyz(p): #TODO: Local circle figure (instead of showing global fit on local slice, which isn't snug) bbox_uvwp = [Up_min,Up_max,Vp_min,Vp_max,Wp_min,Wp_max] - figure_FoR_circle("prime-circle",Cp*voxel_size,v_vec,w_vec,implant_radius,bbox_uvwp,debug) + figure_FoR_circle("prime-circle",Cp*voxel_size,v_vec,w_vec,implant_radius,bbox_uvwp,verbose >= 2) ## 3B: Profile of radii and angles implant_thetas = np.arctan2(implant_Vps,implant_Wps) @@ -453,8 +457,8 @@ def UVW2xyz(p): solid_implant_UVWps = ((((np.array(np.nonzero(solid_quarter)).T - cm) @ E) - w0v)*voxel_size - cp) @ UVWp Up_integrals, Up_bins = np.histogram(solid_implant_UVWps[:,0],200) - figure_FoR_profiles(debug) - figure_FoR_voxels("solid_implant",solid_implant,debug) + figure_FoR_profiles(verbose >= 2) + figure_FoR_voxels("solid_implant",solid_implant,verbose >= 2) back_mask = (Ws<0) front_mask = largest_cc_of((Ws>50)*(~solid_implant))#*(thetas>=theta_from)*(thetas<=theta_to) @@ -462,26 +466,26 @@ def UVW2xyz(p): # back_part = voxels*back_mask front_part = voxels*front_mask - figure_FoR_voxels("back_part", voxels*back_mask, debug) - figure_FoR_voxels("front_part",voxels*front_mask, debug) + figure_FoR_voxels("back_part", voxels*back_mask, verbose >= 2) + figure_FoR_voxels("front_part",voxels*front_mask, verbose >= 2) Cp_zyx = Cp[::-1]*voxel_size Muvwp = zyx_to_UVWp_transform() - print(f"MUvpw = {np.round(Muvwp,2)}") - print(f"UVW = {np.round(UVW,2)}") - print(f"UVWp = {np.round(UVWp,2)}") - print(f"Cp = {np.round(Cp_zyx,2)}") - print(f"cp = {np.round(cp,2)}") - print(f"cm = {np.round(cm,2)}") + if verbose >= 1: print(f"MUvpw = {np.round(Muvwp,2)}") + if verbose >= 1: print(f"UVW = {np.round(UVW,2)}") + if verbose >= 1: print(f"UVWp = {np.round(UVWp,2)}") + if verbose >= 1: print(f"Cp = {np.round(Cp_zyx,2)}") + if verbose >= 1: print(f"cp = {np.round(cp,2)}") + if verbose >= 1: print(f"cm = {np.round(cm,2)}") - figure_FoR_UVWp(debug) + figure_FoR_UVWp(verbose >= 2) - print(f"Physical 
Cp = {Cp[::-1]*voxel_size}") + if verbose >= 1: print(f"Physical Cp = {Cp[::-1]*voxel_size}") output_dir = f"{hdf5_root}/hdf5-byte/msb/" - print(f"Writing frame-of-reference metadata to {output_dir}/{sample}.h5") + if verbose >= 1: print(f"Writing frame-of-reference metadata to {output_dir}/{sample}.h5") update_hdf5(f"{output_dir}/{sample}.h5", group_name="implant-FoR", datasets={"UVW":UVW, @@ -514,32 +518,32 @@ def UVW2xyz(p): output_dir = f"{hdf5_root}/masks/{scale}x/" pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) - print(f"Saving implant_solid mask to {output_dir}/{sample}.h5") + if verbose >= 1: print(f"Saving implant_solid mask to {output_dir}/{sample}.h5") update_hdf5_mask(f"{output_dir}/{sample}.h5", group_name="implant_solid", datasets={"mask":solid_implant}, attributes={"sample":sample,"scale":scale,"voxel_size":voxel_size}) - print(f"Saving implant_shell mask to {output_dir}/{sample}.h5") + if verbose >= 1: print(f"Saving implant_shell mask to {output_dir}/{sample}.h5") update_hdf5_mask(f"{output_dir}/{sample}.h5", group_name="implant_shell", datasets={"mask":implant_shell_mask}, attributes={"sample":sample,"scale":scale,"voxel_size":voxel_size}) - print(f"Saving cut_cylinder_air mask to {output_dir}/{sample}.h5") + if verbose >= 1: print(f"Saving cut_cylinder_air mask to {output_dir}/{sample}.h5") update_hdf5_mask(f"{output_dir}/{sample}.h5", group_name="cut_cylinder_air", datasets={"mask":back_mask}, attributes={"sample":sample,"scale":scale,"voxel_size":voxel_size}) - print(f"Saving cut_cylinder_bone mask to {output_dir}/{sample}.h5") + if verbose >= 1: print(f"Saving cut_cylinder_bone mask to {output_dir}/{sample}.h5") update_hdf5_mask(f"{output_dir}/{sample}.h5", group_name="cut_cylinder_bone", datasets={"mask":front_mask}, attributes={"sample":sample, "scale":scale, "voxel_size":voxel_size}) - print(f"Computing bone region") + if verbose >= 1: print(f"Computing bone region") hist, bins = np.histogram(front_part, 256) hist[0] = 0 peaks, 
info = signal.find_peaks(hist,height=0.5*hist.max()) @@ -547,7 +551,7 @@ def UVW2xyz(p): try: p1, p2 = peaks[np.argsort(info['peak_heights'])[:2]] midpoint = int(round((bins[p1]+bins[p2+1])/2)) # p1 is left-edge of p1-bin, p2+1 is right edge of p2-bin - print(f"p1, p2 = ({p1,bins[p1]}), ({p2,bins[p2]}); midpoint = {midpoint}") + if verbose >= 1: print(f"p1, p2 = ({p1,bins[p1]}), ({p2,bins[p2]}); midpoint = {midpoint}") bone_mask1 = front_part > midpoint closing_diameter, opening_diameter = 400, 300 # micrometers @@ -564,10 +568,10 @@ def UVW2xyz(p): bone_region_mask = largest_cc_of(bone_region_mask) except: - print(f"Wasnt able to separate into resin and bone region. Assuming all is bone region.") + if verbose >= 1: print(f"Wasnt able to separate into resin and bone region. Assuming all is bone region.") bone_region_mask = front_mask - print(f"Saving bone_region mask to {output_dir}/{sample}.h5") + if verbose >= 1: print(f"Saving bone_region mask to {output_dir}/{sample}.h5") update_hdf5_mask(f"{output_dir}/{sample}.h5", group_name="bone_region", datasets={"mask":bone_region_mask}, From 9612a357210f2cbd50fc68cfed989df112bf5776 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 09:29:46 +0100 Subject: [PATCH 041/136] #25 Moved implant_data --- .../implant-data.py => src/processing_steps/800_implant_data.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pre-cleanup-src/segmentation/implant-data.py => src/processing_steps/800_implant_data.py (100%) diff --git a/pre-cleanup-src/segmentation/implant-data.py b/src/processing_steps/800_implant_data.py similarity index 100% rename from pre-cleanup-src/segmentation/implant-data.py rename to src/processing_steps/800_implant_data.py From 20f2c2f292555ce66d7ac98443e09bca2603808f Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 09:31:20 +0100 Subject: [PATCH 042/136] #25 Fixed dependencies of implant_data --- 
src/processing_steps/800_implant_data.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/processing_steps/800_implant_data.py b/src/processing_steps/800_implant_data.py index 5c0ac76..a15c8b4 100644 --- a/src/processing_steps/800_implant_data.py +++ b/src/processing_steps/800_implant_data.py @@ -1,13 +1,12 @@ import h5py, sys, os.path, pathlib, numpy as np, numpy.linalg as la, tqdm sys.path.append(sys.path[0]+"/../") from config.constants import * -from config.paths import hdf5_root, binary_root, commandline_args -from pybind_kernels.geometry import center_of_mass, inertia_matrix, integrate_axes, fill_implant_mask, compute_front_mask -from pybind_kernels.histograms import load_slice, erode_3d_sphere_gpu as erode_3d, dilate_3d_sphere_gpu as dilate_3d +from config.paths import hdf5_root, binary_root +from lib.cpp.cpu_seq.geometry import fill_implant_mask, compute_front_mask import matplotlib.pyplot as plt import scipy as sp, scipy.ndimage as ndi, scipy.interpolate as interpolate, scipy.signal as signal import vedo, vedo.pointcloud as pc -from helper_functions import * +from lib.py.helpers import commandline_args, update_hdf5, update_hdf5_mask from numpy import array, newaxis as NA From d31572f357741038c214770c51c14b1ea7392033 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 09:32:55 +0100 Subject: [PATCH 043/136] #29 Added verbose to implant_data --- src/processing_steps/800_implant_data.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/processing_steps/800_implant_data.py b/src/processing_steps/800_implant_data.py index a15c8b4..cff7989 100644 --- a/src/processing_steps/800_implant_data.py +++ b/src/processing_steps/800_implant_data.py @@ -10,9 +10,11 @@ from numpy import array, newaxis as NA -sample, scale = commandline_args({"sample":"<required>","scale":2}) +sample, scale, verbose = commandline_args({"sample" : "<required>", + "scale" : 2, + "verbose" : 1}) 
-print(f"Loading principal axis and cylinder frame-of-references") +if verbose >= 1: print(f"Loading principal axis and cylinder frame-of-references") h5meta = h5py.File(f"{hdf5_root}/hdf5-byte/msb/{sample}.h5","r") try: h5g = h5meta["implant-FoR"] @@ -26,7 +28,7 @@ print(f"Make sure you have run implant-FoR.py for {sample} at scale {scale}x") sys.exit(-1) -print(f"Loading {scale}x implant mask from {hdf5_root}/masks/{scale}x/{sample}.h5") +if verbose >= 1: print(f"Loading {scale}x implant mask from {hdf5_root}/masks/{scale}x/{sample}.h5") try: implant_file = h5py.File(f"{hdf5_root}/masks/{scale}x/{sample}.h5",'r') implant = implant_file["implant/mask"][:] @@ -49,7 +51,7 @@ bbox_flat = tuple(bbox.flatten()) Muvwp_flat = tuple(Muvwp.flatten()) -print(f"Filling implant mask") +if verbose >= 1: print(f"Filling implant mask") fill_implant_mask(implant.astype(np.uint8,copy=False), voxel_size,bbox_flat, rsqr_fraction, Muvwp_flat, From 0ef3d3f9fd7d3a2285dac4d44d9eadc046c94b70 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 09:35:57 +0100 Subject: [PATCH 044/136] #25 Moved generate_gauss_c and fixed dependencies --- .../processing_steps/900_generate_gauss_c.py | 7 ++++--- src/requirements.txt | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) rename pre-cleanup-src/generate_gauss_c.py => src/processing_steps/900_generate_gauss_c.py (96%) diff --git a/pre-cleanup-src/generate_gauss_c.py b/src/processing_steps/900_generate_gauss_c.py similarity index 96% rename from pre-cleanup-src/generate_gauss_c.py rename to src/processing_steps/900_generate_gauss_c.py index da3bdbb..663eccf 100644 --- a/pre-cleanup-src/generate_gauss_c.py +++ b/src/processing_steps/900_generate_gauss_c.py @@ -7,8 +7,9 @@ from math import pi, sqrt, exp from scipy import ndimage as ndi -from config.paths import hdf5_root, binary_root, commandline_args -import pybind_kernels.histograms as histograms +from config.paths import hdf5_root, binary_root +from 
lib.py.helpers import commandline_args +from lib.cpp.cpu_seq import gauss_filter NA = np.newaxis impl_type = np.float32 @@ -52,7 +53,7 @@ def toint(arr, dtype=np.uint8): start = timeit.default_timer() print(f"Repeated Gauss blurs ({reps} iterations, sigma_voxels={sigma_voxels}, kernel length={radius} coefficients)") - histograms.gauss_filter_par_cpu(implant_mask, implant_mask.shape, kernel, reps, result) + gauss_filter(implant_mask, implant_mask.shape, kernel, reps, result) if verify: print (f'Parallel C edition took {timeit.default_timer() - start} seconds') diff --git a/src/requirements.txt b/src/requirements.txt index 98617bb..5125944 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,4 +1,5 @@ cupy-cuda11x==11.5.0 +edt==2.3.0 fabric==3.0.0 jax==0.4.3 tqdm==4.64.1 From 001fd25664f66c7bff28d2d00c4b9f8a0b66bea7 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 09:38:17 +0100 Subject: [PATCH 045/136] #29 Added verbose to generate_gauss_c --- src/processing_steps/900_generate_gauss_c.py | 39 +++++++++++--------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/src/processing_steps/900_generate_gauss_c.py b/src/processing_steps/900_generate_gauss_c.py index 663eccf..ce827f6 100644 --- a/src/processing_steps/900_generate_gauss_c.py +++ b/src/processing_steps/900_generate_gauss_c.py @@ -21,25 +21,30 @@ def toint(arr, dtype=np.uint8): # sigma is given in physical units, i.e. in micrometers, in order to give scale-invariant results. 
if __name__ == '__main__': - sample, sigma, reps, scale, voxel_size_1x, verify, debug = commandline_args({"sample":"<required>","sigma":40.0,"repititions":10,"scale":2, - "voxel_size_1x":1.85, "verify_against_ndimage":False, "debug_images":True}) - print(f"Diffusion approximation by repeated Gaussian blurs.\n") + sample, sigma, reps, scale, voxel_size_1x, verify, verbose = commandline_args({"sample":"<required>", + "sigma":40.0, + "repititions":10, + "scale":2, + "voxel_size_1x":1.85, + "verify_against_ndimage":False, + "verbose":1}) + if verbose >= 1: print(f"Diffusion approximation by repeated Gaussian blurs.\n") voxel_size = voxel_size_1x*scale sigma_voxels = sigma/voxel_size - print(f"At scale {scale}x, voxel size is {voxel_size} micrometers.") - print(f"Using sigma={sigma} micrometers, sigma_voxels={sigma_voxels}.") + if verbose >= 1: print(f"At scale {scale}x, voxel size is {voxel_size} micrometers.") + if verbose >= 1: print(f"Using sigma={sigma} micrometers, sigma_voxels={sigma_voxels}.") output_dir = f"{binary_root}/fields/implant-gauss/{scale}x" pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) - print(f"Loading implant_solid mask from {hdf5_root}/masks/{scale}x/{sample}.h5") + if verbose >= 1: print(f"Loading implant_solid mask from {hdf5_root}/masks/{scale}x/{sample}.h5") with h5py.File(f"{hdf5_root}/masks/{scale}x/{sample}.h5","r") as f: implant_mask = f['implant_solid/mask'][:] nz,ny,nx = implant_mask.shape - print(f"Implant mask has shape {implant_mask.shape}") + if verbose >= 1: print(f"Implant mask has shape {implant_mask.shape}") - if debug: + if verbose >= 2: print(f"Writing PNGs of implant mask slices to {output_dir}") Image.fromarray(toint(implant_mask[:,:,nx//2].astype(impl_type))).save(f"{output_dir}/{sample}-mask-yz.png") Image.fromarray(toint(implant_mask[:,ny//2,:].astype(impl_type))).save(f"{output_dir}/{sample}-mask-xz.png") @@ -52,20 +57,20 @@ def toint(arr, dtype=np.uint8): if verify: start = timeit.default_timer() - 
print(f"Repeated Gauss blurs ({reps} iterations, sigma_voxels={sigma_voxels}, kernel length={radius} coefficients)") + if verbose >= 1: print(f"Repeated Gauss blurs ({reps} iterations, sigma_voxels={sigma_voxels}, kernel length={radius} coefficients)") gauss_filter(implant_mask, implant_mask.shape, kernel, reps, result) if verify: - print (f'Parallel C edition took {timeit.default_timer() - start} seconds') + if verbose >= 1: print (f'Parallel C edition took {timeit.default_timer() - start} seconds') xs = np.linspace(-1,1,nx) rs = np.sqrt(xs[NA,NA,:]**2 + xs[NA,:,NA]**2) cylinder_mask = (rs<=1) - print(f"Writing diffusion-field to {output_dir}/{sample}.npy") + if verbose >= 1: print(f"Writing diffusion-field to {output_dir}/{sample}.npy") np.save(f'{output_dir}/{sample}.npy', toint(result*cylinder_mask,np.uint16)*cylinder_mask) - if debug: + if verbose >= 2: print(f"Debug: Writing PNGs of result slices to {output_dir}") Image.fromarray(toint(result[nz//2,:,:])).save(f'{output_dir}/{sample}-gauss-xy.png') Image.fromarray(toint(result[:,ny//2,:])).save(f'{output_dir}/{sample}-gauss-xz.png') @@ -82,7 +87,7 @@ def toint(arr, dtype=np.uint8): control[implant_mask] = 1 print (f'ndimage edition took {timeit.default_timer() - start} seconds') np.save(f'{output_dir}/{sample}_ndimage.npy',control) - if debug: + if verbose >= 2: Image.fromarray(toint(control[nz//2,:,:])).save(f'{output_dir}/{sample}-control-xy.png') Image.fromarray(toint(control[:,ny//2,:])).save(f'{output_dir}/{sample}-control-xz.png') Image.fromarray(toint(control[:,:,nx//2])).save(f'{output_dir}/{sample}-control-yz.png') @@ -103,21 +108,21 @@ def toint(arr, dtype=np.uint8): plt.savefig(f'{output_dir}/{sample}-diff-{name}.png') - print(f"Computing Euclidean distance transform.") + if verbose >= 1: print(f"Computing Euclidean distance transform.") fedt = edt.edt(~implant_mask,parallel=16) del implant_mask edt_output_dir = f"{binary_root}/fields/implant-edt/{scale}x" 
pathlib.Path(edt_output_dir).mkdir(parents=True, exist_ok=True) - print(f"Writing EDT-field to {edt_output_dir}/{sample}.npy") + if verbose >= 1: print(f"Writing EDT-field to {edt_output_dir}/{sample}.npy") np.save(f'{edt_output_dir}/{sample}.npy', toint(fedt*cylinder_mask,np.uint16)*cylinder_mask) mixed_output_dir = f"{binary_root}/fields/implant-gauss+edt/{scale}x" - print(f"Writing combined field to {mixed_output_dir}/{sample}.npy") + if verbose >= 1: print(f"Writing combined field to {mixed_output_dir}/{sample}.npy") pathlib.Path(mixed_output_dir).mkdir(parents=True, exist_ok=True) result = (result-fedt/(fedt.max()))*cylinder_mask result -= result.min() result /= result.max() - print(f"Result (min,max) = ({result.min(),result.max()})") + if verbose >= 1: print(f"Result (min,max) = ({result.min(),result.max()})") np.save(f'{mixed_output_dir}/{sample}.npy', toint(result*cylinder_mask,np.uint16)*cylinder_mask) From 0ab97a2f79ff9616af6b8d77788e7efb6a91eb35 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 09:40:34 +0100 Subject: [PATCH 046/136] #25 Made room for init.d-like naming scheme --- .../{100_cache_esrf2013.py => 0100_cache_esrf2013.py} | 0 .../{200_generate_byte_hdf5.py => 0200_generate_byte_hdf5.py} | 0 .../{300_volume_matcher.py => 0300_volume_matcher.py} | 0 src/processing_steps/{400_h5tobin.py => 0400_h5tobin.py} | 0 .../{500_rescale_cupy_bin.py => 0500_rescale_cupy_bin.py} | 0 .../{600_segment_implant_cc.py => 0600_segment_implant_cc.py} | 0 src/processing_steps/{700_implant_FoR.py => 0700_implant_FoR.py} | 0 .../{800_implant_data.py => 0800_implant_data.py} | 0 .../{900_generate_gauss_c.py => 0900_generate_gauss_c.py} | 0 9 files changed, 0 insertions(+), 0 deletions(-) rename src/processing_steps/{100_cache_esrf2013.py => 0100_cache_esrf2013.py} (100%) rename src/processing_steps/{200_generate_byte_hdf5.py => 0200_generate_byte_hdf5.py} (100%) rename src/processing_steps/{300_volume_matcher.py => 
0300_volume_matcher.py} (100%) rename src/processing_steps/{400_h5tobin.py => 0400_h5tobin.py} (100%) rename src/processing_steps/{500_rescale_cupy_bin.py => 0500_rescale_cupy_bin.py} (100%) rename src/processing_steps/{600_segment_implant_cc.py => 0600_segment_implant_cc.py} (100%) rename src/processing_steps/{700_implant_FoR.py => 0700_implant_FoR.py} (100%) rename src/processing_steps/{800_implant_data.py => 0800_implant_data.py} (100%) rename src/processing_steps/{900_generate_gauss_c.py => 0900_generate_gauss_c.py} (100%) diff --git a/src/processing_steps/100_cache_esrf2013.py b/src/processing_steps/0100_cache_esrf2013.py similarity index 100% rename from src/processing_steps/100_cache_esrf2013.py rename to src/processing_steps/0100_cache_esrf2013.py diff --git a/src/processing_steps/200_generate_byte_hdf5.py b/src/processing_steps/0200_generate_byte_hdf5.py similarity index 100% rename from src/processing_steps/200_generate_byte_hdf5.py rename to src/processing_steps/0200_generate_byte_hdf5.py diff --git a/src/processing_steps/300_volume_matcher.py b/src/processing_steps/0300_volume_matcher.py similarity index 100% rename from src/processing_steps/300_volume_matcher.py rename to src/processing_steps/0300_volume_matcher.py diff --git a/src/processing_steps/400_h5tobin.py b/src/processing_steps/0400_h5tobin.py similarity index 100% rename from src/processing_steps/400_h5tobin.py rename to src/processing_steps/0400_h5tobin.py diff --git a/src/processing_steps/500_rescale_cupy_bin.py b/src/processing_steps/0500_rescale_cupy_bin.py similarity index 100% rename from src/processing_steps/500_rescale_cupy_bin.py rename to src/processing_steps/0500_rescale_cupy_bin.py diff --git a/src/processing_steps/600_segment_implant_cc.py b/src/processing_steps/0600_segment_implant_cc.py similarity index 100% rename from src/processing_steps/600_segment_implant_cc.py rename to src/processing_steps/0600_segment_implant_cc.py diff --git a/src/processing_steps/700_implant_FoR.py 
b/src/processing_steps/0700_implant_FoR.py similarity index 100% rename from src/processing_steps/700_implant_FoR.py rename to src/processing_steps/0700_implant_FoR.py diff --git a/src/processing_steps/800_implant_data.py b/src/processing_steps/0800_implant_data.py similarity index 100% rename from src/processing_steps/800_implant_data.py rename to src/processing_steps/0800_implant_data.py diff --git a/src/processing_steps/900_generate_gauss_c.py b/src/processing_steps/0900_generate_gauss_c.py similarity index 100% rename from src/processing_steps/900_generate_gauss_c.py rename to src/processing_steps/0900_generate_gauss_c.py From 3fe7dfd6776bded1056b26187d3143fbca873394 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 09:53:41 +0100 Subject: [PATCH 047/136] #25 Moved compute_histograms --- .../processing_steps/1000_compute_histograms.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pre-cleanup-src/histogram_processing/compute_histograms.py => src/processing_steps/1000_compute_histograms.py (100%) diff --git a/pre-cleanup-src/histogram_processing/compute_histograms.py b/src/processing_steps/1000_compute_histograms.py similarity index 100% rename from pre-cleanup-src/histogram_processing/compute_histograms.py rename to src/processing_steps/1000_compute_histograms.py From 6f2db2a47921ffbcb626239d2a1b15be39e4ddfb Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:08:00 +0100 Subject: [PATCH 048/136] #25 Preliminary fix to compute_histograms dependencies --- .../1000_compute_histograms.py | 53 ++++++++++++------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/src/processing_steps/1000_compute_histograms.py b/src/processing_steps/1000_compute_histograms.py index 75f8490..529ad79 100755 --- a/src/processing_steps/1000_compute_histograms.py +++ b/src/processing_steps/1000_compute_histograms.py @@ -1,21 +1,26 @@ #!/usr/bin/env python3 import os, sys, pathlib, 
copy, scipy.ndimage as ndi sys.path.append(sys.path[0]+"/../") -import pybind_kernels.histograms as histograms +# TODO Move benchmarking out of this script. +from lib.cpp.cpu_seq.histograms import axis_histogram as axis_histogram_seq_cpu, field_histogram as field_histogram_seq_cpu +from lib.cpp.cpu.histograms import axis_histogram as axis_histogram_par_cpu, field_histogram as field_histogram_par_cpu, field_histogram_resample +from lib.cpp.gpu.histograms import axis_histogram as axis_histogram_par_gpu, field_histogram as field_histogram_par_gpu +from lib.cpp.cpu_seq.histograms import masked_minmax # TODO is it histogram specific? import numpy as np, h5py, timeit from datetime import datetime from PIL import Image from tqdm import tqdm from config.paths import * from config.constants import implant_threshold -from helper_functions import block_info, load_block +from lib.py.helpers import block_info, load_block, commandline_args NA = np.newaxis +verbose = 1 # TODO: Currently specialized to uint16_t -masked_minmax = histograms.masked_minmax +#masked_minmax = histograms.masked_minmax -def axes_histogram(voxels, func=histograms.axis_histogram_seq_cpu, ranges=None, voxel_bins=256): +def axes_histogram(voxels, func=axis_histogram_seq_cpu, ranges=None, voxel_bins=256): (Nz,Ny,Nx) = voxels.shape Nr = int(np.sqrt((Nx//2)**2 + (Ny//2)**2))+1 @@ -28,25 +33,25 @@ def axes_histogram(voxels, func=histograms.axis_histogram_seq_cpu, ranges=None, vmin, vmax = masked_minmax(voxels) else: vmin, vmax = ranges - print ("Entering call", datetime.now()) - func(voxels, x_bins, y_bins, z_bins, r_bins, vmin, vmax, True) - print ("Exited call", datetime.now()) + if verbose >= 1: print ("Entering call", datetime.now()) + func(voxels, x_bins, y_bins, z_bins, r_bins, vmin, vmax, verbose >= 1) + if verbose >= 1: print ("Exited call", datetime.now()) return x_bins, y_bins, z_bins, r_bins def field_histogram(voxels, field, field_bins, voxel_bins, ranges): bins = np.zeros((field_bins, voxel_bins), 
dtype=np.uint64) vmin, vmax = ranges # python3 histograms_tester.py 770c_pag 1849.98s user 170.42s system 512% cpu 6:33.95 total - histograms.field_histogram_par_cpu(voxels, field, bins, vmin, vmax) + field_histogram_par_cpu(voxels, field, bins, vmin, vmax) # python3 histograms_tester.py 770c_pag 1095.49s user 141.76s system 104% cpu 19:44.64 total - #histograms.field_histogram_seq_cpu(voxels, field, bins, vmin, vmax) + #field_histogram_seq_cpu(voxels, field, bins, vmin, vmax) return bins def verify_axes_histogram(voxels, ranges=(1,4095), voxel_bins=256): tolerance = 1e-5 - schx, schy, schz, schr = axes_histogram(voxels, func=histograms.axis_histogram_seq_cpu, ranges=ranges, voxel_bins=voxel_bins) - pchx, pchy, pchz, pchr = axes_histogram(voxels, func=histograms.axis_histogram_par_cpu, ranges=ranges, voxel_bins=voxel_bins) + schx, schy, schz, schr = axes_histogram(voxels, func=axis_histogram_seq_cpu, ranges=ranges, voxel_bins=voxel_bins) + pchx, pchy, pchz, pchr = axes_histogram(voxels, func=axis_histogram_par_cpu, ranges=ranges, voxel_bins=voxel_bins) dx = np.abs(schx - pchx).sum() dy = np.abs(schy - pchy).sum() @@ -62,7 +67,7 @@ def verify_axes_histogram(voxels, ranges=(1,4095), voxel_bins=256): print (f'diff z = {dz}') print (f'diff r = {dr}') - pghx, pghy, pghz, pghr = axes_histogram(voxels, func=histograms.axis_histogram_par_gpu, ranges=ranges, voxel_bins=voxel_bins) + pghx, pghy, pghz, pghr = axes_histogram(voxels, func=axis_histogram_par_gpu, ranges=ranges, voxel_bins=voxel_bins) dx = np.abs(schx - pghx).sum() dy = np.abs(schy - pghy).sum() @@ -87,9 +92,9 @@ def benchmark_axes_histograms(voxels, ranges=(1,4095), voxel_bins=256, runs=10): print() print('----- Benchmarking -----') print() - seq_cpu = timeit.timeit(lambda: axes_histogram(voxels, func=histograms.axis_histogram_seq_cpu, ranges=ranges, voxel_bins=voxel_bins), number=runs) - par_cpu = timeit.timeit(lambda: axes_histogram(voxels, func=histograms.axis_histogram_par_cpu, ranges=ranges, 
voxel_bins=voxel_bins), number=runs) - par_gpu = timeit.timeit(lambda: axes_histogram(voxels, func=histograms.axis_histogram_par_gpu, ranges=ranges, voxel_bins=voxel_bins), number=runs) + seq_cpu = timeit.timeit(lambda: axes_histogram(voxels, func=axis_histogram_seq_cpu, ranges=ranges, voxel_bins=voxel_bins), number=runs) + par_cpu = timeit.timeit(lambda: axes_histogram(voxels, func=axis_histogram_par_cpu, ranges=ranges, voxel_bins=voxel_bins), number=runs) + par_gpu = timeit.timeit(lambda: axes_histogram(voxels, func=axis_histogram_par_gpu, ranges=ranges, voxel_bins=voxel_bins), number=runs) print (f'Average of {runs} runs:') print (f'Seq CPU: {seq_cpu / runs:9.04f}') print (f'Par CPU: {par_cpu / runs:9.04f}') @@ -142,9 +147,9 @@ def run_out_of_core(sample, block_size=128, z_offset=0, n_blocks=0, voxels, fields = load_block(sample, zstart, block_size, mask, mask_scale, field_names) for i in tqdm(range(1),"Histogramming over x,y,z axes and radius", leave=True): - histograms.axis_histogram_par_gpu(voxels, (zstart, 0, 0), voxels.shape[0], x_bins, y_bins, z_bins, r_bins, center, (vmin, vmax), False) + axis_histogram_par_gpu(voxels, (zstart, 0, 0), voxels.shape[0], x_bins, y_bins, z_bins, r_bins, center, (vmin, vmax), False) for i in tqdm(range(Nfields),f"Histogramming w.r.t. 
fields {field_names}", leave=True): - histograms.field_histogram_resample_par_cpu(voxels, fields[i], (zstart, 0, 0), (Nz, Ny, Nx), (Nz//2,Ny//2,Nx//2), voxels.shape[0], f_bins[i], (vmin, vmax), (fmin, fmax)) + field_histogram_resample(voxels, fields[i], (zstart, 0, 0), (Nz, Ny, Nx), (Nz//2,Ny//2,Nx//2), voxels.shape[0], f_bins[i], (vmin, vmax), (fmin, fmax)) f_bins[:, 0,:] = 0 # TODO EDT mask hack f_bins[:,-1,:] = 0 # TODO "bright" mask hack @@ -165,10 +170,18 @@ def run_out_of_core(sample, block_size=128, z_offset=0, n_blocks=0, # Special parameter values: # - block_size == 0 means "do one full subvolume at the time, interpret z_offset as start-at-subvolume-number" # - n_blocks == 0 means "all blocks" + # TODO move some of the constants / parameters out into the configuration sample, block_size, z_offset, n_blocks, suffix, \ - mask, mask_scale, voxel_bins, field_bins = commandline_args({"sample":"<required>", - "block_size":256, "z_offset": 0, "n_blocks":0, "suffix":"", - "mask":"None", "mask_scale": 8, "voxel_bins":4096, "field_bins":2048}) + mask, mask_scale, voxel_bins, field_bins, verbose = commandline_args({"sample" : "<required>", + "block_size" : 256, + "z_offset" : 0, + "n_blocks" : 0, + "suffix" : "", + "mask" : "None", + "mask_scale" : 8, + "voxel_bins" : 4096, + "field_bins" : 2048, + "verbose" : 1}) implant_threshold_u16 = 32000 # TODO: use config.constants (vmin,vmax),(fmin,fmax) = ((1e4,3e4),(1,2**16-1)) # TODO: Compute from total voxel histogram resp. 
total field histogram From c29fb87c81af61230f87755bdeb1dfa0ef895a8f Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:09:38 +0100 Subject: [PATCH 049/136] #25 Moved optimize_distributions_flat --- pre-cleanup-src/doitall.py | 10 +++++++--- .../1100_optimize_distributions_flat.py | 0 2 files changed, 7 insertions(+), 3 deletions(-) rename pre-cleanup-src/histogram_processing/optimize_distributions_flat.py => src/processing_steps/1100_optimize_distributions_flat.py (100%) diff --git a/pre-cleanup-src/doitall.py b/pre-cleanup-src/doitall.py index 47648ee..341ac0e 100644 --- a/pre-cleanup-src/doitall.py +++ b/pre-cleanup-src/doitall.py @@ -60,17 +60,21 @@ 11) for b $(seq 0 $nblocks); do python3 histogram_processing/optimize_distributions_flat.py $sample bone_region$b edt 4 0; done + ;; + + 12) for b $(seq 0 $nblocks); do python3 histogram_processing/compute_probabilities_flat.py $sample bone_region$b edt 10 0; done ;; - 12) + 13) python3 scripts/segment-from-distributions.py $sample 0 0 bone_region optimized_distributions ;; - 13) for m in 0 1; do python3 preprocess/rescale-cupy-bin.py $sample segmented/P$m ; done + 14) + for m in 0 1; do python3 preprocess/rescale-cupy-bin.py $sample segmented/P$m ; done ;; - 14) + 15) python3 segmentation/segment-blod-cc.py $sample ;; diff --git a/pre-cleanup-src/histogram_processing/optimize_distributions_flat.py b/src/processing_steps/1100_optimize_distributions_flat.py similarity index 100% rename from pre-cleanup-src/histogram_processing/optimize_distributions_flat.py rename to src/processing_steps/1100_optimize_distributions_flat.py From 01933f7df3f12093d182420352b98ef63c49853b Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:18:52 +0100 Subject: [PATCH 050/136] #25 Fixed some of the dependencies of optimize_distributions_flat --- .../histogram_processing => src/lib/py}/distributions.py | 0 .../lib/py}/piecewise_cubic.py | 0 
src/processing_steps/1100_optimize_distributions_flat.py | 8 ++++---- 3 files changed, 4 insertions(+), 4 deletions(-) rename {pre-cleanup-src/histogram_processing => src/lib/py}/distributions.py (100%) rename {pre-cleanup-src/histogram_processing => src/lib/py}/piecewise_cubic.py (100%) diff --git a/pre-cleanup-src/histogram_processing/distributions.py b/src/lib/py/distributions.py similarity index 100% rename from pre-cleanup-src/histogram_processing/distributions.py rename to src/lib/py/distributions.py diff --git a/pre-cleanup-src/histogram_processing/piecewise_cubic.py b/src/lib/py/piecewise_cubic.py similarity index 100% rename from pre-cleanup-src/histogram_processing/piecewise_cubic.py rename to src/lib/py/piecewise_cubic.py diff --git a/src/processing_steps/1100_optimize_distributions_flat.py b/src/processing_steps/1100_optimize_distributions_flat.py index a5225d3..3deb5a6 100644 --- a/src/processing_steps/1100_optimize_distributions_flat.py +++ b/src/processing_steps/1100_optimize_distributions_flat.py @@ -1,9 +1,9 @@ import os, sys, tqdm, numpy as np, matplotlib.pyplot as plt, numpy.linalg as la, scipy.ndimage as ndi, scipy.optimize as opt, time sys.path.append(sys.path[0]+"/../") -from piecewise_cubic import piecewisecubic_matrix, piecewisecubic, smooth_fun -from config.paths import commandline_args, hdf5_root as hdf5_root -from distributions import * -from helper_functions import * +from lib.py.piecewise_cubic import piecewisecubic_matrix, piecewisecubic, smooth_fun +from config.paths import hdf5_root +from lib.py.distributions import powers +from lib.py.helpers import commandline_args, row_normalize, update_hdf5 na = np.newaxis hist_path = f"{hdf5_root}/processed/histograms/" From 9f7cd297bb297c730df96e4634ba4b791725f203 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:22:20 +0100 Subject: [PATCH 051/136] #29 added verbose to optimize_distributions_flat --- .../1100_optimize_distributions_flat.py | 24 
+++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/processing_steps/1100_optimize_distributions_flat.py b/src/processing_steps/1100_optimize_distributions_flat.py index 3deb5a6..c349ff1 100644 --- a/src/processing_steps/1100_optimize_distributions_flat.py +++ b/src/processing_steps/1100_optimize_distributions_flat.py @@ -7,11 +7,11 @@ na = np.newaxis hist_path = f"{hdf5_root}/processed/histograms/" -sample, region_mask, field, stride, debug = commandline_args({"sample":"<required>", - "region_mask":"<required>", - "field":"edt", - "stride": 4, - "debug":8 +sample, region_mask, field, stride, verbose = commandline_args({"sample":"<required>", + "region_mask":"<required>", + "field":"edt", + "stride": 4, + "verbose":8 }) f_hist = np.load(f"{hist_path}/{sample}/bins-{region_mask}.npz") @@ -32,7 +32,7 @@ def material_points(labs,material_id): #hist = hist/(sums + (sums==0)) lab = f_labels[field][::stride,::stride] -if debug==1: +if verbose >= 2: plt.imshow(lab) plt.show() @@ -61,7 +61,7 @@ def material_points(labs,material_id): print(f"Optimizing distributions for {field} with {lab.max()} materials") -if (debug&7): +if (verbose >= 3): plt.ion() fig = plt.figure(figsize=(15,15)) ax = fig.add_subplot(111) @@ -105,7 +105,7 @@ def opt_all(abcd,*args): Ecloseness = np.sum(1/(np.abs(C[1:]-C[:-1])+0.001)) # print(np.round(E1,2), np.round(1e2*Ecloseness,2)) - if(debug==2): + if(verbose >= 3): line1.set_ydata(model) ax.set_title(f"{x}: a = {np.round(A*A,1)}, b = {np.round(B*B,1)}, c = {np.round(C,1)}, d = {np.round(D*D,1)}") ax.relim() @@ -132,7 +132,7 @@ def opt_all(abcd,*args): if(n>0): abcd0 = np.array([amx[ms,i], bmx[ms,i], cmx[ms,i], dmx[ms,i]]).flatten() - if (debug==1): + if (verbose == 2): model = powers(vs,abcd0) line1.set_ydata(np.sum(model,axis=0)) line2.set_ydata(hist[i]) @@ -141,7 +141,7 @@ def opt_all(abcd,*args): fig.canvas.draw() fig.canvas.flush_events() - if(debug==2): + if(verbose == 3): ax.set_title(f"x = {x}") 
line2.set_ydata(hist[i]) @@ -180,7 +180,7 @@ def opt_all(abcd,*args): # print(f"ABCDm = {ABCDm}") - if(debug==4): + if(verbose == 5): colors = ['r','orange'] lines = [line3,line4] model = powers(vs,abcd) @@ -213,7 +213,7 @@ def opt_all(abcd,*args): hist_modeled[gi] = np.sum(model,axis=0) hist_m[ms,gi] = model -if (debug&8): +if (verbose == 6): fig = plt.figure(figsize=(10,10)) axarr = fig.subplots(2,2) fig.suptitle(f'{sample} {region_mask}') # or plt.suptitle('Main title') From fd0a7a21959076b6fd87fb1b92c8dac75779d3fc Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:26:11 +0100 Subject: [PATCH 052/136] Moved compute_probabilities_flat --- .../processing_steps/1200_compute_probabilities_flat.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pre-cleanup-src/histogram_processing/compute_probabilities.py => src/processing_steps/1200_compute_probabilities_flat.py (100%) diff --git a/pre-cleanup-src/histogram_processing/compute_probabilities.py b/src/processing_steps/1200_compute_probabilities_flat.py similarity index 100% rename from pre-cleanup-src/histogram_processing/compute_probabilities.py rename to src/processing_steps/1200_compute_probabilities_flat.py From 747a3af596c800170d02edca8f681510bebcd494 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:29:52 +0100 Subject: [PATCH 053/136] #25 Updated the dependencies of compute_probabilities_flat --- src/processing_steps/1200_compute_probabilities_flat.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/processing_steps/1200_compute_probabilities_flat.py b/src/processing_steps/1200_compute_probabilities_flat.py index f30d028..7e3988b 100644 --- a/src/processing_steps/1200_compute_probabilities_flat.py +++ b/src/processing_steps/1200_compute_probabilities_flat.py @@ -1,10 +1,11 @@ import os, sys, tqdm, numpy as np, matplotlib.pyplot as plt, numpy.linalg as la, scipy.ndimage as ndi, 
scipy.optimize as opt, time +import h5py +import pathlib sys.path.append(sys.path[0]+"/../") #from piecewise_linear import piecewiselinear_matrix, piecewiselinear, smooth_fun as smooth_fun_l -from piecewise_cubic import piecewisecubic_matrix, piecewisecubic, smooth_fun as smooth_fun_c +from lib.py.piecewise_cubic import piecewisecubic_matrix, piecewisecubic, smooth_fun as smooth_fun_c from config.paths import commandline_args, hdf5_root as hdf5_root -from distributions import * -from helper_functions import * +from lib.py.helpers import update_hdf5, row_normalize na = np.newaxis From 29a950c751fb51284cb2a63a9a1498e6249c12a9 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:33:35 +0100 Subject: [PATCH 054/136] #29 Added verbose to compute_probabilties_flat --- .../1200_compute_probabilities_flat.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/processing_steps/1200_compute_probabilities_flat.py b/src/processing_steps/1200_compute_probabilities_flat.py index 7e3988b..931a502 100644 --- a/src/processing_steps/1200_compute_probabilities_flat.py +++ b/src/processing_steps/1200_compute_probabilities_flat.py @@ -4,16 +4,16 @@ sys.path.append(sys.path[0]+"/../") #from piecewise_linear import piecewiselinear_matrix, piecewiselinear, smooth_fun as smooth_fun_l from lib.py.piecewise_cubic import piecewisecubic_matrix, piecewisecubic, smooth_fun as smooth_fun_c -from config.paths import commandline_args, hdf5_root as hdf5_root -from lib.py.helpers import update_hdf5, row_normalize +from config.paths import hdf5_root +from lib.py.helpers import commandline_args, row_normalize, update_hdf5 na = np.newaxis - +verbose = 1 # TODO: Til fælles fil. 
def save_probabilities(Ps,sample, region_mask,field_name, value_ranges, prob_method): output_path = f'{hdf5_root}/processed/probabilities/{sample}.h5' - print(f"output_path = {output_path}") - print(f"group_name1 = {prob_method}/{region_mask}\n" + + if verbose >= 1: print(f"output_path = {output_path}") + if verbose >= 1: print(f"group_name1 = {prob_method}/{region_mask}\n" + f"group_name2 = {prob_method}/{region_mask}/{field_name}") update_hdf5( output_path, @@ -22,7 +22,7 @@ def save_probabilities(Ps,sample, region_mask,field_name, value_ranges, prob_met attributes = {} ) for m,P in enumerate(Ps): - print(f"Storing {P.shape} probabilities P{m}") + if verbose >= 1: print(f"Storing {P.shape} probabilities P{m}") update_hdf5( output_path, group_name = f'{prob_method}/{region_mask}/{field_name}', @@ -47,11 +47,11 @@ def evaluate_2d(G, xs, vs): hist_path = f"{hdf5_root}/processed/histograms/" -sample, region_mask, field_name, n_segments_c, debug = commandline_args({"sample":"<required>", - "region_mask":"<required>", - "field_name":"edt", - "n_segments": 4, - "debug":8 +sample, region_mask, field_name, n_segments_c, verbose = commandline_args({"sample" : "<required>", + "region_mask" : "<required>", + "field_name" : "edt", + "n_segments" : 4, + "verbose" : 8 }) hist_path = f"{hdf5_root}/processed/histograms/" @@ -150,7 +150,7 @@ def evaluate_2d(G, xs, vs): ##---- TODO: STICK THE DEBUG-PLOTTING FUNCTIONS SOMEWHERE CENTRAL -if (debug&7): +if (verbose & 7): plt.ion() fig = plt.figure(figsize=(15,15)) ax = fig.add_subplot(111) @@ -162,7 +162,7 @@ def evaluate_2d(G, xs, vs): plt.show() -if(debug==4): +if(verbose == 4): colors = ['b','r'] lines = [line3,line4] @@ -182,7 +182,7 @@ def evaluate_2d(G, xs, vs): fig.canvas.flush_events() -if (debug==8): +if (verbose == 8): fig = plt.figure(figsize=(10,10)) axarr = fig.subplots(3,2) fig.suptitle(f'{sample} {region_mask}') @@ -209,7 +209,7 @@ def evaluate_2d(G, xs, vs): -if (debug==10): +if (verbose == 10): fig = 
plt.figure(figsize=(15,15)) axarr = fig.subplots(2,2) fig.suptitle(f'{sample} {region_mask}') From 95ef0e094ca31aec500358c0d3346536be207e7c Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:35:02 +0100 Subject: [PATCH 055/136] #25 Moved segment from distributions --- .../processing_steps/1300_segment_from_distributions.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pre-cleanup-src/scripts/segment_from_distributions.py => src/processing_steps/1300_segment_from_distributions.py (100%) diff --git a/pre-cleanup-src/scripts/segment_from_distributions.py b/src/processing_steps/1300_segment_from_distributions.py similarity index 100% rename from pre-cleanup-src/scripts/segment_from_distributions.py rename to src/processing_steps/1300_segment_from_distributions.py From 4d7a57a1242dc2467c8462c24665244ca3fea5f1 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:38:57 +0100 Subject: [PATCH 056/136] #25 Fixed dependencies of segment_from_distributions --- .../1300_segment_from_distributions.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/processing_steps/1300_segment_from_distributions.py b/src/processing_steps/1300_segment_from_distributions.py index 837644f..5554e9f 100644 --- a/src/processing_steps/1300_segment_from_distributions.py +++ b/src/processing_steps/1300_segment_from_distributions.py @@ -1,12 +1,14 @@ import os, sys, pathlib, h5py, numpy as np, scipy.ndimage as ndi sys.path.append(sys.path[0]+"/../") -import pybind_kernels.histograms as histograms -import pybind_kernels.label as label -from config.paths import binary_root, hdf5_root_fast as hdf5_root, commandline_args +#import pybind_kernels.histograms as histograms +#import pybind_kernels.label as label +from lib.cpp.gpu.label import material_prob_justonefieldthx +from lib.cpp.cpu.io import write_slice +from config.paths import binary_root, hdf5_root_fast as hdf5_root 
from tqdm import tqdm import matplotlib.pyplot as plt from PIL import Image -from helper_functions import block_info, load_block +from lib.py.helpers import block_info, commandline_args, load_block na = np.newaxis debug = True @@ -20,7 +22,7 @@ def load_probabilities(path, group, axes_names, field_names, m): prob_file.close() return P_axes, P_fields except Exception as e: - print(f"Couldn't load {group}/{name}/P{m} from {path}: {e}") + print(f"Couldn't load {group}/{axes_names}|{field_names}/P{m} from {path}: {e}") sys.exit(-1) def load_value_ranges(path, group): @@ -97,7 +99,7 @@ def nblocks(size, block_size): result = np.zeros((zend-zstart,Ny,Nx), dtype=np.uint16) - label.material_prob_justonefieldthx(voxels,fields[0],P_fields[0],result, + material_prob_justonefieldthx(voxels,fields[0],P_fields[0],result, (vmin,vmax),(fmin,fmax), (zstart,0,0), (zend,Ny,Nx)); @@ -115,5 +117,5 @@ def nblocks(size, block_size): print (f'Segmentation has min {result.min()} and max {result.max()}') print(f"Writing results from block {b}") - histograms.write_slice(result, zstart*Ny*Nx, output_file) + write_slice(result, zstart*Ny*Nx, output_file) From 96447e64531e1e5531de1984ea1e829575900f6e Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:42:39 +0100 Subject: [PATCH 057/136] #29 Added verbose to segment from distributions --- .../1300_segment_from_distributions.py | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/processing_steps/1300_segment_from_distributions.py b/src/processing_steps/1300_segment_from_distributions.py index 5554e9f..6d9e29f 100644 --- a/src/processing_steps/1300_segment_from_distributions.py +++ b/src/processing_steps/1300_segment_from_distributions.py @@ -10,9 +10,7 @@ from PIL import Image from lib.py.helpers import block_info, commandline_args, load_block na = np.newaxis - -debug = True - +verbose = 1 def load_probabilities(path, group, axes_names, field_names, m): try: @@ 
-26,7 +24,7 @@ def load_probabilities(path, group, axes_names, field_names, m): sys.exit(-1) def load_value_ranges(path, group): - print(f"Reading value_ranges from {group} in {path}\n") + if verbose >= 1: print(f"Reading value_ranges from {group} in {path}\n") try: f = h5py.File(path, 'r') return f[group]['value_ranges'][:].astype(int) @@ -39,14 +37,14 @@ def nblocks(size, block_size): return (size // block_size) + (1 if size % block_size > 0 else 0) if __name__ == '__main__': - sample, block_start, n_blocks, region_mask, group, mask_scale, scheme, debug_output = commandline_args({'sample':'<required>', - "block_start":0, - "n_blocks":0, - 'region_mask': 'bone_region', - 'group': 'otsu_separation', - 'mask_scale': 8, - 'scheme':"edt", #MIDLERTIDIG - 'debug_output': None}) + sample, block_start, n_blocks, region_mask, group, mask_scale, scheme, verbose = commandline_args({'sample' : '<required>', + "block_start" : 0, + "n_blocks" : 0, + 'region_mask' : 'bone_region', + 'group' : 'otsu_separation', + 'mask_scale' : 8, + 'scheme' : "edt", #MIDLERTIDIG + 'verbose' : 1}) # Iterate over all subvolumes bi = block_info(f'{hdf5_root}/hdf5-byte/msb/{sample}.h5', block_size=0, n_blocks=n_blocks, z_offset=block_start) @@ -113,9 +111,8 @@ def nblocks(size, block_size): # (zstart, 0, 0), (zend, sy, sx) # ) - if debug: - print (f'Segmentation has min {result.min()} and max {result.max()}') + if verbose >= 2: print (f'Segmentation has min {result.min()} and max {result.max()}') - print(f"Writing results from block {b}") + if verbose >= 1: print(f"Writing results from block {b}") write_slice(result, zstart*Ny*Nx, output_file) From b53638e1301ed1efb8caeca01340c1639a250079 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:45:21 +0100 Subject: [PATCH 058/136] #25 Added processing step 14 as a symlink --- src/processing_steps/1400_rescale_cupy_bin.py | 1 + 1 file changed, 1 insertion(+) create mode 120000 
src/processing_steps/1400_rescale_cupy_bin.py diff --git a/src/processing_steps/1400_rescale_cupy_bin.py b/src/processing_steps/1400_rescale_cupy_bin.py new file mode 120000 index 0000000..ec15bd3 --- /dev/null +++ b/src/processing_steps/1400_rescale_cupy_bin.py @@ -0,0 +1 @@ +processing_steps/0500_rescale_cupy_bin.py \ No newline at end of file From 0ef89f2dad8dcc89c6c32a671c67a190a3e01057 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:46:24 +0100 Subject: [PATCH 059/136] #25 Moved segment_blood_cc --- .../processing_steps/1500_segment_blood_cc.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pre-cleanup-src/segmentation/segment-blood-cc.py => src/processing_steps/1500_segment_blood_cc.py (100%) diff --git a/pre-cleanup-src/segmentation/segment-blood-cc.py b/src/processing_steps/1500_segment_blood_cc.py similarity index 100% rename from pre-cleanup-src/segmentation/segment-blood-cc.py rename to src/processing_steps/1500_segment_blood_cc.py From e63def628c709a316e6964485cdadc6fea4a01ad Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:49:03 +0100 Subject: [PATCH 060/136] #25 Fixed the dependencies of segment_blood_cc --- src/processing_steps/1500_segment_blood_cc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/processing_steps/1500_segment_blood_cc.py b/src/processing_steps/1500_segment_blood_cc.py index 5925347..ad31808 100644 --- a/src/processing_steps/1500_segment_blood_cc.py +++ b/src/processing_steps/1500_segment_blood_cc.py @@ -1,10 +1,10 @@ import h5py, sys, os.path, pathlib, numpy as np, scipy.ndimage as ndi, tqdm, matplotlib.pyplot as plt sys.path.append(sys.path[0]+"/../") from config.constants import * -from config.paths import hdf5_root, hdf5_root_fast, binary_root, commandline_args -from pybind_kernels.histograms import load_slice +from config.paths import hdf5_root, hdf5_root_fast, binary_root +from lib.cpp.cpu 
import load_slice from scipy import ndimage as ndi -from helper_functions import * +from lib.py.helpers import block_info, commandline_args, update_hdf5 sample, m, scheme, chunk_size = commandline_args({"sample":"<required>", "material":0, "scheme":"edt","chunk_size":256}) From 5857d92a2ab9ebd662a45606b5d9086550f5dc0a Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 10:50:13 +0100 Subject: [PATCH 061/136] #29 Added verbose to segment_blood_cc --- src/processing_steps/1500_segment_blood_cc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/processing_steps/1500_segment_blood_cc.py b/src/processing_steps/1500_segment_blood_cc.py index ad31808..a0e1ea2 100644 --- a/src/processing_steps/1500_segment_blood_cc.py +++ b/src/processing_steps/1500_segment_blood_cc.py @@ -6,7 +6,11 @@ from scipy import ndimage as ndi from lib.py.helpers import block_info, commandline_args, update_hdf5 -sample, m, scheme, chunk_size = commandline_args({"sample":"<required>", "material":0, "scheme":"edt","chunk_size":256}) +sample, m, scheme, chunk_size, verbose = commandline_args({"sample" : "<required>", + "material" : 0, + "scheme" : "edt", + "chunk_size" : 256, + "verbose" : 1}) scales = [32, 16, 8, 4, 2] From 6ff9582819cb39e6542b89ad01d151ab260fc478 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Feb 2023 13:30:21 +0100 Subject: [PATCH 062/136] #25 Verified that 0300 works --- .gitignore | 5 ++++- src/processing_steps/0300_volume_matcher.py | 5 +++-- src/requirements.txt | 1 + 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 5743ee6..224e2b6 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,7 @@ src/meow/runner_processing/* # Compiled files *.so -*.so.dSYM \ No newline at end of file +*.so.dSYM + +# Ignore the $BONE_DATA symlinks, as they're only there for convinience in vscode +data_* \ No newline at end of file diff --git 
a/src/processing_steps/0300_volume_matcher.py b/src/processing_steps/0300_volume_matcher.py index 336660c..374aea8 100755 --- a/src/processing_steps/0300_volume_matcher.py +++ b/src/processing_steps/0300_volume_matcher.py @@ -139,5 +139,6 @@ def write_matched_hdf5(h5_filename_in, h5_filename_out, crossings, shifts, compr h5file.close() - if verbose >= 1: print(f"Copying over volume from {input_h5name} shifted by {shifts} to {output_h5name}") - if(generate_h5): write_matched_hdf5(input_h5name, output_h5name, crossings, shifts) + if(generate_h5): + if verbose >= 1: print(f"Copying over volume from {input_h5name} shifted by {shifts} to {output_h5name}") + write_matched_hdf5(input_h5name, output_h5name, crossings, shifts) diff --git a/src/requirements.txt b/src/requirements.txt index 5125944..9146494 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -2,5 +2,6 @@ cupy-cuda11x==11.5.0 edt==2.3.0 fabric==3.0.0 jax==0.4.3 +jaxlib==0.4.3 tqdm==4.64.1 vedo==2023.4.3 \ No newline at end of file From 1407958db4dcf1a767960c05fde3486ed460edaf Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 09:57:21 +0100 Subject: [PATCH 063/136] #25 step 400 works --- src/lib/cpp/cpu_seq/io.cc | 1 + src/processing_steps/0400_h5tobin.py | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/lib/cpp/cpu_seq/io.cc b/src/lib/cpp/cpu_seq/io.cc index 01cf2f8..2d30477 100644 --- a/src/lib/cpp/cpu_seq/io.cc +++ b/src/lib/cpp/cpu_seq/io.cc @@ -34,6 +34,7 @@ void write_contiguous_slice(const T *data, } file.seekp(offset * sizeof(T), ios::beg); file.write((char*) data, size * sizeof(T)); + file.flush(); // Should have flushed, but just in case file.close(); } diff --git a/src/processing_steps/0400_h5tobin.py b/src/processing_steps/0400_h5tobin.py index 53b63ed..0c4def6 100755 --- a/src/processing_steps/0400_h5tobin.py +++ b/src/processing_steps/0400_h5tobin.py @@ -3,7 +3,7 @@ sys.path.append(sys.path[0]+"/../") from 
config.paths import hdf5_root, binary_root from tqdm import tqdm -from lib.cpp.cpu.io import write_slice +from lib.cpp.cpu_seq.io import write_slice from lib.py.helpers import commandline_args, update_hdf5 slice_all = slice(None) @@ -65,12 +65,16 @@ def h5tobin(sample,region=(slice_all,slice_all,slice_all),shift_volume_match=1): for i in tqdm(range(Nvols), desc=f'Loading {sample} from HDF5 and writing binary'): subvolume_msb = dmsb[input_zstarts[i]:input_zends[i],y_range,x_range].astype(np.uint16) subvolume_lsb = dlsb[input_zstarts[i]:input_zends[i],y_range,x_range].astype(np.uint16) - - write_slice((subvolume_msb << 8) | subvolume_lsb, output_zstarts[i]*Ny*Nx, outfile) + combined = (subvolume_msb << 8) | subvolume_lsb del subvolume_msb del subvolume_lsb + # TODO For some reason, when 'output_zstarts' is a numpy type, 'combined' gets interpreted as an uint8 array through pybind. It is therefore important that it is converted to a python integer. This should be investigated, as it doesn't make sense that arguments should affect each other in this manner! Especially since it's only the first argument that's templated. Note: it's not due to mixed types in the tuple, as giving it three numpy values also breaks it. 
+ write_slice(combined, outfile, (int(output_zstarts[i]), 0, 0), combined.shape) + + del combined + msb_file.close() lsb_file.close() From 58707e583664dc72721f805c99c85ce87efa8bf8 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 09:58:08 +0100 Subject: [PATCH 064/136] #16 Added the option to only run a subset of tests --- src/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Makefile b/src/Makefile index 2a00b42..e26cb0c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -41,5 +41,8 @@ $(foreach PLATFORM, $(PLATFORMS), \ test: all $(PYTHON) -m pytest -n auto test +test_%: test/test_%.py all + $(PYTHON) -m pytest -n auto $< + clean: rm -rf $(CLEANUP) __pycache__ test/__pycache__ .pytest_cache lib/cpp/**/*.so \ No newline at end of file From eb4acf3a567b7f9a34dfcb341f1d4a8e9e89937f Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 09:59:01 +0100 Subject: [PATCH 065/136] #16 Extended io test with the seek past file end test --- src/test/test_io.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/test/test_io.py b/src/test/test_io.py index 12ed56d..4256fbe 100644 --- a/src/test/test_io.py +++ b/src/test/test_io.py @@ -59,6 +59,19 @@ def test_dtype(dtype): for i in range(partial_factor+1): io.load_slice(read_data, individual_tmp_file, (i*partial,0,0), read_data.shape) assert np.allclose(data[i*partial:(i+1)*partial], read_data) + + # Write past where the file ends + impl.write_slice(data, individual_tmp_file, (data.shape[0]*2,0,0), data.shape) + assert os.path.getsize(individual_tmp_file) == 3 * data.nbytes + + # Check that the old data remains, the middle data is zeros, and that the new data is the same + read_data = np.empty_like(data) + for i in range(3): + impl.load_slice(read_data, individual_tmp_file, (i*data.shape[0],0,0), data.shape) + if i != 1: + assert np.allclose(data, read_data) + else: + assert np.allclose(np.zeros_like(data), read_data) 
os.remove(individual_tmp_file) From 262a4a2f8a07ae030c297dabea1a61ab8fe8babf Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 09:59:22 +0100 Subject: [PATCH 066/136] #16 extended io test to test different implementations --- src/test/test_io.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/test/test_io.py b/src/test/test_io.py index 4256fbe..199257d 100644 --- a/src/test/test_io.py +++ b/src/test/test_io.py @@ -3,7 +3,9 @@ ''' import sys sys.path.append(sys.path[0]+"/../lib/cpp") -import cpu_seq.io as io +import cpu_seq.io as io_cpu_seq +import cpu.io as io_cpu +import gpu.io as io_gpu import numpy as np import tempfile import os @@ -14,50 +16,52 @@ tmp_folder = tempfile._get_default_tempdir() tmp_filename = next(tempfile._get_candidate_names()) tmp_file = f'{tmp_folder}/{tmp_filename}' -dim_size = 16 +dim_size = 128 dim_shape = (dim_size, dim_size, dim_size) partial_factor = 4 +impls = [io_cpu_seq] #, io_cpu, io_gpu] def random(shape, dtype): rnds = np.random.random(shape) * 100 return rnds > .5 if dtype == bool else rnds.astype(dtype) +@pytest.mark.parametrize("impl", impls) @pytest.mark.parametrize("dtype", dtypes_to_test) -def test_dtype(dtype): +def test_dtype(impl, dtype): individual_tmp_file = f'{tmp_file}.{dtype.__name__}' if os.path.exists(individual_tmp_file): os.remove(individual_tmp_file) data = random(dim_shape, dtype) - data[0,0,1] = False partial = dim_size // partial_factor # Write out a new file - io.write_slice(data, individual_tmp_file, (0,0,0), dim_shape) + impl.write_slice(data, individual_tmp_file, (0,0,0), dim_shape) assert os.path.getsize(individual_tmp_file) == data.nbytes # Read back and verify in chunks - read_data = np.zeros((partial, dim_size, dim_size), dtype=dtype) + read_data = np.empty((partial, dim_size, dim_size), dtype=dtype) for i in range(partial_factor): - io.load_slice(read_data, individual_tmp_file, (i*partial,0,0), 
read_data.shape) + impl.load_slice(read_data, individual_tmp_file, (i*partial,0,0), read_data.shape) assert np.allclose(data[i*partial:(i+1)*partial], read_data) # Append another layer data = np.append(data, random((partial, dim_size, dim_size), dtype), axis=0) - io.write_slice(data[dim_size:], individual_tmp_file, (dim_size,0,0), data.shape) + impl.write_slice(data[dim_size:], individual_tmp_file, (dim_size,0,0), data.shape) assert os.path.getsize(individual_tmp_file) == data.nbytes # Read back and verify in chunks for i in range(partial_factor+1): - io.load_slice(read_data, individual_tmp_file, (i*partial,0,0), read_data.shape) + impl.load_slice(read_data, individual_tmp_file, (i*partial,0,0), read_data.shape) assert np.allclose(data[i*partial:(i+1)*partial], read_data) # Overwrite one of the "middle" chunks data[partial:2*partial] = random((partial, dim_size, dim_size), dtype) - io.write_slice(data[partial:partial*2], individual_tmp_file, (partial,0,0), data.shape) + impl.write_slice(data[partial:partial*2], individual_tmp_file, (partial,0,0), data.shape) + assert os.path.getsize(individual_tmp_file) == data.nbytes # Read back and verify in chunks for i in range(partial_factor+1): - io.load_slice(read_data, individual_tmp_file, (i*partial,0,0), read_data.shape) + impl.load_slice(read_data, individual_tmp_file, (i*partial,0,0), read_data.shape) assert np.allclose(data[i*partial:(i+1)*partial], read_data) # Write past where the file ends @@ -76,6 +80,7 @@ def test_dtype(dtype): os.remove(individual_tmp_file) if __name__ == '__main__': - for dtype in dtypes_to_test: - print (f'Testing {dtype.__name__}') - test_dtype(dtype) \ No newline at end of file + for impl in impls: + for dtype in dtypes_to_test: + print (f'Testing {impl.__name__} on {dtype.__name__}') + test_dtype(impl, dtype) \ No newline at end of file From c23bc53f6c77a762c537ca786c5f260f72054e7e Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 10:04:47 +0100 
Subject: [PATCH 067/136] Added file for printing system type ids --- src/exploration/print_cpp_type_ids.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 src/exploration/print_cpp_type_ids.cpp diff --git a/src/exploration/print_cpp_type_ids.cpp b/src/exploration/print_cpp_type_ids.cpp new file mode 100644 index 0000000..a0191c8 --- /dev/null +++ b/src/exploration/print_cpp_type_ids.cpp @@ -0,0 +1,24 @@ +#include<iostream> +#include<stdint.h> + +int main() { + /* + This class is used to print out the code of the type. + This is handy when debugging the templated type at runtime. + */ + + std::cout << "int8 " << typeid(int8_t).name() << std::endl; + std::cout << "int16 " << typeid(int16_t).name() << std::endl; + std::cout << "int32 " << typeid(int32_t).name() << std::endl; + std::cout << "int64 " << typeid(int64_t).name() << std::endl; + std::cout << "int128 " << typeid(__int128_t).name() << std::endl; + + std::cout << "uint8 " << typeid(uint8_t).name() << std::endl; + std::cout << "uint16 " << typeid(uint16_t).name() << std::endl; + std::cout << "uint32 " << typeid(uint32_t).name() << std::endl; + std::cout << "uint64 " << typeid(uint64_t).name() << std::endl; + std::cout << "uint128 " << typeid(__uint128_t).name() << std::endl; + + std::cout << "float " << typeid(float).name() << std::endl; + std::cout << "double " << typeid(double).name() << std::endl; +} \ No newline at end of file From 61df36171fa2b38e830fe3c37c3f2fa735eb4aac Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 10:05:48 +0100 Subject: [PATCH 068/136] Added a.out to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 224e2b6..a724c3c 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ src/meow/runner_processing/* # Compiled files *.so *.so.dSYM +a.out # Ignore the $BONE_DATA symlinks, as they're only there for convinience in vscode data_* \ No newline at end of 
file From fd47621a2a1b216fe30f61837e629b86bd69a12a Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 10:06:20 +0100 Subject: [PATCH 069/136] Added launch file for vscode python debugging --- .vscode/launch.json | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..7b0145f --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,17 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: 0400_h5tobin", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/src/processing_steps/0400_h5tobin.py", + "console": "integratedTerminal", + "args": ["770c_pag"], + "justMyCode": false + } + ] +} \ No newline at end of file From f739668946b8738b6eac671f7b9179f55fb34b0a Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 10:09:28 +0100 Subject: [PATCH 070/136] #25 step 0500 runs --- .vscode/launch.json | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 7b0145f..aae8ffc 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,6 +12,15 @@ "console": "integratedTerminal", "args": ["770c_pag"], "justMyCode": false - } + }, + { + "name": "Python: 0500_rescale_cupy_bin", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/src/processing_steps/0500_rescale_cupy_bin.py", + "console": "integratedTerminal", + "args": ["770c_pag"], + "justMyCode": false + }, ] } \ No newline at end of file From 76eb769c1bfd1a136ee5fb26558c4bcd9d15ddb9 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 10:15:30 +0100 Subject: 
[PATCH 071/136] #25 Moved geometry files --- .../pybind_kernels/cpu => src/lib/cpp/cpu_seq}/geometry.cc | 0 .../pybind_kernels => src}/pybind/geometry-pybind.cc | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename {pre-cleanup-src/pybind_kernels/cpu => src/lib/cpp/cpu_seq}/geometry.cc (100%) rename {pre-cleanup-src/pybind_kernels => src}/pybind/geometry-pybind.cc (98%) diff --git a/pre-cleanup-src/pybind_kernels/cpu/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc similarity index 100% rename from pre-cleanup-src/pybind_kernels/cpu/geometry.cc rename to src/lib/cpp/cpu_seq/geometry.cc diff --git a/pre-cleanup-src/pybind_kernels/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc similarity index 98% rename from pre-cleanup-src/pybind_kernels/pybind/geometry-pybind.cc rename to src/pybind/geometry-pybind.cc index cbf19f4..a7c1f0d 100644 --- a/pre-cleanup-src/pybind_kernels/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -160,5 +160,5 @@ PYBIND11_MODULE(geometry, m) { m.def("cylinder_projection", &python_api::cylinder_projection); m.def("sample_plane", &python_api::sample_plane<uint16_t>); m.def("sample_plane", &python_api::sample_plane<uint8_t>); - m.def("compute_front_mask", &python_api::compute_front_mask); + m.def("compute_front_mask", &python_api::compute_front_mask); } From 2e67d269d01a3e37e553b36a02178cb444846084 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 10:41:35 +0100 Subject: [PATCH 072/136] #25 Made the geometry file more consistent with the other cpp files --- src/lib/cpp/cpu_seq/geometry.cc | 931 ++++++++++++++++---------------- 1 file changed, 453 insertions(+), 478 deletions(-) diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 4154c5a..2a155aa 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -11,554 +11,529 @@ using namespace std; #define dot(a,b) (a[0]*b[0] + a[1]*b[1] + a[2]*b[2]) -void 
print_timestamp(string message) -{ - auto now = chrono::system_clock::to_time_t(chrono::system_clock::now()); - tm local_tm = *localtime(&now); - fprintf(stderr,"%s at %02d:%02d:%02d\n", message.c_str(), local_tm.tm_hour, local_tm.tm_min, local_tm.tm_sec); +void print_timestamp(string message) { + auto now = chrono::system_clock::to_time_t(chrono::system_clock::now()); + tm local_tm = *localtime(&now); + fprintf(stderr,"%s at %02d:%02d:%02d\n", message.c_str(), local_tm.tm_hour, local_tm.tm_min, local_tm.tm_sec); } - // TODO: Fix OpenACC copies & re-enable GPU array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { - // nvc++ doesn't support OpenACC 2.7 array reductions yet. - real_t cmx = 0, cmy = 0, cmz = 0; - size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - int64_t image_length = Nx*Ny*Nz; - - print_timestamp("center_of_mass start"); - real_t total_mass = 0; - for(int64_t block_start=0;block_start<image_length;block_start+=acc_block_size){ - - const mask_type *buffer = voxels.data + block_start; - ssize_t this_block_length = min(acc_block_size,image_length-block_start); - - //#pragma acc parallel loop reduction(+:cmx,cmy,cmz,total_mass) copyin(buffer[:this_block_length]) - reduction_loop((+:cmx,cmy,cmz,total_mass),()) - for(int64_t k = 0; k<this_block_length;k++){ - real_t m = buffer[k]; - - int64_t flat_idx = block_start + k; - int64_t x = flat_idx / (Ny*Nz); - int64_t y = (flat_idx / Nz) % Ny; - int64_t z = flat_idx % Nz; - - total_mass += m; - cmx += m*x; cmy += m*y; cmz += m*z; + // nvc++ doesn't support OpenACC 2.7 array reductions yet. 
+ real_t cmx = 0, cmy = 0, cmz = 0; + size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; + int64_t image_length = Nx*Ny*Nz; + + print_timestamp("center_of_mass start"); + real_t total_mass = 0; + for (int64_t block_start = 0; block_start < image_length; block_start += acc_block_size) { + + const mask_type *buffer = voxels.data + block_start; + ssize_t this_block_length = min(acc_block_size, image_length-block_start); + + //#pragma acc parallel loop reduction(+:cmx,cmy,cmz,total_mass) copyin(buffer[:this_block_length]) + //reduction_loop((+:cmx,cmy,cmz,total_mass),()) + for (int64_t k = 0; k < this_block_length; k++) { + real_t m = buffer[k]; + + int64_t flat_idx = block_start + k; + int64_t x = flat_idx / (Ny*Nz); + int64_t y = (flat_idx / Nz) % Ny; + int64_t z = flat_idx % Nz; + + total_mass += m; + cmx += m*x; cmy += m*y; cmz += m*z; + } } - } - cmx /= total_mass; cmy /= total_mass; cmz /= total_mass; + cmx /= total_mass; cmy /= total_mass; cmz /= total_mass; - print_timestamp("center_of_mass end"); + print_timestamp("center_of_mass end"); - return array<real_t,3>{cmx,cmy,cmz}; + return array<real_t,3>{cmx,cmy,cmz}; } - -array<real_t,9> inertia_matrix_serial(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) -{ - real_t - Ixx = 0, Ixy = 0, Ixz = 0, - Iyy = 0, Iyz = 0, - Izz = 0; +array<real_t,9> inertia_matrix_serial(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { + real_t + Ixx = 0, Ixy = 0, Ixz = 0, + Iyy = 0, Iyz = 0, + Izz = 0; - ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - - print_timestamp("inertia_matrix_serial start"); - for(int64_t X=0,k=0;X<Nx;X++) - for(int64_t Y=0;Y<Ny;Y++) - for(int64_t Z=0;Z<Nz;Z++,k++){ - real_t x = X-cm[0], y = Y-cm[1], z = Z-cm[2]; - - real_t m = voxels.data[k]; - Ixx += m*(y*y+z*z); - Iyy += m*(x*x+z*z); - Izz += m*(x*x+y*y); - Ixy -= m * x*y; - Ixz -= m * x*z; - Iyz -= m * y*z; - } + ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = 
voxels.shape[2]; + + print_timestamp("inertia_matrix_serial start"); + for (int64_t X=0,k=0;X<Nx;X++) { + for (int64_t Y=0;Y<Ny;Y++) { + for (int64_t Z=0;Z<Nz;Z++,k++) { + real_t x = X-cm[0], y = Y-cm[1], z = Z-cm[2]; + + real_t m = voxels.data[k]; + Ixx += m*(y*y+z*z); + Iyy += m*(x*x+z*z); + Izz += m*(x*x+y*y); + Ixy -= m * x*y; + Ixz -= m * x*z; + Iyz -= m * y*z; + } + } + } - print_timestamp("inertia_matrix_serial end"); - return array<real_t,9> { - Ixx, Ixy, Ixz, - Ixy, Iyy, Iyz, - Ixz, Iyz, Izz - }; + print_timestamp("inertia_matrix_serial end"); + return array<real_t,9> { + Ixx, Ixy, Ixz, + Ixy, Iyy, Iyz, + Ixz, Iyz, Izz + }; } - -array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) -{ - // nvc++ doesn't support OpenACC 2.7 array reductions yet, so must name each element. - real_t - M00 = 0, M01 = 0, M02 = 0, - M11 = 0, M12 = 0, - M22 = 0; +array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { + // nvc++ doesn't support OpenACC 2.7 array reductions yet, so must name each element. 
+ real_t + M00 = 0, M01 = 0, M02 = 0, + M11 = 0, M12 = 0, + M22 = 0; - size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - ssize_t image_length = Nx*Ny*Nz; - - print_timestamp("inertia_matrix start"); - for(ssize_t block_start=0;block_start<image_length;block_start+=acc_block_size){ - const mask_type *buffer = voxels.data + block_start; - ssize_t block_length = min(acc_block_size,image_length-block_start); - - reduction_loop((+:M00,M01,M02,M11,M12,M22),()) - for(int64_t k = 0; k<block_length;k++) { //\if(buffer[k] != 0) - int64_t flat_idx = block_start + k; - real_t xs[3] = {(flat_idx / (Ny*Nz)) - cm[0], // x - ((flat_idx / Nz) % Ny) - cm[1], // y - (flat_idx % Nz) - cm[2]}; // z - - real_t m = buffer[k]; - real_t diag = dot(xs,xs); - M00 += m*(diag - xs[0] * xs[0]); - M11 += m*(diag - xs[1] * xs[1]); - M22 += m*(diag - xs[2] * xs[2]); - M01 -= m * xs[0] * xs[1]; - M02 -= m * xs[0] * xs[2]; - M12 -= m * xs[1] * xs[2]; - } - } - print_timestamp("inertia_matrix end"); - return array<real_t,9> { - M00,M01,M02, - M01,M11,M12, - M02,M12,M22}; + size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; + ssize_t image_length = Nx*Ny*Nz; + + print_timestamp("inertia_matrix start"); + for (ssize_t block_start = 0; block_start < image_length; block_start += acc_block_size) { + const mask_type *buffer = voxels.data + block_start; + ssize_t block_length = min(acc_block_size, image_length-block_start); + + reduction_loop((+:M00,M01,M02,M11,M12,M22),()) + for (int64_t k = 0; k < block_length; k++) { //\if (buffer[k] != 0) + int64_t flat_idx = block_start + k; + real_t xs[3] = { + (flat_idx / (Ny*Nz)) - cm[0], // x + ((flat_idx / Nz) % Ny) - cm[1], // y + (flat_idx % Nz) - cm[2] }; // z + + real_t m = buffer[k]; + real_t diag = dot(xs,xs); + M00 += m*(diag - xs[0] * xs[0]); + M11 += m*(diag - xs[1] * xs[1]); + M22 += m*(diag - xs[2] * xs[2]); + M01 -= m * xs[0] * xs[1]; + M02 -= m * xs[0] * xs[2]; + M12 -= m * xs[1] * xs[2]; + } + } + 
print_timestamp("inertia_matrix end"); + return array<real_t,9> { + M00, M01, M02, + M01, M11, M12, + M02, M12, M22 }; } - void integrate_axes(const input_ndarray<mask_type> &voxels, - const array<real_t,3> &x0, - const array<real_t,3> &v_axis, - const array<real_t,3> &w_axis, - const real_t v_min, const real_t w_min, - output_ndarray<real_t> output) -{ - ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - ssize_t Nv = output.shape[0], Nw = output.shape[1]; - int64_t image_length = Nx*Ny*Nz; - real_t *output_data = output.data; - - // TODO: Check v_axis & w_axis projections to certify bounds and get rid of runtime check + const array<real_t,3> &x0, + const array<real_t,3> &v_axis, + const array<real_t,3> &w_axis, + const real_t v_min, const real_t w_min, + output_ndarray<real_t> output) { + ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; + ssize_t Nv = output.shape[0], Nw = output.shape[1]; + int64_t image_length = Nx*Ny*Nz; + real_t *output_data = output.data; + + // TODO: Check v_axis & w_axis projections to certify bounds and get rid of runtime check - for(ssize_t block_start=0;block_start<image_length;block_start += acc_block_size){ - const mask_type *buffer = voxels.data + block_start; - int block_length = min(acc_block_size,image_length-block_start); - - //#pragma acc parallel loop copy(output_data[:Nv*Nw]) copyin(buffer[:block_length], x0, v_axis, w_axis) - parallel_loop((output_data[:Nv*Nw])) - for(int64_t k = 0; k<block_length;k++) if(buffer[k] != 0) { - int64_t flat_idx = block_start + k; - real_t xs[3] = {(flat_idx / (Ny*Nz)) - x0[0], // x - ((flat_idx / Nz) % Ny) - x0[1], // y - (flat_idx % Nz) - x0[2]}; // z - - mask_type voxel = buffer[k]; - real_t v = dot(xs,v_axis), w = dot(xs,w_axis); - int64_t i_v = round(v-v_min), j_w = round(w-w_min); - - if(i_v >= 0 && j_w >= 0 && i_v < Nv && j_w < Nw){ - atomic_statement() - output_data[i_v*Nw + j_w] += voxel; - } - } - } + for (ssize_t block_start = 0; 
block_start < image_length; block_start += acc_block_size) { + const mask_type *buffer = voxels.data + block_start; + int block_length = min(acc_block_size,image_length-block_start); + + //#pragma acc parallel loop copy(output_data[:Nv*Nw]) copyin(buffer[:block_length], x0, v_axis, w_axis) + parallel_loop((output_data[:Nv*Nw])) + for (int64_t k = 0; k < block_length; k++) { + if (buffer[k] != 0) { + int64_t flat_idx = block_start + k; + real_t xs[3] = { + (flat_idx / (Ny*Nz)) - x0[0], // x + ((flat_idx / Nz) % Ny) - x0[1], // y + (flat_idx % Nz) - x0[2] }; // z + + mask_type voxel = buffer[k]; + real_t v = dot(xs,v_axis), w = dot(xs,w_axis); + int64_t i_v = round(v-v_min), j_w = round(w-w_min); + + if (i_v >= 0 && j_w >= 0 && i_v < Nv && j_w < Nw) { + atomic_statement() + output_data[i_v*Nw + j_w] += voxel; + } + } + } + } } +bool in_bbox(float U, float V, float W, const std::array<float,6> bbox) { + const auto& [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; -bool in_bbox(float U, float V, float W, const std::array<float,6> bbox) -{ - const auto& [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; - - bool inside = U>=U_min && U<=U_max && V>=V_min && V<=V_max && W>=W_min && W<=W_max; + bool inside = U>=U_min && U<=U_max && V>=V_min && V<=V_max && W>=W_min && W<=W_max; - // printf("in_bbox: (%.1f,%.1f,%.1f) \in ([%.1f,%.1f],[%.1f,%.1f],[%.1f,%.1f]) == %d\n", - // U,V,W,U_min,U_max,V_min,V_max,U_min,U_max,inside); - return inside; + // printf("in_bbox: (%.1f,%.1f,%.1f) \in ([%.1f,%.1f],[%.1f,%.1f],[%.1f,%.1f]) == %d\n", + // U,V,W,U_min,U_max,V_min,V_max,U_min,U_max,inside); + return inside; } +template<typename field_type> float resample2x2x2(const field_type *voxels, + const array<ssize_t,3> &shape, + const array<float,3> &X) { + auto [Nx,Ny,Nz] = shape; // Eller omvendt? 
+ if (!in_bbox(X[0],X[1],X[2], {0.5,Nx-1.5, 0.5,Ny-1.5, 0.5,Nz-1.5})) { + uint64_t voxel_index = floor(X[0])*Ny*Nz+floor(X[1])*Ny+floor(X[2]); + return voxels[voxel_index]; + } + float Xfrac[2][3]; // {Xminus[3], Xplus[3]} + int64_t Xint[2][3]; // {Iminus[3], Iplus[3]} + float value = 0; + + for (int i = 0; i < 3; i++) { + double Iminus, Iplus; + Xfrac[0][i] = 1-modf(X[i]-0.5, &Iminus); // 1-{X[i]-1/2}, floor(X[i]-1/2) + Xfrac[1][i] = modf(X[i]+0.5, &Iplus); // {X[i]+1/2}, floor(X[i]+1/2) -template<typename field_type> float resample2x2x2(const field_type *voxels, - const array<ssize_t,3> &shape, - const array<float,3> &X) -{ - auto [Nx,Ny,Nz] = shape; // Eller omvendt? - if(!in_bbox(X[0],X[1],X[2], {0.5,Nx-1.5, 0.5,Ny-1.5, 0.5,Nz-1.5})){ - uint64_t voxel_index = floor(X[0])*Ny*Nz+floor(X[1])*Ny+floor(X[2]); - return voxels[voxel_index]; - } - float Xfrac[2][3]; // {Xminus[3], Xplus[3]} - int64_t Xint[2][3]; // {Iminus[3], Iplus[3]} - float value = 0; - - for(int i=0;i<3;i++){ - double Iminus, Iplus; - Xfrac[0][i] = 1-modf(X[i]-0.5, &Iminus); // 1-{X[i]-1/2}, floor(X[i]-1/2) - Xfrac[1][i] = modf(X[i]+0.5, &Iplus); // {X[i]+1/2}, floor(X[i]+1/2) - - Xint[0][i] = Iminus; - Xint[1][i] = Iplus; - } - - - for(int ijk=0; ijk<=7; ijk++) { - float weight = 1; - int64_t IJK[3] = {0,0,0}; - - for(int axis=0;axis<3;axis++){ // x-1/2 or x+1/2 - int pm = (ijk>>axis) & 1; - IJK[axis] = Xint[pm][axis]; - weight *= Xfrac[pm][axis]; + Xint[0][i] = Iminus; + Xint[1][i] = Iplus; } - auto [I,J,K] = IJK; - // if(I<0 || J<0 || K<0){ - // printf("(I,J,K) = (%ld,%ld,%ld)\n",I,J,K); - // abort(); - // } - // if(I>=int(Nx) || J>=int(Ny) || K>=int(Nz)){ - // printf("(I,J,K) = (%ld,%ld,%ld), (Nx,Ny,Nz) = (%ld,%ld,%ld)\n",I,J,K,Nx,Ny,Nz); - // abort(); - // } - uint64_t voxel_index = I*Ny*Nz+J*Ny+K; - assert(I>=0 && J>=0 && K>=0); - assert(I<Nx && J<Ny && K<Nz); - field_type voxel = voxels[voxel_index]; - value += voxel*weight; - } - return value; + + for (int ijk = 0; ijk <= 7; ijk++) { + 
float weight = 1; + int64_t IJK[3] = {0,0,0}; + + for (int axis = 0; axis < 3; axis++) { // x-1/2 or x+1/2 + int pm = (ijk>>axis) & 1; + IJK[axis] = Xint[pm][axis]; + weight *= Xfrac[pm][axis]; + } + + auto [I,J,K] = IJK; + // if (I<0 || J<0 || K<0) { + // printf("(I,J,K) = (%ld,%ld,%ld)\n",I,J,K); + // abort(); + // } + // if (I>=int(Nx) || J>=int(Ny) || K>=int(Nz)) { + // printf("(I,J,K) = (%ld,%ld,%ld), (Nx,Ny,Nz) = (%ld,%ld,%ld)\n",I,J,K,Nx,Ny,Nz); + // abort(); + // } + uint64_t voxel_index = I*Ny*Nz+J*Ny+K; + assert(I>=0 && J>=0 && K>=0); + assert(I<Nx && J<Ny && K<Nz); + field_type voxel = voxels[voxel_index]; + value += voxel*weight; + } + return value; } template <typename voxel_type> void sample_plane(const input_ndarray<voxel_type> &voxels, - const real_t voxel_size, // In micrometers - const array<real_t,3> cm, - const array<real_t,3> u_axis, - const array<real_t,3> v_axis, - const array<real_t,4> bbox, // [umin,umax,vmin,vmax] in micrometers - output_ndarray<real_t> plane_samples) -{ - const auto& [umin,umax,vmin,vmax] = bbox; // In micrometers - ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - ssize_t nu = plane_samples.shape[0], nv = plane_samples.shape[1]; - real_t du = (umax-umin)/nu, dv = (vmax-vmin)/nv; - - #pragma omp parallel for collapse(2) - for(ssize_t ui=0;ui<nu;ui++) - for(ssize_t vj=0;vj<nv;vj++){ - const real_t u = umin + ui*du, v = vmin + vj*dv; - - // X,Y,Z in micrometers; x,y,z in voxel index space - const real_t - X = cm[0] + u*u_axis[0] + v*v_axis[0], - Y = cm[1] + u*u_axis[1] + v*v_axis[1], - Z = cm[2] + u*u_axis[2] + v*v_axis[2]; - - const real_t x = X/voxel_size, y = Y/voxel_size, z = Z/voxel_size; - - // printf("u,v = %g,%g -> %.1f,%.1f,%.1f -> %d, %d, %d\n",u,v,X,Y,Z,int(round(x)),int(round(y)),int(round(z))); - - voxel_type value = 0; - if(in_bbox(x,y,z,{0.5,Nx-0.5, 0.5,Ny-0.5, 0.5,Nz-0.5})) - value = resample2x2x2<voxel_type>(voxels.data,{Nx,Ny,Nz},{x,y,z}); - // else - // fprintf(stderr,"Sampling 
outside image: x,y,z = %.1f,%.1f,%.1f, Nx,Ny,Nz = %ld,%ld,%ld\n",x,y,z,Nx,Ny,Nz); - - plane_samples.data[ui*nv + vj] = value; + const real_t voxel_size, // In micrometers + const array<real_t,3> cm, + const array<real_t,3> u_axis, + const array<real_t,3> v_axis, + const array<real_t,4> bbox, // [umin,umax,vmin,vmax] in micrometers + output_ndarray<real_t> plane_samples) { + const auto& [umin,umax,vmin,vmax] = bbox; // In micrometers + ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; + ssize_t nu = plane_samples.shape[0], nv = plane_samples.shape[1]; + real_t du = (umax-umin)/nu, dv = (vmax-vmin)/nv; + + #pragma omp parallel for collapse(2) + for (ssize_t ui=0;ui<nu;ui++) { + for (ssize_t vj=0;vj<nv;vj++) { + const real_t u = umin + ui*du, v = vmin + vj*dv; + + // X,Y,Z in micrometers; x,y,z in voxel index space + const real_t + X = cm[0] + u*u_axis[0] + v*v_axis[0], + Y = cm[1] + u*u_axis[1] + v*v_axis[1], + Z = cm[2] + u*u_axis[2] + v*v_axis[2]; + + const real_t x = X/voxel_size, y = Y/voxel_size, z = Z/voxel_size; + + // printf("u,v = %g,%g -> %.1f,%.1f,%.1f -> %d, %d, %d\n",u,v,X,Y,Z,int(round(x)),int(round(y)),int(round(z))); + + voxel_type value = 0; + if (in_bbox(x,y,z,{0.5,Nx-0.5, 0.5,Ny-0.5, 0.5,Nz-0.5})) + value = resample2x2x2<voxel_type>(voxels.data,{Nx,Ny,Nz},{x,y,z}); + // else + // fprintf(stderr,"Sampling outside image: x,y,z = %.1f,%.1f,%.1f, Nx,Ny,Nz = %ld,%ld,%ld\n",x,y,z,Nx,Ny,Nz); + + plane_samples.data[ui*nv + vj] = value; + } } } // NB: xyz are in indices, not micrometers void zero_outside_bbox(const array<real_t,9> &principal_axes, - const array<real_t,6> ¶meter_ranges, - const array<real_t,3> &cm, - output_ndarray<mask_type> voxels) -{ - size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - int64_t image_length = Nx*Ny*Nz; + const array<real_t,6> ¶meter_ranges, + const array<real_t,3> &cm, + output_ndarray<mask_type> voxels) { + size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = 
voxels.shape[2]; + int64_t image_length = Nx*Ny*Nz; - printf("(Nx,Ny,Nz) = (%ld,%ld,%ld), image_length = %ld",Nx,Ny,Nz, image_length); - for(int64_t block_start=0;block_start<image_length;block_start+=acc_block_size){ + printf("(Nx,Ny,Nz) = (%ld,%ld,%ld), image_length = %ld",Nx,Ny,Nz, image_length); - mask_type *buffer = voxels.data + block_start; - ssize_t this_block_length = min(acc_block_size,image_length-block_start); + for (int64_t block_start = 0; block_start < image_length; block_start += acc_block_size) { + mask_type *buffer = voxels.data + block_start; + ssize_t this_block_length = min(acc_block_size, image_length-block_start); - parallel_loop((buffer[:this_block_length])) - for(int64_t k = 0; k<this_block_length;k++){ - int64_t flat_idx = block_start + k; - int64_t x = flat_idx / (Ny*Nz); - int64_t y = (flat_idx / Nz) % Ny; - int64_t z = flat_idx % Nz; - // Boilerplate until here. TODO: macroize or lambda out! - - real_t xs[3] = {x-cm[0], y-cm[1], z-cm[2]}; + parallel_loop((buffer[:this_block_length])) + for (int64_t k = 0; k < this_block_length; k++) { + int64_t flat_idx = block_start + k; + int64_t x = flat_idx / (Ny*Nz); + int64_t y = (flat_idx / Nz) % Ny; + int64_t z = flat_idx % Nz; + // Boilerplate until here. TODO: macroize or lambda out! 
+ + real_t xs[3] = {x-cm[0], y-cm[1], z-cm[2]}; - real_t params[3] = {0,0,0}; + real_t params[3] = {0,0,0}; - for(int uvw=0;uvw<3;uvw++) - for(int xyz=0;xyz<3;xyz++) - params[uvw] += xs[xyz]*principal_axes[uvw*3+xyz]; // u = dot(xs,u_axis), v = dot(xs,v_axis), w = dot(xs,w_axis) + for (int uvw = 0; uvw < 3; uvw++) + for (int xyz = 0; xyz < 3; xyz++) + params[uvw] += xs[xyz] * principal_axes[uvw*3+xyz]; // u = dot(xs,u_axis), v = dot(xs,v_axis), w = dot(xs,w_axis) - bool p = false; + bool p = false; - for(int uvw=0;uvw<3;uvw++){ - real_t param_min = parameter_ranges[uvw*2], param_max = parameter_ranges[uvw*2+1]; - p |= (params[uvw] < param_min) | (params[uvw] > param_max); - } + for (int uvw = 0; uvw < 3; uvw++) { + real_t param_min = parameter_ranges[uvw*2], param_max = parameter_ranges[uvw*2+1]; + p |= (params[uvw] < param_min) | (params[uvw] > param_max); + } - if(p) buffer[k] = 0; + if (p) buffer[k] = 0; + } } - } } -inline vector4 hom_transform(const vector4 &x, const matrix4x4 &M) -{ - vector4 c{{0,0,0,0}}; +inline vector4 hom_transform(const vector4 &x, const matrix4x4 &M) { + vector4 c{{0,0,0,0}}; - for(int i=0;i<4;i++){ - real_t sum = 0; -#pragma simd parallel for reduction(+:sum) - for(int j=0;j<4;j++) - sum += M[i*4+j]*x[j]; - c[i] = sum; - } - return c; - - + for (int i = 0; i < 4; i++) { + real_t sum = 0; + #pragma simd parallel for reduction(+:sum) + for (int j=0;j<4;j++) + sum += M[i*4+j]*x[j]; + c[i] = sum; + } + return c; } - - -#define loop_mask_start(mask_in,mask_out,COPY) { \ - ssize_t Mx = mask_in.shape[0], My = mask_in.shape[1], Mz = mask_in.shape[2]; \ - ssize_t mask_length = Mx*My*Mz; \ - \ -for(ssize_t block_start=0;block_start<mask_length;block_start+=acc_block_size){\ - const mask_type *maskin_buffer = mask_in.data + block_start; \ - mask_type *maskout_buffer = mask_out.data + block_start; \ - ssize_t this_block_length = min(acc_block_size,mask_length-block_start); \ - \ - _Pragma(STR(acc parallel loop 
copy(maskin_buffer[:this_block_length], maskout_buffer[:this_block_length]) copy COPY)) \ - for(int64_t k = 0; k<this_block_length;k++){ \ - int64_t flat_idx = block_start + k; \ - int64_t X = (flat_idx / (My*Mz)), Y = (flat_idx / Mz) % My, Z = flat_idx % Mz; \ - std::array<real_t,4> Xs = {X*voxel_size, Y*voxel_size, Z*voxel_size, 1}; \ - bool mask_value = maskin_buffer[k]; +#define loop_mask_start(mask_in,mask_out,COPY) { \ + ssize_t Mx = mask_in.shape[0], My = mask_in.shape[1], Mz = mask_in.shape[2]; \ + ssize_t mask_length = Mx*My*Mz; \ + \ + for (ssize_t block_start = 0; block_start < mask_length; block_start += acc_block_size) { \ + const mask_type *maskin_buffer = mask_in.data + block_start; \ + mask_type *maskout_buffer = mask_out.data + block_start; \ + ssize_t this_block_length = min(acc_block_size, mask_length-block_start); \ + \ + _Pragma(STR(acc parallel loop copy(maskin_buffer[:this_block_length], maskout_buffer[:this_block_length]) copy COPY)) \ + for (int64_t k = 0; k < this_block_length; k++) { \ + int64_t flat_idx = block_start + k; \ + int64_t X = (flat_idx / (My*Mz)), Y = (flat_idx / Mz) % My, Z = flat_idx % Mz; \ + std::array<real_t,4> Xs = { X*voxel_size, Y*voxel_size, Z*voxel_size, 1 }; \ + bool mask_value = maskin_buffer[k]; #define loop_mask_end(mask) }}} void fill_implant_mask(const input_ndarray<mask_type> implant_mask, - float voxel_size, - const array<float,6> &bbox, - float r_fraction, - const matrix4x4 &Muvw, - output_ndarray<mask_type> solid_implant_mask, - output_ndarray<float> rsqr_maxs, - output_ndarray<float> profile - ) -{ - real_t theta_min = M_PI, theta_max = -M_PI; - ssize_t n_segments = rsqr_maxs.shape[0]; - const auto [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; - - printf("implant_mask.shape = %ld,%ld,%ld\n",implant_mask.shape[0],implant_mask.shape[1],implant_mask.shape[2]); - printf("solid_implant_mask.shape = %ld,%ld,%ld\n",solid_implant_mask.shape[0],solid_implant_mask.shape[1],solid_implant_mask.shape[2]); - - 
fprintf(stderr,"voxel_size = %g, U_min = %g, U_max = %g, r_frac = %g, n_segments = %ld\n", - voxel_size, U_min, U_max, r_fraction, n_segments); - - float *rsqr_maxs_d = rsqr_maxs.data; - float *profile_d = profile.data; - - // First pass computes some bounds -- possibly separate out to avoid repeating - loop_mask_start(implant_mask, solid_implant_mask, - (maskin_buffer[:this_block_length], rsqr_maxs_d[:n_segments], Muvw[:16], bbox[:6]) ); - if(mask_value){ - auto [U,V,W,c] = hom_transform(Xs,Muvw); + float voxel_size, + const array<float,6> &bbox, + float r_fraction, + const matrix4x4 &Muvw, + output_ndarray<mask_type> solid_implant_mask, + output_ndarray<float> rsqr_maxs, + output_ndarray<float> profile) { + real_t theta_min = M_PI, theta_max = -M_PI; + ssize_t n_segments = rsqr_maxs.shape[0]; + const auto [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; - real_t r_sqr = V*V+W*W; - real_t theta = atan2(V,W); - - int U_i = floor((U-U_min)*(n_segments-1)/(U_max-U_min)); + printf("implant_mask.shape = %ld,%ld,%ld\n",implant_mask.shape[0],implant_mask.shape[1],implant_mask.shape[2]); + printf("solid_implant_mask.shape = %ld,%ld,%ld\n",solid_implant_mask.shape[0],solid_implant_mask.shape[1],solid_implant_mask.shape[2]); + + fprintf(stderr,"voxel_size = %g, U_min = %g, U_max = %g, r_frac = %g, n_segments = %ld\n", + voxel_size, U_min, U_max, r_fraction, n_segments); - // if(U_i >= 0 && U_i < n_segments){ - if( in_bbox(U,V,W,bbox) ){ - rsqr_maxs_d[U_i] = max(rsqr_maxs_d[U_i], float(r_sqr)); - theta_min = min(theta_min, theta); - theta_max = max(theta_max, theta); - // W_min = min(W_min, W); - } else { - // Otherwise we've calculated it wrong! 
- // fprintf(stderr,"U-coordinate out of bounds: U_i = %ld, U = %g, U_min = %g, U_max = %g\n",U_i,U,U_min,U_max); + float *rsqr_maxs_d = rsqr_maxs.data; + float *profile_d = profile.data; + + // First pass computes some bounds -- possibly separate out to avoid repeating + loop_mask_start(implant_mask, solid_implant_mask, + (maskin_buffer[:this_block_length], rsqr_maxs_d[:n_segments], Muvw[:16], bbox[:6]) ); + if (mask_value) { + auto [U,V,W,c] = hom_transform(Xs,Muvw); + + real_t r_sqr = V*V+W*W; + real_t theta = atan2(V,W); + + int U_i = floor((U-U_min)*(n_segments-1)/(U_max-U_min)); + + // if (U_i >= 0 && U_i < n_segments) { + if ( in_bbox(U,V,W,bbox) ) { + rsqr_maxs_d[U_i] = max(rsqr_maxs_d[U_i], float(r_sqr)); + theta_min = min(theta_min, theta); + theta_max = max(theta_max, theta); + // W_min = min(W_min, W); + } else { + // Otherwise we've calculated it wrong! + // fprintf(stderr,"U-coordinate out of bounds: U_i = %ld, U = %g, U_min = %g, U_max = %g\n",U_i,U,U_min,U_max); + } } - } - loop_mask_end(implant_mask); + loop_mask_end(implant_mask); - double theta_center = (theta_max+theta_min)/2; + double theta_center = (theta_max+theta_min)/2; - fprintf(stderr,"theta_min, theta_center, theta_max = %g,%g,%g\n", theta_min, theta_center, theta_max); + fprintf(stderr,"theta_min, theta_center, theta_max = %g,%g,%g\n", theta_min, theta_center, theta_max); - // Second pass does the actual work - loop_mask_start(implant_mask, solid_implant_mask, - (rsqr_maxs_d[:n_segments], profile_d[:n_segments]) ); - auto [U,V,W,c] = hom_transform(Xs,Muvw); - float r_sqr = V*V+W*W; - float theta = atan2(V,W); - int U_i = floor((U-U_min)*(n_segments-1)/(U_max-U_min)); + // Second pass does the actual work + loop_mask_start(implant_mask, solid_implant_mask, + (rsqr_maxs_d[:n_segments], profile_d[:n_segments]) ); + auto [U,V,W,c] = hom_transform(Xs,Muvw); + float r_sqr = V*V+W*W; + float theta = atan2(V,W); + int U_i = floor((U-U_min)*(n_segments-1)/(U_max-U_min)); - bool solid_mask_value 
= false; - if(U_i >= 0 && U_i < n_segments && W>=W_min){ // TODO: Full bounding box check? - solid_mask_value = mask_value | (r_sqr <= r_fraction*rsqr_maxs_d[U_i]); + bool solid_mask_value = false; + if (U_i >= 0 && U_i < n_segments && W >= W_min) { // TODO: Full bounding box check? + solid_mask_value = mask_value | (r_sqr <= r_fraction*rsqr_maxs_d[U_i]); - if(theta >= theta_min && theta <= theta_center && r_sqr <= rsqr_maxs_d[U_i]){ - atomic_statement() - profile_d[U_i] += solid_mask_value; + if (theta >= theta_min && theta <= theta_center && r_sqr <= rsqr_maxs_d[U_i]) { + atomic_statement() + profile_d[U_i] += solid_mask_value; + } } - } - maskout_buffer[k] = solid_mask_value; - - loop_mask_end(implant_mask); + maskout_buffer[k] = solid_mask_value; + + loop_mask_end(implant_mask); } - + void compute_front_mask(const input_ndarray<mask_type> solid_implant, - const float voxel_size, - const matrix4x4 &Muvw, - std::array<float,6> bbox, - output_ndarray<mask_type> front_mask) -{ - const auto [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; - - loop_mask_start(solid_implant, front_mask, - () ); - - if(!mask_value){ - auto [U,V,W,c] = hom_transform(Xs,Muvw); - - maskout_buffer[k] = W>W_min; - } else - maskout_buffer[k] = 0; - - loop_mask_end(solid_implant) + const float voxel_size, + const matrix4x4 &Muvw, + std::array<float,6> bbox, + output_ndarray<mask_type> front_mask) { + const auto [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; + + loop_mask_start(solid_implant, front_mask, () ); + + if (!mask_value) { + auto [U,V,W,c] = hom_transform(Xs,Muvw); + maskout_buffer[k] = W>W_min; + } else + maskout_buffer[k] = 0; + + loop_mask_end(solid_implant) } - void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance Transform in um, should be low-resolution (will be interpolated) - const input_ndarray<uint8_t> C, // Material classification images (probability per voxel, 0..1 -> 0..255) - float voxel_size, // Voxel size for Cs - float d_min, float d_max, // 
Distance shell to map to cylinder - float theta_min, float theta_max, // Angle range (wrt cylinder center) - std::array<float,6> bbox, - const matrix4x4 &Muvw, // Transform from zyx (in um) to U'V'W' cylinder FoR (in um) - output_ndarray<float> image, // Probability-weighted volume of (class,theta,U)-voxels - output_ndarray<int64_t> count // Number of (class,theta,U)-voxels - ) -{ - ssize_t n_theta = image.shape[0], n_U = image.shape[1]; - - const auto& [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; - - ssize_t ex = edt.shape[0], ey = edt.shape[1], ez = edt.shape[2]; - ssize_t Cx = C.shape[0], Cy = C.shape[1], Cz = C.shape[2]; - - real_t edx = ex/real_t(Cx), edy = ey/real_t(Cy), edz = ex/real_t(Cz); - - ssize_t edt_length = ex*ey*ez; - ssize_t C_length = Cx*Cy*Cz; + const input_ndarray<uint8_t> C, // Material classification images (probability per voxel, 0..1 -> 0..255) + float voxel_size, // Voxel size for Cs + float d_min, float d_max, // Distance shell to map to cylinder + float theta_min, float theta_max, // Angle range (wrt cylinder center) + std::array<float,6> bbox, + const matrix4x4 &Muvw, // Transform from zyx (in um) to U'V'W' cylinder FoR (in um) + output_ndarray<float> image, // Probability-weighted volume of (class,theta,U)-voxels + output_ndarray<int64_t> count // Number of (class,theta,U)-voxels + ){ + ssize_t n_theta = image.shape[0], n_U = image.shape[1]; + + const auto& [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; + + ssize_t ex = edt.shape[0], ey = edt.shape[1], ez = edt.shape[2]; + ssize_t Cx = C.shape[0], Cy = C.shape[1], Cz = C.shape[2]; + + real_t edx = ex/real_t(Cx), edy = ey/real_t(Cy), edz = ex/real_t(Cz); + + ssize_t edt_length = ex*ey*ez; + ssize_t C_length = Cx*Cy*Cz; - printf("Segmenting from %g to %g micrometers distance of implant.\n",d_min,d_max); + printf("Segmenting from %g to %g micrometers distance of implant.\n",d_min,d_max); - printf("Bounding box is [U_min,U_max,V_min,V_max,W_min,W_max] = [[%g,%g],[%g,%g],[%g,%g]]\n", - 
U_min,U_max,V_min,V_max,W_min,W_max); - printf("EDT field is (%ld,%ld,%ld)\n",ex,ey,ez); - - real_t th_min = 1234, th_max = -1234; - ssize_t n_shell = 0; - ssize_t n_shell_bbox = 0; + printf("Bounding box is [U_min,U_max,V_min,V_max,W_min,W_max] = [[%g,%g],[%g,%g],[%g,%g]]\n", + U_min,U_max,V_min,V_max,W_min,W_max); + printf("EDT field is (%ld,%ld,%ld)\n",ex,ey,ez); + + real_t th_min = 1234, th_max = -1234; + ssize_t n_shell = 0; + ssize_t n_shell_bbox = 0; - ssize_t block_height = 64; - - //TODO: new acc/openmp macro in parallel.hh - { - float *image_d = image.data; - int64_t *count_d = count.data; - - for(ssize_t block_start=0, edt_block_start=0; - block_start<C_length; - block_start+=block_height*Cy*Cz, edt_block_start+=block_height*ey*ez){ - const uint8_t *C_buffer = C.data + block_start; - const float *edt_block = edt.data + max(block_start-ey*ez,0L); - - ssize_t this_block_length = min(block_height*Cy*Cz,C_length-block_start); - ssize_t this_edt_length = min((block_height+2)*ey*ez,edt_length-block_start); - - //#pragma acc parallel loop copy(C_buffer[:this_block_length], image_d[:n_theta*n_U], count_d[:n_theta*n_U], bbox[:6], Muvw[:16], edt_block[:this_edt_length]) reduction(+:n_shell,n_shell_bbox) - #pragma omp parallel for reduction(+:n_shell,n_shell_bbox) - for(int64_t k = 0; k<this_block_length;k++){ - const int64_t flat_idx = block_start + k; - const int64_t X = (flat_idx / (Cy*Cz)), Y = (flat_idx / Cz) % Cy, Z = flat_idx % Cz; // Integer indices: Cs[c,X,Y,Z] - // Index into local block - const int64_t Xl = (k / (Cy*Cz)), Yl = (k / Cz) % Cy, Zl = k % Cz; - // Index into local edt block. 
Note EDT has 1-slice padding top+bottom - const float x = (Xl+1)*edx, y = Yl*edy, z = Zl*edy; - - if(x>block_height){ - printf("Block number k=%ld.\nX,Y,Z=%ld,%ld,%ld\nXl,Yl,Zl=%ld,%ld,%ld\nx,y,z=%.2f, %.2f, %.2f\n",k,X,Y,Z,Xl,Yl,Zl,x,y,z); - abort(); - } - - //****** MEAT OF THE IMPLEMENTATION IS HERE ****** - real_t distance = resample2x2x2<float>(edt_block,{this_edt_length/(ey*ez),ey,ez}, - {x,y,z}); - - if(distance > d_min && distance <= d_max){ // TODO: and W>w_min - array<real_t,4> Xs = {X*voxel_size, Y*voxel_size, Z*voxel_size, 1}; - auto [U,V,W,c] = hom_transform(Xs,Muvw); - n_shell ++; - - // printf("distance = %.1f, U,V,W = %.2f,%.2f,%.2f\n",distance,U,V,W); - if(in_bbox(U,V,W,bbox) ){ - - real_t theta = atan2(V,W); - - if(theta>=theta_min && theta<=theta_max){ - n_shell_bbox++; - - - ssize_t theta_i = floor( (theta-theta_min) * (n_theta-1)/(theta_max-theta_min) ); - ssize_t U_i = floor( (U - U_min) * (n_U -1)/( U_max- U_min) ); - - real_t p = C_buffer[k]/255.; - - assert(theta >= theta_min); - assert(theta <= theta_max); - assert(U >= U_min); - assert(U <= U_max); - assert(theta_i >= 0); - assert(theta_i < n_theta); - assert(U_i >= 0); - assert(U_i < n_U); - - if(p>0){ - th_min = min(theta,th_min); - th_max = max(theta,th_max); - - atomic_statement() - image_d[theta_i*n_U + U_i] += p; - - atomic_statement() - count_d[theta_i*n_U + U_i] += 1; - } - } - } - } - } - } - } - printf("n_shell = %ld, n_shell_bbox = %ld\n",n_shell,n_shell_bbox); - printf("theta_min, theta_max = %.2f,%.2f\n",theta_min,theta_max); - printf("th_min, th_max = %.2f,%.2f\n",th_min,th_max); + ssize_t block_height = 64; + + //TODO: new acc/openmp macro in parallel.hh + { + float *image_d = image.data; + int64_t *count_d = count.data; + + for (ssize_t block_start = 0, edt_block_start = 0; block_start < C_length; block_start += block_height*Cy*Cz, edt_block_start += block_height*ey*ez) { + const uint8_t *C_buffer = C.data + block_start; + const float *edt_block = edt.data + 
max(block_start-ey*ez,0L); + + ssize_t this_block_length = min(block_height*Cy*Cz,C_length-block_start); + ssize_t this_edt_length = min((block_height+2)*ey*ez,edt_length-block_start); + + //#pragma acc parallel loop copy(C_buffer[:this_block_length], image_d[:n_theta*n_U], count_d[:n_theta*n_U], bbox[:6], Muvw[:16], edt_block[:this_edt_length]) reduction(+:n_shell,n_shell_bbox) + #pragma omp parallel for reduction(+:n_shell,n_shell_bbox) + for (int64_t k = 0; k < this_block_length; k++) { + const int64_t flat_idx = block_start + k; + const int64_t X = (flat_idx / (Cy*Cz)), Y = (flat_idx / Cz) % Cy, Z = flat_idx % Cz; // Integer indices: Cs[c,X,Y,Z] + // Index into local block + const int64_t Xl = (k / (Cy*Cz)), Yl = (k / Cz) % Cy, Zl = k % Cz; + // Index into local edt block. Note EDT has 1-slice padding top+bottom + const float x = (Xl+1)*edx, y = Yl*edy, z = Zl*edy; + + if (x > block_height) { + printf("Block number k=%ld.\nX,Y,Z=%ld,%ld,%ld\nXl,Yl,Zl=%ld,%ld,%ld\nx,y,z=%.2f, %.2f, %.2f\n",k,X,Y,Z,Xl,Yl,Zl,x,y,z); + abort(); + } + + //****** MEAT OF THE IMPLEMENTATION IS HERE ****** + real_t distance = resample2x2x2<float>(edt_block, {this_edt_length/(ey*ez),ey,ez}, {x,y,z}); + + if (distance > d_min && distance <= d_max) { // TODO: and W>w_min + array<real_t,4> Xs = {X*voxel_size, Y*voxel_size, Z*voxel_size, 1}; + auto [U,V,W,c] = hom_transform(Xs,Muvw); + n_shell ++; + + // printf("distance = %.1f, U,V,W = %.2f,%.2f,%.2f\n",distance,U,V,W); + if (in_bbox(U,V,W,bbox)) { + real_t theta = atan2(V,W); + + if (theta >= theta_min && theta <= theta_max) { + n_shell_bbox++; + + ssize_t theta_i = floor( (theta-theta_min) * (n_theta-1)/(theta_max-theta_min) ); + ssize_t U_i = floor( (U - U_min) * (n_U -1)/( U_max- U_min) ); + + real_t p = C_buffer[k]/255.; + + assert(theta >= theta_min); + assert(theta <= theta_max); + assert(U >= U_min); + assert(U <= U_max); + assert(theta_i >= 0); + assert(theta_i < n_theta); + assert(U_i >= 0); + assert(U_i < n_U); + + if (p > 0) { 
+ th_min = min(theta,th_min); + th_max = max(theta,th_max); + + atomic_statement() + image_d[theta_i*n_U + U_i] += p; + + atomic_statement() + count_d[theta_i*n_U + U_i] += 1; + } + } + } + } + } + } + } + printf("n_shell = %ld, n_shell_bbox = %ld\n",n_shell,n_shell_bbox); + printf("theta_min, theta_max = %.2f,%.2f\n",theta_min,theta_max); + printf("th_min, th_max = %.2f,%.2f\n",th_min,th_max); } - From 3ee49868fb745e3430aa3ee357dd15d48a9195f2 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 14:21:38 +0100 Subject: [PATCH 073/136] #25 Merged center_of_mass from geometry --- src/Makefile | 2 +- src/lib/cpp/cpu/geometry.cc | 37 ++++++ src/lib/cpp/cpu_seq/geometry.cc | 77 +++++------ src/lib/cpp/gpu/geometry.cc | 50 +++++++ src/lib/cpp/include/datatypes.hh | 13 +- src/lib/cpp/include/geometry.hh | 20 +++ src/pybind/geometry-pybind.cc | 222 ++++++++++++++----------------- 7 files changed, 248 insertions(+), 173 deletions(-) create mode 100644 src/lib/cpp/cpu/geometry.cc create mode 100644 src/lib/cpp/gpu/geometry.cc create mode 100644 src/lib/cpp/include/geometry.hh diff --git a/src/Makefile b/src/Makefile index e26cb0c..1d73b51 100644 --- a/src/Makefile +++ b/src/Makefile @@ -6,7 +6,7 @@ CPP_FOLDER=lib/cpp #CXXFLAGS += -I../contrib/cpptqdm/ -Iinclude CXXFLAGS += -I$(CPP_FOLDER)/include PLATFORMS=cpu_seq cpu gpu -LIBS=io morphology +LIBS=io geometry morphology TARGETS = $(foreach PLATFORM, $(PLATFORMS), $(foreach LIB, $(LIBS), $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX))) CLEANUP = $(TARGETS) $(foreach PLATFORM, $(PLATFORMS), $(CPP_FOLDER)/$(PLATFORM)/__pycache__) diff --git a/src/lib/cpp/cpu/geometry.cc b/src/lib/cpp/cpu/geometry.cc new file mode 100644 index 0000000..bd32cfb --- /dev/null +++ b/src/lib/cpp/cpu/geometry.cc @@ -0,0 +1,37 @@ +#include <assert.h> +#include <inttypes.h> +#include <stdio.h> +#include <math.h> +using namespace std; + +#include "geometry.hh" + +array<real_t,3> center_of_mass(const 
input_ndarray<mask_type> voxels) { + uint64_t cmx = 0, cmy = 0, cmz = 0; + size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; + int64_t image_length = Nx*Ny*Nz; + + print_timestamp("center_of_mass start"); + + uint64_t total_mass = 0; + + #pragma omp parallel for reduction(+:total_mass,cmx,cmy,cmz) + for (int64_t k = 0; k < image_length; k++) { + mask_type m = voxels.data[k]; + + int64_t x = k / (Ny*Nz); + int64_t y = (k / Nz) % Ny; + int64_t z = k % Nz; + + total_mass += m; + cmx += m*x; cmy += m*y; cmz += m*z; + } + real_t + rcmx = cmx / ((real_t) total_mass), + rcmy = cmy / ((real_t) total_mass), + rcmz = cmz / ((real_t) total_mass); + + print_timestamp("center_of_mass end"); + + return array<real_t,3>{ rcmx, rcmy, rcmz }; +} \ No newline at end of file diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 2a155aa..a3778f4 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -1,55 +1,38 @@ // TODO: Coordinates are named X,Y,Z in c++, but Z,Y,X in python. Homogenize to X,Y,Z! -#include <chrono> #include <assert.h> #include <inttypes.h> #include <stdio.h> #include <math.h> using namespace std; -#include "datatypes.hh" -#include "parallel.hh" +#include "geometry.hh" -#define dot(a,b) (a[0]*b[0] + a[1]*b[1] + a[2]*b[2]) - -void print_timestamp(string message) { - auto now = chrono::system_clock::to_time_t(chrono::system_clock::now()); - tm local_tm = *localtime(&now); - fprintf(stderr,"%s at %02d:%02d:%02d\n", message.c_str(), local_tm.tm_hour, local_tm.tm_min, local_tm.tm_sec); -} - -// TODO: Fix OpenACC copies & re-enable GPU array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { - // nvc++ doesn't support OpenACC 2.7 array reductions yet. 
- real_t cmx = 0, cmy = 0, cmz = 0; - size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; + uint64_t cmx = 0, cmy = 0, cmz = 0; + size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; int64_t image_length = Nx*Ny*Nz; print_timestamp("center_of_mass start"); - real_t total_mass = 0; - for (int64_t block_start = 0; block_start < image_length; block_start += acc_block_size) { - - const mask_type *buffer = voxels.data + block_start; - ssize_t this_block_length = min(acc_block_size, image_length-block_start); - //#pragma acc parallel loop reduction(+:cmx,cmy,cmz,total_mass) copyin(buffer[:this_block_length]) - //reduction_loop((+:cmx,cmy,cmz,total_mass),()) - for (int64_t k = 0; k < this_block_length; k++) { - real_t m = buffer[k]; + uint64_t total_mass = 0; + for (int64_t k = 0; k < image_length; k++) { + mask_type m = voxels.data[k]; - int64_t flat_idx = block_start + k; - int64_t x = flat_idx / (Ny*Nz); - int64_t y = (flat_idx / Nz) % Ny; - int64_t z = flat_idx % Nz; + int64_t x = k / (Ny*Nz); + int64_t y = (k / Nz) % Ny; + int64_t z = k % Nz; - total_mass += m; - cmx += m*x; cmy += m*y; cmz += m*z; - } + total_mass += m; + cmx += m*x; cmy += m*y; cmz += m*z; } - cmx /= total_mass; cmy /= total_mass; cmz /= total_mass; + real_t + rcmx = cmx / ((real_t) total_mass), + rcmy = cmy / ((real_t) total_mass), + rcmz = cmz / ((real_t) total_mass); print_timestamp("center_of_mass end"); - return array<real_t,3>{cmx,cmy,cmz}; + return array<real_t,3>{ rcmx, rcmy, rcmz }; } array<real_t,9> inertia_matrix_serial(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { @@ -100,7 +83,7 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const arr const mask_type *buffer = voxels.data + block_start; ssize_t block_length = min(acc_block_size, image_length-block_start); - reduction_loop((+:M00,M01,M02,M11,M12,M22),()) + //reduction_loop((+:M00,M01,M02,M11,M12,M22),()) for (int64_t k = 0; k < block_length; k++) 
{ //\if (buffer[k] != 0) int64_t flat_idx = block_start + k; real_t xs[3] = { @@ -143,7 +126,7 @@ void integrate_axes(const input_ndarray<mask_type> &voxels, int block_length = min(acc_block_size,image_length-block_start); //#pragma acc parallel loop copy(output_data[:Nv*Nw]) copyin(buffer[:block_length], x0, v_axis, w_axis) - parallel_loop((output_data[:Nv*Nw])) + //parallel_loop((output_data[:Nv*Nw])) for (int64_t k = 0; k < block_length; k++) { if (buffer[k] != 0) { int64_t flat_idx = block_start + k; @@ -157,7 +140,7 @@ void integrate_axes(const input_ndarray<mask_type> &voxels, int64_t i_v = round(v-v_min), j_w = round(w-w_min); if (i_v >= 0 && j_w >= 0 && i_v < Nv && j_w < Nw) { - atomic_statement() + //atomic_statement() output_data[i_v*Nw + j_w] += voxel; } } @@ -277,7 +260,7 @@ void zero_outside_bbox(const array<real_t,9> &principal_axes, mask_type *buffer = voxels.data + block_start; ssize_t this_block_length = min(acc_block_size, image_length-block_start); - parallel_loop((buffer[:this_block_length])) + //parallel_loop((buffer[:this_block_length])) for (int64_t k = 0; k < this_block_length; k++) { int64_t flat_idx = block_start + k; int64_t x = flat_idx / (Ny*Nz); @@ -337,7 +320,7 @@ inline vector4 hom_transform(const vector4 &x, const matrix4x4 &M) { #define loop_mask_end(mask) }}} - +/* void fill_implant_mask(const input_ndarray<mask_type> implant_mask, float voxel_size, const array<float,6> &bbox, @@ -360,8 +343,7 @@ void fill_implant_mask(const input_ndarray<mask_type> implant_mask, float *profile_d = profile.data; // First pass computes some bounds -- possibly separate out to avoid repeating - loop_mask_start(implant_mask, solid_implant_mask, - (maskin_buffer[:this_block_length], rsqr_maxs_d[:n_segments], Muvw[:16], bbox[:6]) ); + //loop_mask_start(implant_mask, solid_implant_mask, (maskin_buffer[:this_block_length], rsqr_maxs_d[:n_segments], Muvw[:16], bbox[:6]) ); if (mask_value) { auto [U,V,W,c] = hom_transform(Xs,Muvw); @@ -381,14 +363,14 @@ 
void fill_implant_mask(const input_ndarray<mask_type> implant_mask, // fprintf(stderr,"U-coordinate out of bounds: U_i = %ld, U = %g, U_min = %g, U_max = %g\n",U_i,U,U_min,U_max); } } - loop_mask_end(implant_mask); + //loop_mask_end(implant_mask); double theta_center = (theta_max+theta_min)/2; fprintf(stderr,"theta_min, theta_center, theta_max = %g,%g,%g\n", theta_min, theta_center, theta_max); // Second pass does the actual work - loop_mask_start(implant_mask, solid_implant_mask, + //loop_mask_start(implant_mask, solid_implant_mask, (rsqr_maxs_d[:n_segments], profile_d[:n_segments]) ); auto [U,V,W,c] = hom_transform(Xs,Muvw); float r_sqr = V*V+W*W; @@ -400,15 +382,15 @@ void fill_implant_mask(const input_ndarray<mask_type> implant_mask, solid_mask_value = mask_value | (r_sqr <= r_fraction*rsqr_maxs_d[U_i]); if (theta >= theta_min && theta <= theta_center && r_sqr <= rsqr_maxs_d[U_i]) { - atomic_statement() + //atomic_statement() profile_d[U_i] += solid_mask_value; } } maskout_buffer[k] = solid_mask_value; - loop_mask_end(implant_mask); + //loop_mask_end(implant_mask); } - + void compute_front_mask(const input_ndarray<mask_type> solid_implant, const float voxel_size, const matrix4x4 &Muvw, @@ -426,6 +408,7 @@ void compute_front_mask(const input_ndarray<mask_type> solid_implant, loop_mask_end(solid_implant) } +*/ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance Transform in um, should be low-resolution (will be interpolated) const input_ndarray<uint8_t> C, // Material classification images (probability per voxel, 0..1 -> 0..255) @@ -521,10 +504,10 @@ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance th_min = min(theta,th_min); th_max = max(theta,th_max); - atomic_statement() + //atomic_statement() image_d[theta_i*n_U + U_i] += p; - atomic_statement() + //atomic_statement() count_d[theta_i*n_U + U_i] += 1; } } diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc new file mode 100644 index 
0000000..0ff35a1 --- /dev/null +++ b/src/lib/cpp/gpu/geometry.cc @@ -0,0 +1,50 @@ +#include <assert.h> +#include <inttypes.h> +#include <stdio.h> +#include <math.h> +using namespace std; + +#include "geometry.hh" + +array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { + // nvc++ doesn't support OpenACC 2.7 array reductions yet. + uint64_t cmx = 0, cmy = 0, cmz = 0; + size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; + int64_t image_length = Nx*Ny*Nz; + + print_timestamp("center_of_mass start"); + + uint64_t total_mass = 0; + + #pragma acc data copy(total_mass,cmx,cmy,cmz) + { + for (int64_t block_start = 0; block_start < image_length; block_start += acc_block_size) { + const mask_type *buffer = voxels.data + block_start; + ssize_t this_block_size = min(acc_block_size, image_length-block_start); + + #pragma acc data copyin(buffer[:this_block_size]) + { + #pragma acc parallel loop reduction(+:total_mass,cmx,cmy,cmz) + for (int64_t k = 0; k < this_block_size; k++) { + mask_type m = buffer[k]; + + int64_t flat_idx = block_start + k; + int64_t x = flat_idx / (Ny*Nz); + int64_t y = (flat_idx / Nz) % Ny; + int64_t z = flat_idx % Nz; + + total_mass += m; + cmx += m*x; cmy += m*y; cmz += m*z; + } + } + } + } + real_t + rcmx = cmx / ((real_t) total_mass), + rcmy = cmy / ((real_t) total_mass), + rcmz = cmz / ((real_t) total_mass); + + print_timestamp("center_of_mass end"); + + return array<real_t,3>{rcmx, rcmy, rcmz}; +} \ No newline at end of file diff --git a/src/lib/cpp/include/datatypes.hh b/src/lib/cpp/include/datatypes.hh index 4f92c5b..3defac0 100644 --- a/src/lib/cpp/include/datatypes.hh +++ b/src/lib/cpp/include/datatypes.hh @@ -2,6 +2,9 @@ #define datatypes_h #include <array> #include <vector> +#include <pybind11/pybind11.h> +#include <pybind11/stl.h> +#include <pybind11/numpy.h> typedef uint8_t mask_type; // TODO: Template + explicit instantiation typedef uint16_t voxel_type; @@ -10,7 +13,15 @@ typedef uint16_t field_type; 
typedef float gauss_type; typedef float real_t; -constexpr ssize_t acc_block_size = 1024 * 1024 * 1024/sizeof(mask_type); // 1 GB +namespace py = pybind11; +template <typename voxel_type> +using np_array = py::array_t<voxel_type, py::array::c_style | py::array::forcecast>; + +typedef py::array_t<mask_type, py::array::c_style | py::array::forcecast> np_maskarray; +typedef py::array_t<real_t, py::array::c_style | py::array::forcecast> np_realarray; +typedef py::array_t<uint8_t, py::array::c_style | py::array::forcecast> np_bytearray; + +constexpr ssize_t acc_block_size = 1024 * 1024 * 1024 / sizeof(mask_type); // 1 GB struct plane_t { array<real_t,3> cm, u_axis, v_axis; diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh new file mode 100644 index 0000000..7073758 --- /dev/null +++ b/src/lib/cpp/include/geometry.hh @@ -0,0 +1,20 @@ +#ifndef geometry_h +#define geometry_h + +using namespace std; + +#include "datatypes.hh" +#include <chrono> +#include <string> + +#define dot(a,b) (a[0]*b[0] + a[1]*b[1] + a[2]*b[2]) + +void print_timestamp(string message) { + auto now = chrono::system_clock::to_time_t(chrono::system_clock::now()); + tm local_tm = *localtime(&now); + fprintf(stderr,"%s at %02d:%02d:%02d\n", message.c_str(), local_tm.tm_hour, local_tm.tm_min, local_tm.tm_sec); +} + +array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels); + +#endif \ No newline at end of file diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index a7c1f0d..b738ef0 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -1,164 +1,138 @@ -#include <pybind11/pybind11.h> -#include <pybind11/stl.h> -#include <pybind11/numpy.h> - #include "geometry.cc" -namespace python_api { - namespace py = pybind11; - template <typename voxel_type> - using np_array = py::array_t<voxel_type, py::array::c_style | py::array::forcecast>; - - typedef py::array_t<mask_type, py::array::c_style | py::array::forcecast> 
np_maskarray; - typedef py::array_t<real_t, py::array::c_style | py::array::forcecast> np_realarray; - typedef py::array_t<uint8_t, py::array::c_style | py::array::forcecast> np_bytearray; +namespace python_api { - array<real_t,3> center_of_mass(const np_maskarray &np_voxels){ - auto voxels_info = np_voxels.request(); +array<real_t,3> center_of_mass(const np_maskarray &np_voxels){ + auto voxels_info = np_voxels.request(); return ::center_of_mass({voxels_info.ptr,voxels_info.shape}); - } - - - - array<real_t,9> inertia_matrix(const np_maskarray &np_voxels, array<real_t,3>& cm){ - auto voxels_info = np_voxels.request(); +} +/* +array<real_t,9> inertia_matrix(const np_maskarray &np_voxels, array<real_t,3>& cm){ + auto voxels_info = np_voxels.request(); - return ::inertia_matrix({voxels_info.ptr,voxels_info.shape}, cm); - } + return inertia_matrix({voxels_info.ptr,voxels_info.shape}, cm); +} - array<real_t,9> inertia_matrix_serial(const np_maskarray &np_voxels, array<real_t,3>& cm){ - auto voxels_info = np_voxels.request(); +array<real_t,9> inertia_matrix_serial(const np_maskarray &np_voxels, array<real_t,3>& cm){ + auto voxels_info = np_voxels.request(); - return ::inertia_matrix_serial({voxels_info.ptr,voxels_info.shape}, cm); - } - + return inertia_matrix_serial({voxels_info.ptr,voxels_info.shape}, cm); +} template <typename voxel_type> void sample_plane(const np_array<voxel_type> &np_voxels, - const real_t voxel_size, // In micrometers - const array<real_t,3> cm, - const array<real_t,3> u_axis, - const array<real_t,3> v_axis, - const array<real_t,4> bbox, // [umin,umax,vmin,vmax] in micrometers - np_array<float> np_plane_samples) - { + const real_t voxel_size, // In micrometers + const array<real_t,3> cm, + const array<real_t,3> u_axis, + const array<real_t,3> v_axis, + const array<real_t,4> bbox, // [umin,umax,vmin,vmax] in micrometers + np_array<float> np_plane_samples) { auto voxels_info = np_voxels.request(); auto plane_samples_info = 
np_plane_samples.request(); - ::sample_plane<voxel_type>({voxels_info.ptr, voxels_info.shape}, voxel_size, - cm,u_axis,v_axis,bbox, - {plane_samples_info.ptr, plane_samples_info.shape}); - } + sample_plane<voxel_type>({voxels_info.ptr, voxels_info.shape}, voxel_size, + cm,u_axis,v_axis,bbox, + {plane_samples_info.ptr, plane_samples_info.shape}); +} - - - void integrate_axes(const np_maskarray &np_voxels, - const array<real_t,3> &x0, - const array<real_t,3> &v_axis, - const array<real_t,3> &w_axis, - const real_t v_min, const real_t w_min, - np_realarray &output) - { +void integrate_axes(const np_maskarray &np_voxels, + const array<real_t,3> &x0, + const array<real_t,3> &v_axis, + const array<real_t,3> &w_axis, + const real_t v_min, const real_t w_min, + np_realarray &output) { auto voxels_info = np_voxels.request(); auto output_info = output.request(); - ::integrate_axes({voxels_info.ptr, voxels_info.shape}, - x0,v_axis,w_axis, - v_min, w_min, - {output_info.ptr, output_info.shape}); - } + integrate_axes({voxels_info.ptr, voxels_info.shape}, + x0,v_axis,w_axis, + v_min, w_min, + {output_info.ptr, output_info.shape}); +} - void zero_outside_bbox(const array<real_t,9> &principal_axes, - const array<real_t,6> ¶meter_ranges, - const array<real_t,3> &cm, // TOOD: Med eller uden voxelsize? - np_maskarray &np_voxels) - { +void zero_outside_bbox(const array<real_t,9> &principal_axes, + const array<real_t,6> ¶meter_ranges, + const array<real_t,3> &cm, // TOOD: Med eller uden voxelsize? 
+ np_maskarray &np_voxels) { auto voxels_info = np_voxels.request(); - ::zero_outside_bbox(principal_axes, - parameter_ranges, - cm, - {voxels_info.ptr, voxels_info.shape}); - } + zero_outside_bbox(principal_axes, + parameter_ranges, + cm, + {voxels_info.ptr, voxels_info.shape}); +} void fill_implant_mask(const np_maskarray implant_mask, - float voxel_size, - const array<float,6> &bbox, - float r_fraction, - const matrix4x4 &Muvw, - np_maskarray solid_implant_mask, - np_array<float> rsqr_maxs, - np_array<float> profile - ) -{ - auto implant_info = implant_mask.request(), - solid_implant_info = solid_implant_mask.request(), - rsqr_info = rsqr_maxs.request(), - profile_info = profile.request(); - - return ::fill_implant_mask({implant_info.ptr, implant_info.shape}, - voxel_size, bbox, r_fraction, Muvw, - {solid_implant_info.ptr, solid_implant_info.shape}, - {rsqr_info.ptr, rsqr_info.shape}, - {profile_info.ptr, profile_info.shape} - ); + float voxel_size, + const array<float,6> &bbox, + float r_fraction, + const matrix4x4 &Muvw, + np_maskarray solid_implant_mask, + np_array<float> rsqr_maxs, + np_array<float> profile + ) { + auto implant_info = implant_mask.request(), + solid_implant_info = solid_implant_mask.request(), + rsqr_info = rsqr_maxs.request(), + profile_info = profile.request(); + + return fill_implant_mask({implant_info.ptr, implant_info.shape}, + voxel_size, bbox, r_fraction, Muvw, + {solid_implant_info.ptr, solid_implant_info.shape}, + {rsqr_info.ptr, rsqr_info.shape}, + {profile_info.ptr, profile_info.shape} + ); } void compute_front_mask(const np_array<uint8_t> &np_solid_implant, - const float voxel_size, - const matrix4x4 &Muvw, - std::array<float,6> bbox, - np_array<mask_type> &np_front_mask) -{ - auto solid_implant_info = np_solid_implant.request(); - auto front_mask_info = np_front_mask.request(); - - ::compute_front_mask({solid_implant_info.ptr, solid_implant_info.shape}, - voxel_size, Muvw, bbox, - {front_mask_info.ptr, front_mask_info.shape}); + 
const float voxel_size, + const matrix4x4 &Muvw, + std::array<float,6> bbox, + np_array<mask_type> &np_front_mask) { + auto solid_implant_info = np_solid_implant.request(); + auto front_mask_info = np_front_mask.request(); + + ::compute_front_mask({solid_implant_info.ptr, solid_implant_info.shape}, + voxel_size, Muvw, bbox, + {front_mask_info.ptr, front_mask_info.shape}); } - - void cylinder_projection(const np_array<float> &np_edt, // Euclidean Distance Transform in um, should be low-resolution (will be interpolated) - const np_bytearray &np_Cs, // Material classification images (probability per voxel, 0..1 -> 0..255) - float Cs_voxel_size, // Voxel size for Cs - float d_min, float d_max, // Distance shell to map to cylinder - float theta_min, float theta_max, // Angle range (wrt cylinder center) - const array<float,6> &bbox, // Implant bounding box (in U'V'W'-coordinates) - const matrix4x4 &Muvw, // Transform from zyx (in um) to U'V'W' cylinder FoR (in um) - np_array<float> &np_images, // Probability-weighted volume of (class,theta,U)-voxels - np_array<uint64_t> &np_counts // Number of (class,theta,U)-voxels - ) - { +void cylinder_projection(const np_array<float> &np_edt, // Euclidean Distance Transform in um, should be low-resolution (will be interpolated) + const np_bytearray &np_Cs, // Material classification images (probability per voxel, 0..1 -> 0..255) + float Cs_voxel_size, // Voxel size for Cs + float d_min, float d_max, // Distance shell to map to cylinder + float theta_min, float theta_max, // Angle range (wrt cylinder center) + const array<float,6> &bbox, // Implant bounding box (in U'V'W'-coordinates) + const matrix4x4 &Muvw, // Transform from zyx (in um) to U'V'W' cylinder FoR (in um) + np_array<float> &np_images, // Probability-weighted volume of (class,theta,U)-voxels + np_array<uint64_t> &np_counts // Number of (class,theta,U)-voxels + ) { auto edt_info = np_edt.request(); auto Cs_info = np_Cs.request(); auto images_info = np_images.request(); 
auto counts_info = np_counts.request(); ::cylinder_projection({edt_info.ptr,edt_info.shape}, - {Cs_info.ptr, Cs_info.shape}, - Cs_voxel_size,d_min,d_max,theta_min,theta_max,bbox,Muvw, - {images_info.ptr, images_info.shape}, - {counts_info.ptr, counts_info.shape}); - } - -} - - - + {Cs_info.ptr, Cs_info.shape}, + Cs_voxel_size,d_min,d_max,theta_min,theta_max,bbox,Muvw, + {images_info.ptr, images_info.shape}, + {counts_info.ptr, counts_info.shape}); +}*/ +} + PYBIND11_MODULE(geometry, m) { m.doc() = "Voxel Geometry Module"; // optional module docstring m.def("center_of_mass", &python_api::center_of_mass); - m.def("inertia_matrix", &python_api::inertia_matrix); - m.def("inertia_matrix_serial",&python_api::inertia_matrix_serial); - m.def("integrate_axes", &python_api::integrate_axes); - m.def("zero_outside_bbox", &python_api::zero_outside_bbox); - m.def("fill_implant_mask", &python_api::fill_implant_mask); - m.def("cylinder_projection", &python_api::cylinder_projection); - m.def("sample_plane", &python_api::sample_plane<uint16_t>); - m.def("sample_plane", &python_api::sample_plane<uint8_t>); - m.def("compute_front_mask", &python_api::compute_front_mask); + //m.def("inertia_matrix", &python_api::inertia_matrix); + //m.def("inertia_matrix_serial",&python_api::inertia_matrix_serial); + //m.def("integrate_axes", &python_api::integrate_axes); + //m.def("zero_outside_bbox", &python_api::zero_outside_bbox); + //m.def("fill_implant_mask", &python_api::fill_implant_mask); + //m.def("cylinder_projection", &python_api::cylinder_projection); + //m.def("sample_plane", &python_api::sample_plane<uint16_t>); + //m.def("sample_plane", &python_api::sample_plane<uint8_t>); + //m.def("compute_front_mask", &python_api::compute_front_mask); } From 8a17c7107097e01f0b1c854cd3cf6a94ee53a9cf Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 14:22:03 +0100 Subject: [PATCH 074/136] #16 Added unit test for center_of_mass --- src/test/test_geometry.py | 45 
+++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 src/test/test_geometry.py diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py new file mode 100644 index 0000000..4b32258 --- /dev/null +++ b/src/test/test_geometry.py @@ -0,0 +1,45 @@ +''' +Unit tests for the geometry library. +''' +import sys +sys.path.append(sys.path[0]+'/../lib/cpp') +import cpu_seq.geometry as m_cpu_seq +import cpu.geometry as m_cpu +import gpu.geometry as m_gpu + +import datetime +from functools import partial +import numpy as np +import pytest + +# Parameters +#n = 2344 # ~12 GB, used for testing whether blocked works. +n = 128 + +def run_with_warmup(f): + f() + start = datetime.datetime.now() + result = f() + end = datetime.datetime.now() + return result, end - start + +def test_center_of_mass(): + voxels = np.random.randint(0, 255, (n,n,n), np.uint8) + + baseline_f = partial(m_cpu_seq.center_of_mass, voxels) + cpu_f = partial(m_cpu.center_of_mass, voxels) + gpu_f = partial(m_gpu.center_of_mass, voxels) + + baseline, baseline_t = run_with_warmup(baseline_f) + print (f'Sequential ran in {baseline_t}') + + cpu, cpu_t = run_with_warmup(cpu_f) + print (f'Parallel CPU ran in {cpu_t}, which is {baseline_t / cpu_t} times faster than sequential') + assert np.allclose(baseline, cpu) + + gpu, gpu_t = run_with_warmup(gpu_f) + print (f'GPU ran in {gpu_t}, which is {baseline_t / gpu_t} times faster than sequential') + assert np.allclose(baseline, gpu) + +if __name__ == '__main__': + test_center_of_mass() \ No newline at end of file From caf0934ebc580f039c2aa3628a7b7c4c38eafebc Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 14:22:36 +0100 Subject: [PATCH 075/136] #15 Removed unfair warmup for morphology benchmark --- src/test/test_morphology.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/test_morphology.py b/src/test/test_morphology.py index d99ccae..c48cc40 100644 --- 
a/src/test/test_morphology.py +++ b/src/test/test_morphology.py @@ -38,7 +38,6 @@ def test_morphology(r, m, op, nd): result = np.empty_like(implant_mask) f = getattr(m, f'{op}_3d_sphere') - f(implant_mask, r, result) fsta = datetime.datetime.now() f(implant_mask, r, result) fend = datetime.datetime.now() From abc215efc7997b98966a0313d5a2a644fcc9ccab Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 21:03:10 +0100 Subject: [PATCH 076/136] #25 Moved inertia_matrix from geometry.cc --- src/lib/cpp/cpu/geometry.cc | 47 +++++++++++++++++++++- src/lib/cpp/cpu_seq/geometry.cc | 70 ++++++++++----------------------- src/lib/cpp/gpu/geometry.cc | 57 +++++++++++++++++++++++++++ src/lib/cpp/include/geometry.hh | 7 ++-- src/pybind/geometry-pybind.cc | 15 +++---- src/test/test_geometry.py | 54 +++++++++++++++++++------ 6 files changed, 173 insertions(+), 77 deletions(-) diff --git a/src/lib/cpp/cpu/geometry.cc b/src/lib/cpp/cpu/geometry.cc index bd32cfb..4a41724 100644 --- a/src/lib/cpp/cpu/geometry.cc +++ b/src/lib/cpp/cpu/geometry.cc @@ -14,7 +14,7 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { print_timestamp("center_of_mass start"); uint64_t total_mass = 0; - + #pragma omp parallel for reduction(+:total_mass,cmx,cmy,cmz) for (int64_t k = 0; k < image_length; k++) { mask_type m = voxels.data[k]; @@ -34,4 +34,49 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { print_timestamp("center_of_mass end"); return array<real_t,3>{ rcmx, rcmy, rcmz }; +} + +array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { + real_t + Ixx = 0, Ixy = 0, Ixz = 0, + Iyy = 0, Iyz = 0, + Izz = 0; + + ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; + + print_timestamp("inertia_matrix_serial start"); + + #pragma omp parallel for collapse(3) reduction(+:Ixx,Iyy,Izz) reduction(-:Ixy,Ixz,Iyz) + for (int64_t X = 0; X < Nx; X++) { + for (int64_t Y 
= 0; Y < Ny; Y++) { + for (int64_t Z = 0; Z < Nz; Z++) { + + // TODO shouldn't the loops be interchanged to match the access pattern? (Naming-wise, that is) + int64_t k = X*Ny*Nz + Y*Nz + Z; + mask_type m = voxels.data[k]; + + // m guards this, and this removes branches + // if (m != 0) + real_t + x = X - cm[0], + y = Y - cm[1], + z = Z - cm[2]; + + Ixx += m * (y*y + z*z); + Iyy += m * (x*x + z*z); + Izz += m * (x*x + y*y); + Ixy -= m * x*y; + Ixz -= m * x*z; + Iyz -= m * y*z; + } + } + } + + print_timestamp("inertia_matrix_serial end"); + + return array<real_t,9> { + Ixx, Ixy, Ixz, + Ixy, Iyy, Iyz, + Ixz, Iyz, Izz + }; } \ No newline at end of file diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index a3778f4..ddf962d 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -35,7 +35,7 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { return array<real_t,3>{ rcmx, rcmy, rcmz }; } -array<real_t,9> inertia_matrix_serial(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { +array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { real_t Ixx = 0, Ixy = 0, Ixz = 0, Iyy = 0, Iyz = 0, @@ -44,15 +44,24 @@ array<real_t,9> inertia_matrix_serial(const input_ndarray<mask_type> &voxels, co ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; print_timestamp("inertia_matrix_serial start"); - for (int64_t X=0,k=0;X<Nx;X++) { - for (int64_t Y=0;Y<Ny;Y++) { - for (int64_t Z=0;Z<Nz;Z++,k++) { - real_t x = X-cm[0], y = Y-cm[1], z = Z-cm[2]; + + int64_t k = 0; + for (int64_t X = 0; X < Nx; X++) { + for (int64_t Y = 0; Y < Ny; Y++) { + for (int64_t Z = 0; Z < Nz; Z++) { + mask_type m = voxels.data[k]; + k++; + + // m guards this, and then branches are removed + //if (m != 0) + real_t + x = X - cm[0], + y = Y - cm[1], + z = Z - cm[2]; - real_t m = voxels.data[k]; - Ixx += m*(y*y+z*z); - Iyy += m*(x*x+z*z); - Izz += 
m*(x*x+y*y); + Ixx += m * (y*y + z*z); + Iyy += m * (x*x + z*z); + Izz += m * (x*x + y*y); Ixy -= m * x*y; Ixz -= m * x*z; Iyz -= m * y*z; @@ -60,7 +69,8 @@ array<real_t,9> inertia_matrix_serial(const input_ndarray<mask_type> &voxels, co } } - print_timestamp("inertia_matrix_serial end"); + print_timestamp("inertia_matrix_serial end"); + return array<real_t,9> { Ixx, Ixy, Ixz, Ixy, Iyy, Iyz, @@ -68,46 +78,6 @@ array<real_t,9> inertia_matrix_serial(const input_ndarray<mask_type> &voxels, co }; } -array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { - // nvc++ doesn't support OpenACC 2.7 array reductions yet, so must name each element. - real_t - M00 = 0, M01 = 0, M02 = 0, - M11 = 0, M12 = 0, - M22 = 0; - - size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - ssize_t image_length = Nx*Ny*Nz; - - print_timestamp("inertia_matrix start"); - for (ssize_t block_start = 0; block_start < image_length; block_start += acc_block_size) { - const mask_type *buffer = voxels.data + block_start; - ssize_t block_length = min(acc_block_size, image_length-block_start); - - //reduction_loop((+:M00,M01,M02,M11,M12,M22),()) - for (int64_t k = 0; k < block_length; k++) { //\if (buffer[k] != 0) - int64_t flat_idx = block_start + k; - real_t xs[3] = { - (flat_idx / (Ny*Nz)) - cm[0], // x - ((flat_idx / Nz) % Ny) - cm[1], // y - (flat_idx % Nz) - cm[2] }; // z - - real_t m = buffer[k]; - real_t diag = dot(xs,xs); - M00 += m*(diag - xs[0] * xs[0]); - M11 += m*(diag - xs[1] * xs[1]); - M22 += m*(diag - xs[2] * xs[2]); - M01 -= m * xs[0] * xs[1]; - M02 -= m * xs[0] * xs[2]; - M12 -= m * xs[1] * xs[2]; - } - } - print_timestamp("inertia_matrix end"); - return array<real_t,9> { - M00, M01, M02, - M01, M11, M12, - M02, M12, M22 }; -} - void integrate_axes(const input_ndarray<mask_type> &voxels, const array<real_t,3> &x0, const array<real_t,3> &v_axis, diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index 
0ff35a1..87584dc 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -47,4 +47,61 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { print_timestamp("center_of_mass end"); return array<real_t,3>{rcmx, rcmy, rcmz}; +} + +array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { + // nvc++ doesn't support OpenACC 2.7 array reductions yet, so must name each element. + real_t + Ixx = 0, Ixy = 0, Ixz = 0, + Iyy = 0, Iyz = 0, + Izz = 0; + + size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; + ssize_t image_length = Nx*Ny*Nz; + + print_timestamp("inertia_matrix start"); + + #pragma acc data copy(Ixx, Ixy, Ixz, Iyy, Iyz, Izz) + { + for (ssize_t block_start = 0; block_start < image_length; block_start += acc_block_size) { + const mask_type *buffer = voxels.data + block_start; + ssize_t this_block_size = min(acc_block_size, image_length - block_start); + + #pragma acc data copyin(buffer[:this_block_size]) + { + #pragma acc parallel loop reduction(+:Ixx,Iyy,Izz) reduction(-:Ixy,Ixz,Iyz) + for (int64_t k = 0; k < this_block_size; k++) { //\if (buffer[k] != 0) + mask_type m = buffer[k]; + + // m guards this, and GPUs doesn't like branches + //if (m != 0) + int64_t + flat_idx = block_start + k, + X = flat_idx / (Ny * Nz), + Y = ((flat_idx) / Nz) % Ny, + Z = flat_idx % Nz; + + real_t + x = X - cm[0], + y = Y - cm[1], + z = Z - cm[2]; + + Ixx += m * (y*y + z*z); + Iyy += m * (x*x + z*z); + Izz += m * (x*x + y*y); + Ixy -= m * x*y; + Ixz -= m * x*z; + Iyz -= m * y*z; + } + } + } + } + + print_timestamp("inertia_matrix end"); + + return array<real_t,9> { + Ixx, Ixy, Ixz, + Ixy, Iyy, Iyz, + Ixz, Iyz, Izz + }; } \ No newline at end of file diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh index 7073758..0f729cd 100644 --- a/src/lib/cpp/include/geometry.hh +++ b/src/lib/cpp/include/geometry.hh @@ -10,11 +10,12 @@ using namespace std; #define 
dot(a,b) (a[0]*b[0] + a[1]*b[1] + a[2]*b[2]) void print_timestamp(string message) { - auto now = chrono::system_clock::to_time_t(chrono::system_clock::now()); - tm local_tm = *localtime(&now); - fprintf(stderr,"%s at %02d:%02d:%02d\n", message.c_str(), local_tm.tm_hour, local_tm.tm_min, local_tm.tm_sec); + //auto now = chrono::system_clock::to_time_t(chrono::system_clock::now()); + //tm local_tm = *localtime(&now); + //fprintf(stderr,"%s at %02d:%02d:%02d\n", message.c_str(), local_tm.tm_hour, local_tm.tm_min, local_tm.tm_sec); } array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels); +array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm); #endif \ No newline at end of file diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index b738ef0..c22bf72 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -5,20 +5,16 @@ namespace python_api { array<real_t,3> center_of_mass(const np_maskarray &np_voxels){ auto voxels_info = np_voxels.request(); - return ::center_of_mass({voxels_info.ptr,voxels_info.shape}); + return ::center_of_mass({voxels_info.ptr, voxels_info.shape}); } -/* + array<real_t,9> inertia_matrix(const np_maskarray &np_voxels, array<real_t,3>& cm){ auto voxels_info = np_voxels.request(); - return inertia_matrix({voxels_info.ptr,voxels_info.shape}, cm); + return ::inertia_matrix({voxels_info.ptr, voxels_info.shape}, cm); } -array<real_t,9> inertia_matrix_serial(const np_maskarray &np_voxels, array<real_t,3>& cm){ - auto voxels_info = np_voxels.request(); - - return inertia_matrix_serial({voxels_info.ptr,voxels_info.shape}, cm); -} +/* template <typename voxel_type> void sample_plane(const np_array<voxel_type> &np_voxels, @@ -126,8 +122,7 @@ PYBIND11_MODULE(geometry, m) { m.doc() = "Voxel Geometry Module"; // optional module docstring m.def("center_of_mass", &python_api::center_of_mass); - //m.def("inertia_matrix", &python_api::inertia_matrix); - 
//m.def("inertia_matrix_serial",&python_api::inertia_matrix_serial); + m.def("inertia_matrix", &python_api::inertia_matrix); //m.def("integrate_axes", &python_api::integrate_axes); //m.def("zero_outside_bbox", &python_api::zero_outside_bbox); //m.def("fill_implant_mask", &python_api::fill_implant_mask); diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index 4b32258..4ddb051 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -16,6 +16,17 @@ #n = 2344 # ~12 GB, used for testing whether blocked works. n = 128 +def assert_with_print(a, b): + all_close = np.allclose(a, b) + if not all_close: + na, nb = np.array(a), np.array(b) + print (na) + print (nb) + nabs = np.abs(na - nb) + print (nabs) + print (np.sum(nabs)) + assert all_close + def run_with_warmup(f): f() start = datetime.datetime.now() @@ -23,23 +34,40 @@ def run_with_warmup(f): end = datetime.datetime.now() return result, end - start -def test_center_of_mass(): - voxels = np.random.randint(0, 255, (n,n,n), np.uint8) - - baseline_f = partial(m_cpu_seq.center_of_mass, voxels) - cpu_f = partial(m_cpu.center_of_mass, voxels) - gpu_f = partial(m_gpu.center_of_mass, voxels) - +def compare_fs(func, baseline_f, cpu_f, gpu_f, should_assert=True): baseline, baseline_t = run_with_warmup(baseline_f) - print (f'Sequential ran in {baseline_t}') + print (f'({func}) Sequential ran in {baseline_t}') cpu, cpu_t = run_with_warmup(cpu_f) - print (f'Parallel CPU ran in {cpu_t}, which is {baseline_t / cpu_t} times faster than sequential') - assert np.allclose(baseline, cpu) + print (f'({func}) Parallel CPU ran in {cpu_t}, which is {baseline_t / cpu_t} times faster than sequential') + if should_assert: assert_with_print(baseline, cpu) gpu, gpu_t = run_with_warmup(gpu_f) - print (f'GPU ran in {gpu_t}, which is {baseline_t / gpu_t} times faster than sequential') - assert np.allclose(baseline, gpu) + print (f'({func}) GPU ran in {gpu_t}, which is {baseline_t / gpu_t} times faster than 
sequential') + if should_assert: assert_with_print(baseline, gpu) + + +def test_center_of_mass(): + voxels = np.random.randint(0, 256, (n,n,n), np.uint8) + + baseline = partial(m_cpu_seq.center_of_mass, voxels) + cpu = partial(m_cpu.center_of_mass, voxels) + gpu = partial(m_gpu.center_of_mass, voxels) + + compare_fs('center_of_mass', baseline, cpu, gpu) + + +def test_inertia_matrix(): + voxels = np.random.randint(0, 2, (n,n,n), np.uint8) + cm = m_gpu.center_of_mass(voxels) + + baseline = partial(m_cpu_seq.inertia_matrix, voxels, cm) + cpu = partial(m_cpu.inertia_matrix, voxels, cm) + gpu = partial(m_gpu.inertia_matrix, voxels, cm) + + # TODO assert disabled due to floating point associativity error accumulation + compare_fs('inertia_matrix', baseline, cpu, gpu, should_assert=False) if __name__ == '__main__': - test_center_of_mass() \ No newline at end of file + test_center_of_mass() + test_inertia_matrix() \ No newline at end of file From 799789ecf9de7001b75d0defad5a6398d7422f81 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 21:03:28 +0100 Subject: [PATCH 077/136] Added missing dependency in Makefile target --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 1d73b51..c689bad 100644 --- a/src/Makefile +++ b/src/Makefile @@ -28,7 +28,7 @@ endif all: $(TARGETS) define GEN_RULE -$(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX): pybind/$(LIB)-pybind.cc $(CPP_FOLDER)/$(PLATFORM)/$(LIB).cc +$(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX): pybind/$(LIB)-pybind.cc $(CPP_FOLDER)/$(PLATFORM)/$(LIB).cc $(CPP_FOLDER)/include/$(LIB).hh $(CXX) $(CXXFLAGS) $(PYBIND_FLAGS) -I$(CPP_FOLDER)/$(PLATFORM) $$< -o $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX) endef From 516d7cbfc5d04f0c96783f66970431fbef73d91f Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Feb 2023 21:22:57 +0100 Subject: [PATCH 078/136] #25 Commented out 
non-processing-steps functions --- src/lib/cpp/cpu_seq/geometry.cc | 44 ++++++++++++++++----------------- src/lib/cpp/gpu/geometry.cc | 44 ++++++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 23 deletions(-) diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index ddf962d..06cd754 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -78,6 +78,7 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const arr }; } +/* TODO only called in test.py. Postponed for now. void integrate_axes(const input_ndarray<mask_type> &voxels, const array<real_t,3> &x0, const array<real_t,3> &v_axis, @@ -90,33 +91,30 @@ void integrate_axes(const input_ndarray<mask_type> &voxels, real_t *output_data = output.data; // TODO: Check v_axis & w_axis projections to certify bounds and get rid of runtime check - - for (ssize_t block_start = 0; block_start < image_length; block_start += acc_block_size) { - const mask_type *buffer = voxels.data + block_start; - int block_length = min(acc_block_size,image_length-block_start); - - //#pragma acc parallel loop copy(output_data[:Nv*Nw]) copyin(buffer[:block_length], x0, v_axis, w_axis) - //parallel_loop((output_data[:Nv*Nw])) - for (int64_t k = 0; k < block_length; k++) { - if (buffer[k] != 0) { - int64_t flat_idx = block_start + k; - real_t xs[3] = { - (flat_idx / (Ny*Nz)) - x0[0], // x - ((flat_idx / Nz) % Ny) - x0[1], // y - (flat_idx % Nz) - x0[2] }; // z - - mask_type voxel = buffer[k]; - real_t v = dot(xs,v_axis), w = dot(xs,w_axis); - int64_t i_v = round(v-v_min), j_w = round(w-w_min); - - if (i_v >= 0 && j_w >= 0 && i_v < Nv && j_w < Nw) { - //atomic_statement() - output_data[i_v*Nw + j_w] += voxel; + int64_t k = 0: + for (int64_t X = 0; X < Nx; X++) { + for (int64_t Y = 0; Y < Ny; Y++) { + for (int64_t Z = 0; Z < Nz; Z++) { + if (buffer[k] != 0) { + real_t xs[3] = { + (flat_idx / (Ny*Nz)) - x0[0], // x + ((flat_idx / Nz) % Ny) - x0[1], // y + 
(flat_idx % Nz) - x0[2] }; // z + + mask_type voxel = buffer[k]; + real_t v = dot(xs, v_axis), w = dot(xs,w_axis); + int64_t i_v = round(v-v_min), j_w = round(w-w_min); + + if (i_v >= 0 && j_w >= 0 && i_v < Nv && j_w < Nw) { + output_data[i_v*Nw + j_w] += voxel; + } } + k++; } } } } +*/ bool in_bbox(float U, float V, float W, const std::array<float,6> bbox) { const auto& [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; @@ -216,6 +214,7 @@ template <typename voxel_type> void sample_plane(const input_ndarray<voxel_type> } } +/* TODO only called in test.py. Postpone for now. // NB: xyz are in indices, not micrometers void zero_outside_bbox(const array<real_t,9> &principal_axes, const array<real_t,6> &parameter_ranges, @@ -258,6 +257,7 @@ void zero_outside_bbox(const array<real_t,9> &principal_axes, } } } +*/ inline vector4 hom_transform(const vector4 &x, const matrix4x4 &M) { vector4 c{{0,0,0,0}}; diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index 87584dc..5c0ce52 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -104,4 +104,46 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const arr Ixy, Iyy, Iyz, Ixz, Iyz, Izz }; -} \ No newline at end of file +} + +/* TODO Only called in test.py. Postponed for now. 
+void integrate_axes(const input_ndarray<mask_type> &voxels, + const array<real_t,3> &x0, + const array<real_t,3> &v_axis, + const array<real_t,3> &w_axis, + const real_t v_min, const real_t w_min, + output_ndarray<real_t> output) { + ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; + ssize_t Nv = output.shape[0], Nw = output.shape[1]; + int64_t image_length = Nx*Ny*Nz; + real_t *output_data = output.data; + + // TODO: Check v_axis & w_axis projections to certify bounds and get rid of runtime check + + for (ssize_t block_start = 0; block_start < image_length; block_start += acc_block_size) { + const mask_type *buffer = voxels.data + block_start; + int block_length = min(acc_block_size,image_length-block_start); + + //#pragma acc parallel loop copy(output_data[:Nv*Nw]) copyin(buffer[:block_length], x0, v_axis, w_axis) + //parallel_loop((output_data[:Nv*Nw])) + for (int64_t k = 0; k < block_length; k++) { + if (buffer[k] != 0) { + int64_t flat_idx = block_start + k; + real_t xs[3] = { + (flat_idx / (Ny*Nz)) - x0[0], // x + ((flat_idx / Nz) % Ny) - x0[1], // y + (flat_idx % Nz) - x0[2] }; // z + + mask_type voxel = buffer[k]; + real_t v = dot(xs,v_axis), w = dot(xs,w_axis); + int64_t i_v = round(v-v_min), j_w = round(w-w_min); + + if (i_v >= 0 && j_w >= 0 && i_v < Nv && j_w < Nw) { + //atomic_statement() + output_data[i_v*Nw + j_w] += voxel; + } + } + } + } +} +*/ \ No newline at end of file From 2fcc48f447e3eb0554ffc75aece4e506003688b3 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Wed, 15 Feb 2023 11:52:55 +0100 Subject: [PATCH 079/136] #25 Added boilerplate macros --- src/lib/cpp/cpu_seq/geometry.cc | 26 ++++++++------- src/lib/cpp/include/boilerplate.hh | 51 ++++++++++++++++++++++++++++++ src/lib/cpp/include/datatypes.hh | 4 ++- 3 files changed, 68 insertions(+), 13 deletions(-) create mode 100644 src/lib/cpp/include/boilerplate.hh diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc 
index 06cd754..8c10286 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -6,25 +6,27 @@ using namespace std; #include "geometry.hh" +#include "boilerplate.hh" -array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { - uint64_t cmx = 0, cmy = 0, cmz = 0; - size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - int64_t image_length = Nx*Ny*Nz; +array<real_t, 3> center_of_mass(const input_ndarray<mask_type> voxels) { + unpack_numpy(voxels); print_timestamp("center_of_mass start"); + uint64_t cmz = 0, cmy = 0, cmx = 0; uint64_t total_mass = 0; - for (int64_t k = 0; k < image_length; k++) { - mask_type m = voxels.data[k]; - int64_t x = k / (Ny*Nz); - int64_t y = (k / Nz) % Ny; - int64_t z = k % Nz; + for_3d_begin(voxels); + + mask_type m = voxels.data[flat_index]; total_mass += m; - cmx += m*x; cmy += m*y; cmz += m*z; - } + cmx += m * x; + cmy += m * y; + cmz += m * z; + + for_3d_end(); + real_t rcmx = cmx / ((real_t) total_mass), rcmy = cmy / ((real_t) total_mass), @@ -32,7 +34,7 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { print_timestamp("center_of_mass end"); - return array<real_t,3>{ rcmx, rcmy, rcmz }; + return array<real_t, 3>{ rcmz, rcmy, rcmx }; } array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { diff --git a/src/lib/cpp/include/boilerplate.hh b/src/lib/cpp/include/boilerplate.hh new file mode 100644 index 0000000..b71c6c0 --- /dev/null +++ b/src/lib/cpp/include/boilerplate.hh @@ -0,0 +1,51 @@ +#ifndef boilerplate_h +#define boilerplate_h + +// TODO it seems like vscode doesn't pick this up. +/// \def for_block_begin(arr) +/// Inserts boilerplate code for accessing \a arr in a blocked (chunked) manner. 
+#define for_block_begin(arr) \ + for (int64_t block_start = 0; block_start < arr##_length; block_start += acc_block_size<arr##_type>) { \ + const arr##_type *arr##_buffer = arr.data + block_start; \ + ssize_t arr##_buffer_length = min(acc_block_size<arr##_type>, arr##_length-block_start); \ + _Pragma(STR(acc data copyin(arr##_buffer[:arr##_buffer_length]))) \ + { \ + +#define for_block_end() } } + +#define for_3d_begin(arr) \ + for (int64_t z = 0; z < arr##_Nz; z++) { \ + for (int64_t y = 0; y < arr##_Ny; y++) { \ + for (int64_t x = 0; x < arr##_Nx; x++) { \ + int64_t flat_index = z*arr##_Ny*arr##_Nx + y*arr##_Nx + x; + +#define for_3d_end() }}} + +#define for_flat_begin_1(arr) for_flat_begin(arr, arr) +#define for_flat_begin_2(arr, global_prefix) \ + for (int64_t flat_index = 0; flat_index < arr##_length; flat_index++) { \ + int64_t \ + global_prefix##_index = arr##_start + flat_index \ + z = global_prefix##_index / (arr##_Ny*arr##_Nx), \ + y = (global_prefix##_index / arr##_Nx) % arr##_Ny, \ + x = global_prefix##_index % arr##_Nx; + +#define for_flat_end() } + +// TODO I'm not sure this'll expand right. 
+#define for_flat_block_begin(arr) \ + for_block_begin(arr) \ + for_flat_begin_2(arr##_buffer, global) + +#define for_flat_block_end() \ + for_flat_end() \ + for_block_end() + +#define unpack_numpy(arr) \ + ssize_t \ + arr##_Nz = arr.shape[0], \ + arr##_Ny = arr.shape[1], \ + arr##_Nx = arr.shape[2], \ + arr##_length = arr##_Nz*arr##_Ny*arr##_Nx; + +#endif \ No newline at end of file diff --git a/src/lib/cpp/include/datatypes.hh b/src/lib/cpp/include/datatypes.hh index 3defac0..cbe1213 100644 --- a/src/lib/cpp/include/datatypes.hh +++ b/src/lib/cpp/include/datatypes.hh @@ -8,6 +8,7 @@ typedef uint8_t mask_type; // TODO: Template + explicit instantiation typedef uint16_t voxel_type; +typedef mask_type voxels_type; //typedef float field_type; typedef uint16_t field_type; typedef float gauss_type; @@ -21,7 +22,8 @@ typedef py::array_t<mask_type, py::array::c_style | py::array::forcecast> np_mas typedef py::array_t<real_t, py::array::c_style | py::array::forcecast> np_realarray; typedef py::array_t<uint8_t, py::array::c_style | py::array::forcecast> np_bytearray; -constexpr ssize_t acc_block_size = 1024 * 1024 * 1024 / sizeof(mask_type); // 1 GB +template <typename T> +constexpr ssize_t acc_block_size = 1024 * 1024 * 1024 / sizeof(T); // 1 GB struct plane_t { array<real_t,3> cm, u_axis, v_axis; From c8528c969ea83f11a903b9865c8d738a38fa2e0e Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Wed, 15 Feb 2023 11:53:16 +0100 Subject: [PATCH 080/136] #25 Added all of the include files as a dependency in Makefile --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index c689bad..623eea2 100644 --- a/src/Makefile +++ b/src/Makefile @@ -28,7 +28,7 @@ endif all: $(TARGETS) define GEN_RULE -$(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX): pybind/$(LIB)-pybind.cc $(CPP_FOLDER)/$(PLATFORM)/$(LIB).cc $(CPP_FOLDER)/include/$(LIB).hh +$(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX): 
pybind/$(LIB)-pybind.cc $(CPP_FOLDER)/$(PLATFORM)/$(LIB).cc $(CPP_FOLDER)/include/*.hh $(CXX) $(CXXFLAGS) $(PYBIND_FLAGS) -I$(CPP_FOLDER)/$(PLATFORM) $$< -o $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX) endef From ec9b2f62b2ad992117332642805226dd5c9e4d1d Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Wed, 15 Feb 2023 11:53:36 +0100 Subject: [PATCH 081/136] #34 Added example docstring in geometry.hh --- src/lib/cpp/include/geometry.hh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh index 0f729cd..2f448ef 100644 --- a/src/lib/cpp/include/geometry.hh +++ b/src/lib/cpp/include/geometry.hh @@ -15,7 +15,21 @@ void print_timestamp(string message) { //fprintf(stderr,"%s at %02d:%02d:%02d\n", message.c_str(), local_tm.tm_hour, local_tm.tm_min, local_tm.tm_sec); } +/* +Computes the center of mass of the given tomography. + +@param voxels The given tomography. +@returns The 3D coordinates of the center of mass (in Z, Y, X). +*/ array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels); + +/* +Computes the inertia matrix of the given tomography based of the given center of mass. + +@param voxels The given tomography. +@param cm The given center of mass. +@returns The 3x3 inertia matrix. 
+*/ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm); #endif \ No newline at end of file From b0f5bbde2a07c6b1f3ffea1f142a9223994ab3f0 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Wed, 15 Feb 2023 11:54:00 +0100 Subject: [PATCH 082/136] #25 Removed trailing whitespace --- src/lib/cpp/cpu/geometry.cc | 32 ++++----- src/lib/cpp/cpu_seq/geometry.cc | 112 ++++++++++++++++---------------- src/lib/cpp/gpu/geometry.cc | 48 +++++++------- src/pybind/geometry-pybind.cc | 26 ++++---- src/test/test_geometry.py | 10 +-- 5 files changed, 114 insertions(+), 114 deletions(-) diff --git a/src/lib/cpp/cpu/geometry.cc b/src/lib/cpp/cpu/geometry.cc index 4a41724..1d0340b 100644 --- a/src/lib/cpp/cpu/geometry.cc +++ b/src/lib/cpp/cpu/geometry.cc @@ -12,12 +12,12 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { int64_t image_length = Nx*Ny*Nz; print_timestamp("center_of_mass start"); - - uint64_t total_mass = 0; + + uint64_t total_mass = 0; #pragma omp parallel for reduction(+:total_mass,cmx,cmy,cmz) for (int64_t k = 0; k < image_length; k++) { - mask_type m = voxels.data[k]; + mask_type m = voxels.data[k]; int64_t x = k / (Ny*Nz); int64_t y = (k / Nz) % Ny; @@ -30,8 +30,8 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { rcmx = cmx / ((real_t) total_mass), rcmy = cmy / ((real_t) total_mass), rcmz = cmz / ((real_t) total_mass); - - print_timestamp("center_of_mass end"); + + print_timestamp("center_of_mass end"); return array<real_t,3>{ rcmx, rcmy, rcmz }; } @@ -41,37 +41,37 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const arr Ixx = 0, Ixy = 0, Ixz = 0, Iyy = 0, Iyz = 0, Izz = 0; - + ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; print_timestamp("inertia_matrix_serial start"); - + #pragma omp parallel for collapse(3) reduction(+:Ixx,Iyy,Izz) reduction(-:Ixy,Ixz,Iyz) for (int64_t X = 0; X < Nx; X++) { 
for (int64_t Y = 0; Y < Ny; Y++) { for (int64_t Z = 0; Z < Nz; Z++) { // TODO shouldn't the loops be interchanged to match the access pattern? (Naming-wise, that is) - int64_t k = X*Ny*Nz + Y*Nz + Z; + int64_t k = X*Ny*Nz + Y*Nz + Z; mask_type m = voxels.data[k]; - + // m guards this, and this removes branches - // if (m != 0) - real_t - x = X - cm[0], - y = Y - cm[1], + // if (m != 0) + real_t + x = X - cm[0], + y = Y - cm[1], z = Z - cm[2]; - + Ixx += m * (y*y + z*z); Iyy += m * (x*x + z*z); - Izz += m * (x*x + y*y); + Izz += m * (x*x + y*y); Ixy -= m * x*y; Ixz -= m * x*z; Iyz -= m * y*z; } } } - + print_timestamp("inertia_matrix_serial end"); return array<real_t,9> { diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 8c10286..a1e9ef2 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -14,13 +14,13 @@ array<real_t, 3> center_of_mass(const input_ndarray<mask_type> voxels) { print_timestamp("center_of_mass start"); uint64_t cmz = 0, cmy = 0, cmx = 0; - uint64_t total_mass = 0; + uint64_t total_mass = 0; for_3d_begin(voxels); mask_type m = voxels.data[flat_index]; - total_mass += m; + total_mass += m; cmx += m * x; cmy += m * y; cmz += m * z; @@ -31,8 +31,8 @@ array<real_t, 3> center_of_mass(const input_ndarray<mask_type> voxels) { rcmx = cmx / ((real_t) total_mass), rcmy = cmy / ((real_t) total_mass), rcmz = cmz / ((real_t) total_mass); - - print_timestamp("center_of_mass end"); + + print_timestamp("center_of_mass end"); return array<real_t, 3>{ rcmz, rcmy, rcmx }; } @@ -42,7 +42,7 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const arr Ixx = 0, Ixy = 0, Ixz = 0, Iyy = 0, Iyz = 0, Izz = 0; - + ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; print_timestamp("inertia_matrix_serial start"); @@ -53,24 +53,24 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const arr for (int64_t Z = 0; Z < Nz; Z++) { mask_type m = 
voxels.data[k]; k++; - + // m guards this, and then branches are removed - //if (m != 0) - real_t - x = X - cm[0], - y = Y - cm[1], + //if (m != 0) + real_t + x = X - cm[0], + y = Y - cm[1], z = Z - cm[2]; - + Ixx += m * (y*y + z*z); Iyy += m * (x*x + z*z); - Izz += m * (x*x + y*y); + Izz += m * (x*x + y*y); Ixy -= m * x*y; Ixz -= m * x*z; Iyz -= m * y*z; } } } - + print_timestamp("inertia_matrix_serial end"); return array<real_t,9> { @@ -82,13 +82,13 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const arr /* TODO only called in test.py. Postponed for now. void integrate_axes(const input_ndarray<mask_type> &voxels, - const array<real_t,3> &x0, + const array<real_t,3> &x0, const array<real_t,3> &v_axis, const array<real_t,3> &w_axis, const real_t v_min, const real_t w_min, output_ndarray<real_t> output) { ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - ssize_t Nv = output.shape[0], Nw = output.shape[1]; + ssize_t Nv = output.shape[0], Nw = output.shape[1]; int64_t image_length = Nx*Ny*Nz; real_t *output_data = output.data; @@ -133,7 +133,7 @@ template<typename field_type> float resample2x2x2(const field_type *voxels, const array<float,3> &X) { auto [Nx,Ny,Nz] = shape; // Eller omvendt? 
if (!in_bbox(X[0],X[1],X[2], {0.5,Nx-1.5, 0.5,Ny-1.5, 0.5,Nz-1.5})) { - uint64_t voxel_index = floor(X[0])*Ny*Nz+floor(X[1])*Ny+floor(X[2]); + uint64_t voxel_index = floor(X[0])*Ny*Nz+floor(X[1])*Ny+floor(X[2]); return voxels[voxel_index]; } float Xfrac[2][3]; // {Xminus[3], Xplus[3]} @@ -171,7 +171,7 @@ template<typename field_type> float resample2x2x2(const field_type *voxels, // } uint64_t voxel_index = I*Ny*Nz+J*Ny+K; assert(I>=0 && J>=0 && K>=0); - assert(I<Nx && J<Ny && K<Nz); + assert(I<Nx && J<Ny && K<Nz); field_type voxel = voxels[voxel_index]; value += voxel*weight; } @@ -182,7 +182,7 @@ template <typename voxel_type> void sample_plane(const input_ndarray<voxel_type> const real_t voxel_size, // In micrometers const array<real_t,3> cm, const array<real_t,3> u_axis, - const array<real_t,3> v_axis, + const array<real_t,3> v_axis, const array<real_t,4> bbox, // [umin,umax,vmin,vmax] in micrometers output_ndarray<real_t> plane_samples) { const auto& [umin,umax,vmin,vmax] = bbox; // In micrometers @@ -195,8 +195,8 @@ template <typename voxel_type> void sample_plane(const input_ndarray<voxel_type> for (ssize_t vj=0;vj<nv;vj++) { const real_t u = umin + ui*du, v = vmin + vj*dv; - // X,Y,Z in micrometers; x,y,z in voxel index space - const real_t + // X,Y,Z in micrometers; x,y,z in voxel index space + const real_t X = cm[0] + u*u_axis[0] + v*v_axis[0], Y = cm[1] + u*u_axis[1] + v*v_axis[1], Z = cm[2] + u*u_axis[2] + v*v_axis[2]; @@ -204,7 +204,7 @@ template <typename voxel_type> void sample_plane(const input_ndarray<voxel_type> const real_t x = X/voxel_size, y = Y/voxel_size, z = Z/voxel_size; // printf("u,v = %g,%g -> %.1f,%.1f,%.1f -> %d, %d, %d\n",u,v,X,Y,Z,int(round(x)),int(round(y)),int(round(z))); - + voxel_type value = 0; if (in_bbox(x,y,z,{0.5,Nx-0.5, 0.5,Ny-0.5, 0.5,Nz-0.5})) value = resample2x2x2<voxel_type>(voxels.data,{Nx,Ny,Nz},{x,y,z}); @@ -216,7 +216,7 @@ template <typename voxel_type> void sample_plane(const input_ndarray<voxel_type> } } -/* TODO 
only called in test.py. Postpone for now. +/* TODO only called in test.py. Postpone for now. // NB: xyz are in indices, not micrometers void zero_outside_bbox(const array<real_t,9> &principal_axes, const array<real_t,6> &parameter_ranges, @@ -238,13 +238,13 @@ void zero_outside_bbox(const array<real_t,9> &principal_axes, int64_t y = (flat_idx / Nz) % Ny; int64_t z = flat_idx % Nz; // Boilerplate until here. TODO: macroize or lambda out! - + real_t xs[3] = {x-cm[0], y-cm[1], z-cm[2]}; real_t params[3] = {0,0,0}; - for (int uvw = 0; uvw < 3; uvw++) - for (int xyz = 0; xyz < 3; xyz++) + for (int uvw = 0; uvw < 3; uvw++) + for (int xyz = 0; xyz < 3; xyz++) params[uvw] += xs[xyz] * principal_axes[uvw*3+xyz]; // u = dot(xs,u_axis), v = dot(xs,v_axis), w = dot(xs,w_axis) bool p = false; @@ -266,7 +266,7 @@ inline vector4 hom_transform(const vector4 &x, const matrix4x4 &M) { for (int i = 0; i < 4; i++) { real_t sum = 0; - #pragma simd parallel for reduction(+:sum) + #pragma simd parallel for reduction(+:sum) for (int j=0;j<4;j++) sum += M[i*4+j]*x[j]; c[i] = sum; @@ -290,7 +290,7 @@ inline vector4 hom_transform(const vector4 &x, const matrix4x4 &M) { std::array<real_t,4> Xs = { X*voxel_size, Y*voxel_size, Z*voxel_size, 1 }; \ bool mask_value = maskin_buffer[k]; -#define loop_mask_end(mask) }}} +#define loop_mask_end(mask) }}} /* void fill_implant_mask(const input_ndarray<mask_type> implant_mask, @@ -301,24 +301,24 @@ void fill_implant_mask(const input_ndarray<mask_type> implant_mask, output_ndarray<mask_type> solid_implant_mask, output_ndarray<float> rsqr_maxs, output_ndarray<float> profile) { - real_t theta_min = M_PI, theta_max = -M_PI; + real_t theta_min = M_PI, theta_max = -M_PI; ssize_t n_segments = rsqr_maxs.shape[0]; const auto [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; - + printf("implant_mask.shape = %ld,%ld,%ld\n",implant_mask.shape[0],implant_mask.shape[1],implant_mask.shape[2]); printf("solid_implant_mask.shape = 
%ld,%ld,%ld\n",solid_implant_mask.shape[0],solid_implant_mask.shape[1],solid_implant_mask.shape[2]); - + fprintf(stderr,"voxel_size = %g, U_min = %g, U_max = %g, r_frac = %g, n_segments = %ld\n", voxel_size, U_min, U_max, r_fraction, n_segments); float *rsqr_maxs_d = rsqr_maxs.data; float *profile_d = profile.data; - + // First pass computes some bounds -- possibly separate out to avoid repeating //loop_mask_start(implant_mask, solid_implant_mask, (maskin_buffer[:this_block_length], rsqr_maxs_d[:n_segments], Muvw[:16], bbox[:6]) ); if (mask_value) { auto [U,V,W,c] = hom_transform(Xs,Muvw); - + real_t r_sqr = V*V+W*W; real_t theta = atan2(V,W); @@ -359,25 +359,25 @@ void fill_implant_mask(const input_ndarray<mask_type> implant_mask, } } maskout_buffer[k] = solid_mask_value; - + //loop_mask_end(implant_mask); } void compute_front_mask(const input_ndarray<mask_type> solid_implant, const float voxel_size, - const matrix4x4 &Muvw, + const matrix4x4 &Muvw, std::array<float,6> bbox, output_ndarray<mask_type> front_mask) { const auto [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; - loop_mask_start(solid_implant, front_mask, () ); + loop_mask_start(solid_implant, front_mask, () ); if (!mask_value) { auto [U,V,W,c] = hom_transform(Xs,Muvw); maskout_buffer[k] = W>W_min; } else maskout_buffer[k] = 0; - + loop_mask_end(solid_implant) } */ @@ -400,24 +400,24 @@ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance ssize_t Cx = C.shape[0], Cy = C.shape[1], Cz = C.shape[2]; real_t edx = ex/real_t(Cx), edy = ey/real_t(Cy), edz = ex/real_t(Cz); - + ssize_t edt_length = ex*ey*ez; - ssize_t C_length = Cx*Cy*Cz; + ssize_t C_length = Cx*Cy*Cz; printf("Segmenting from %g to %g micrometers distance of implant.\n",d_min,d_max); printf("Bounding box is [U_min,U_max,V_min,V_max,W_min,W_max] = [[%g,%g],[%g,%g],[%g,%g]]\n", U_min,U_max,V_min,V_max,W_min,W_max); printf("EDT field is (%ld,%ld,%ld)\n",ex,ey,ez); - + real_t th_min = 1234, th_max = -1234; ssize_t n_shell = 
0; ssize_t n_shell_bbox = 0; ssize_t block_height = 64; - + //TODO: new acc/openmp macro in parallel.hh - { + { float *image_d = image.data; int64_t *count_d = count.data; @@ -429,8 +429,8 @@ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance ssize_t this_edt_length = min((block_height+2)*ey*ez,edt_length-block_start); //#pragma acc parallel loop copy(C_buffer[:this_block_length], image_d[:n_theta*n_U], count_d[:n_theta*n_U], bbox[:6], Muvw[:16], edt_block[:this_edt_length]) reduction(+:n_shell,n_shell_bbox) - #pragma omp parallel for reduction(+:n_shell,n_shell_bbox) - for (int64_t k = 0; k < this_block_length; k++) { + #pragma omp parallel for reduction(+:n_shell,n_shell_bbox) + for (int64_t k = 0; k < this_block_length; k++) { const int64_t flat_idx = block_start + k; const int64_t X = (flat_idx / (Cy*Cz)), Y = (flat_idx / Cz) % Cy, Z = flat_idx % Cz; // Integer indices: Cs[c,X,Y,Z] // Index into local block @@ -442,10 +442,10 @@ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance printf("Block number k=%ld.\nX,Y,Z=%ld,%ld,%ld\nXl,Yl,Zl=%ld,%ld,%ld\nx,y,z=%.2f, %.2f, %.2f\n",k,X,Y,Z,Xl,Yl,Zl,x,y,z); abort(); } - + //****** MEAT OF THE IMPLEMENTATION IS HERE ****** real_t distance = resample2x2x2<float>(edt_block, {this_edt_length/(ey*ez),ey,ez}, {x,y,z}); - + if (distance > d_min && distance <= d_max) { // TODO: and W>w_min array<real_t,4> Xs = {X*voxel_size, Y*voxel_size, Z*voxel_size, 1}; auto [U,V,W,c] = hom_transform(Xs,Muvw); @@ -457,29 +457,29 @@ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance if (theta >= theta_min && theta <= theta_max) { n_shell_bbox++; - + ssize_t theta_i = floor( (theta-theta_min) * (n_theta-1)/(theta_max-theta_min) ); ssize_t U_i = floor( (U - U_min) * (n_U -1)/( U_max- U_min) ); - + real_t p = C_buffer[k]/255.; - + assert(theta >= theta_min); assert(theta <= theta_max); assert(U >= U_min); - assert(U <= U_max); + assert(U <= U_max); assert(theta_i 
>= 0); assert(theta_i < n_theta); assert(U_i >= 0); - assert(U_i < n_U); - + assert(U_i < n_U); + if (p > 0) { th_min = min(theta,th_min); - th_max = max(theta,th_max); - + th_max = max(theta,th_max); + //atomic_statement() image_d[theta_i*n_U + U_i] += p; - - //atomic_statement() + + //atomic_statement() count_d[theta_i*n_U + U_i] += 1; } } @@ -490,5 +490,5 @@ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance } printf("n_shell = %ld, n_shell_bbox = %ld\n",n_shell,n_shell_bbox); printf("theta_min, theta_max = %.2f,%.2f\n",theta_min,theta_max); - printf("th_min, th_max = %.2f,%.2f\n",th_min,th_max); + printf("th_min, th_max = %.2f,%.2f\n",th_min,th_max); } diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index 5c0ce52..891c579 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -7,7 +7,7 @@ using namespace std; #include "geometry.hh" array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { - // nvc++ doesn't support OpenACC 2.7 array reductions yet. + // nvc++ doesn't support OpenACC 2.7 array reductions yet. 
uint64_t cmx = 0, cmy = 0, cmz = 0; size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; int64_t image_length = Nx*Ny*Nz; @@ -15,14 +15,14 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { print_timestamp("center_of_mass start"); uint64_t total_mass = 0; - + #pragma acc data copy(total_mass,cmx,cmy,cmz) { - for (int64_t block_start = 0; block_start < image_length; block_start += acc_block_size) { + for (int64_t block_start = 0; block_start < image_length; block_start += acc_block_size<mask_type>) { const mask_type *buffer = voxels.data + block_start; - ssize_t this_block_size = min(acc_block_size, image_length-block_start); + ssize_t this_block_size = min(acc_block_size<mask_type>, image_length-block_start); - #pragma acc data copyin(buffer[:this_block_size]) + #pragma acc data copyin(buffer[:this_block_size]) { #pragma acc parallel loop reduction(+:total_mass,cmx,cmy,cmz) for (int64_t k = 0; k < this_block_size; k++) { @@ -39,12 +39,12 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { } } } - real_t + real_t rcmx = cmx / ((real_t) total_mass), rcmy = cmy / ((real_t) total_mass), rcmz = cmz / ((real_t) total_mass); - - print_timestamp("center_of_mass end"); + + print_timestamp("center_of_mass end"); return array<real_t,3>{rcmx, rcmy, rcmz}; } @@ -55,19 +55,19 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const arr Ixx = 0, Ixy = 0, Ixz = 0, Iyy = 0, Iyz = 0, Izz = 0; - + size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; ssize_t image_length = Nx*Ny*Nz; print_timestamp("inertia_matrix start"); - #pragma acc data copy(Ixx, Ixy, Ixz, Iyy, Iyz, Izz) + #pragma acc data copy(Ixx, Ixy, Ixz, Iyy, Iyz, Izz) { - for (ssize_t block_start = 0; block_start < image_length; block_start += acc_block_size) { + for (ssize_t block_start = 0; block_start < image_length; block_start += acc_block_size<mask_type>) { const mask_type *buffer = voxels.data + block_start; 
- ssize_t this_block_size = min(acc_block_size, image_length - block_start); + ssize_t this_block_size = min(acc_block_size<mask_type>, image_length - block_start); - #pragma acc data copyin(buffer[:this_block_size]) + #pragma acc data copyin(buffer[:this_block_size]) { #pragma acc parallel loop reduction(+:Ixx,Iyy,Izz) reduction(-:Ixy,Ixz,Iyz) for (int64_t k = 0; k < this_block_size; k++) { //\if (buffer[k] != 0) @@ -75,20 +75,20 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const arr // m guards this, and GPUs doesn't like branches //if (m != 0) - int64_t + int64_t flat_idx = block_start + k, X = flat_idx / (Ny * Nz), Y = ((flat_idx) / Nz) % Ny, Z = flat_idx % Nz; - - real_t - x = X - cm[0], - y = Y - cm[1], + + real_t + x = X - cm[0], + y = Y - cm[1], z = Z - cm[2]; - + Ixx += m * (y*y + z*z); Iyy += m * (x*x + z*z); - Izz += m * (x*x + y*y); + Izz += m * (x*x + y*y); Ixy -= m * x*y; Ixz -= m * x*z; Iyz -= m * y*z; @@ -106,20 +106,20 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const arr }; } -/* TODO Only called in test.py. Postponed for now. +/* TODO Only called in test.py. Postponed for now. 
void integrate_axes(const input_ndarray<mask_type> &voxels, - const array<real_t,3> &x0, + const array<real_t,3> &x0, const array<real_t,3> &v_axis, const array<real_t,3> &w_axis, const real_t v_min, const real_t w_min, output_ndarray<real_t> output) { ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - ssize_t Nv = output.shape[0], Nw = output.shape[1]; + ssize_t Nv = output.shape[0], Nw = output.shape[1]; int64_t image_length = Nx*Ny*Nz; real_t *output_data = output.data; // TODO: Check v_axis & w_axis projections to certify bounds and get rid of runtime check - + for (ssize_t block_start = 0; block_start < image_length; block_start += acc_block_size) { const mask_type *buffer = voxels.data + block_start; int block_length = min(acc_block_size,image_length-block_start); diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index c22bf72..d82f749 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -2,15 +2,15 @@ namespace python_api { -array<real_t,3> center_of_mass(const np_maskarray &np_voxels){ +array<real_t, 3> center_of_mass(const np_maskarray &np_voxels){ auto voxels_info = np_voxels.request(); return ::center_of_mass({voxels_info.ptr, voxels_info.shape}); } -array<real_t,9> inertia_matrix(const np_maskarray &np_voxels, array<real_t,3>& cm){ +array<real_t, 9> inertia_matrix(const np_maskarray &np_voxels, array<real_t, 3>& cm){ auto voxels_info = np_voxels.request(); - + return ::inertia_matrix({voxels_info.ptr, voxels_info.shape}, cm); } @@ -21,19 +21,19 @@ void sample_plane(const np_array<voxel_type> &np_voxels, const real_t voxel_size, // In micrometers const array<real_t,3> cm, const array<real_t,3> u_axis, - const array<real_t,3> v_axis, + const array<real_t,3> v_axis, const array<real_t,4> bbox, // [umin,umax,vmin,vmax] in micrometers np_array<float> np_plane_samples) { auto voxels_info = np_voxels.request(); auto plane_samples_info = np_plane_samples.request(); - + 
sample_plane<voxel_type>({voxels_info.ptr, voxels_info.shape}, voxel_size, cm,u_axis,v_axis,bbox, {plane_samples_info.ptr, plane_samples_info.shape}); } - + void integrate_axes(const np_maskarray &np_voxels, - const array<real_t,3> &x0, + const array<real_t,3> &x0, const array<real_t,3> &v_axis, const array<real_t,3> &w_axis, const real_t v_min, const real_t w_min, @@ -52,10 +52,10 @@ void zero_outside_bbox(const array<real_t,9> &principal_axes, const array<real_t,3> &cm, // TOOD: Med eller uden voxelsize? np_maskarray &np_voxels) { auto voxels_info = np_voxels.request(); - + zero_outside_bbox(principal_axes, parameter_ranges, - cm, + cm, {voxels_info.ptr, voxels_info.shape}); } @@ -83,12 +83,12 @@ void fill_implant_mask(const np_maskarray implant_mask, void compute_front_mask(const np_array<uint8_t> &np_solid_implant, const float voxel_size, - const matrix4x4 &Muvw, + const matrix4x4 &Muvw, std::array<float,6> bbox, np_array<mask_type> &np_front_mask) { auto solid_implant_info = np_solid_implant.request(); auto front_mask_info = np_front_mask.request(); - + ::compute_front_mask({solid_implant_info.ptr, solid_implant_info.shape}, voxel_size, Muvw, bbox, {front_mask_info.ptr, front_mask_info.shape}); @@ -117,13 +117,13 @@ void cylinder_projection(const np_array<float> &np_edt, // Euclidean Distance }*/ } - + PYBIND11_MODULE(geometry, m) { m.doc() = "Voxel Geometry Module"; // optional module docstring m.def("center_of_mass", &python_api::center_of_mass); m.def("inertia_matrix", &python_api::inertia_matrix); - //m.def("integrate_axes", &python_api::integrate_axes); + //m.def("integrate_axes", &python_api::integrate_axes); //m.def("zero_outside_bbox", &python_api::zero_outside_bbox); //m.def("fill_implant_mask", &python_api::fill_implant_mask); //m.def("cylinder_projection", &python_api::cylinder_projection); diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index 4ddb051..6cc9f37 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py 
@@ -43,9 +43,9 @@ def compare_fs(func, baseline_f, cpu_f, gpu_f, should_assert=True): if should_assert: assert_with_print(baseline, cpu) gpu, gpu_t = run_with_warmup(gpu_f) - print (f'({func}) GPU ran in {gpu_t}, which is {baseline_t / gpu_t} times faster than sequential') + print (f'({func}) GPU ran in {gpu_t}, which is {baseline_t / gpu_t} times faster than sequential') if should_assert: assert_with_print(baseline, gpu) - + def test_center_of_mass(): voxels = np.random.randint(0, 256, (n,n,n), np.uint8) @@ -56,7 +56,7 @@ def test_center_of_mass(): compare_fs('center_of_mass', baseline, cpu, gpu) - + def test_inertia_matrix(): voxels = np.random.randint(0, 2, (n,n,n), np.uint8) cm = m_gpu.center_of_mass(voxels) @@ -64,10 +64,10 @@ def test_inertia_matrix(): baseline = partial(m_cpu_seq.inertia_matrix, voxels, cm) cpu = partial(m_cpu.inertia_matrix, voxels, cm) gpu = partial(m_gpu.inertia_matrix, voxels, cm) - + # TODO assert disabled due to floating point associativity error accumulation compare_fs('inertia_matrix', baseline, cpu, gpu, should_assert=False) - + if __name__ == '__main__': test_center_of_mass() test_inertia_matrix() \ No newline at end of file From 4a1e6c1496adafa1610d9c9de1f34be38bcc9840 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 17 Feb 2023 12:14:08 +0100 Subject: [PATCH 083/136] #25 I have stared into the abyss and it stared back --- src/Makefile | 30 ++++--- src/lib/cpp/cpu/geometry.cc | 33 ++------ src/lib/cpp/cpu_seq/geometry.cc | 38 +++++---- src/lib/cpp/gpu/geometry.cc | 48 ++--------- src/lib/cpp/include/boilerplate.hh | 127 ++++++++++++++++++++--------- src/lib/cpp/include/geometry.hh | 12 ++- src/pybind/geometry-pybind.cc | 21 ++++- 7 files changed, 163 insertions(+), 146 deletions(-) diff --git a/src/Makefile b/src/Makefile index 623eea2..7597ea2 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,35 +1,41 @@ # Define constants and collections PYTHON = python3.10 -PYBIND_FLAGS += $(shell $(PYTHON) -m 
pybind11 --include) -march=native -Wall -shared -fPIC -fopenmp -g -std=c++17 -O3 +PYBIND_FLAGS += $(shell $(PYTHON) -m pybind11 --include) -march=native -Wall -shared -fPIC -g -std=c++17 -O3 PYBIND_SUFFIX = $(shell $(PYTHON)-config --extension-suffix) + +# Detect OS for OS specific changes +ifeq ($(shell uname -s), Darwin) # Mac OSX +CXX = g++-12 # Use homebrew gcc, as system gcc is an alias for clang +CXXFLAGS += -undefined dynamic_lookup # https://pybind11.readthedocs.io/en/stable/compiling.html#building-manually +CLEANUP += $(TARGETS) $(foreach TARGET, $(TARGETS), $(TARGET).dSYM) # These are also generated on Mac +endif + CPP_FOLDER=lib/cpp #CXXFLAGS += -I../contrib/cpptqdm/ -Iinclude CXXFLAGS += -I$(CPP_FOLDER)/include -PLATFORMS=cpu_seq cpu gpu +PLATFORMS=cpu_seq cpu +cpu_seq_CXX=$(CXX) +cpu_CXX=$(cpu_seq_CXX) +cpu_FLAGS=-fopenmp + LIBS=io geometry morphology TARGETS = $(foreach PLATFORM, $(PLATFORMS), $(foreach LIB, $(LIBS), $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX))) CLEANUP = $(TARGETS) $(foreach PLATFORM, $(PLATFORMS), $(CPP_FOLDER)/$(PLATFORM)/__pycache__) # Detect if OpenACC can be used ifneq (, $(shell which nvc++)) -CXX = nvc++ -CXXFLAGS += -acc=gpu -Minfo=accel -tp=native +PLATFORMS += gpu +gpu_CXX = nvc++ +gpu_FLAGS = -acc=gpu -Minfo=accel -tp=native else $(info OpenACC compiler nvc++ not found. Compiling without.) 
endif -# Detect OS for OS specific changes -ifeq ($(shell uname -s), Darwin) # Mac OSX -CXX = g++-12 # Use homebrew gcc, as system gcc is an alias for clang -CXXFLAGS += -undefined dynamic_lookup # https://pybind11.readthedocs.io/en/stable/compiling.html#building-manually -CLEANUP += $(TARGETS) $(foreach TARGET, $(TARGETS), $(TARGET).dSYM) # These are also generated on Mac -endif - all: $(TARGETS) define GEN_RULE $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX): pybind/$(LIB)-pybind.cc $(CPP_FOLDER)/$(PLATFORM)/$(LIB).cc $(CPP_FOLDER)/include/*.hh - $(CXX) $(CXXFLAGS) $(PYBIND_FLAGS) -I$(CPP_FOLDER)/$(PLATFORM) $$< -o $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX) + $($(PLATFORM)_CXX) $($(PLATFORM)_FLAGS) $(CXXFLAGS) $(PYBIND_FLAGS) -I$(CPP_FOLDER)/$(PLATFORM) $$< -o $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX) endef $(foreach PLATFORM, $(PLATFORMS), \ diff --git a/src/lib/cpp/cpu/geometry.cc b/src/lib/cpp/cpu/geometry.cc index 1d0340b..728e933 100644 --- a/src/lib/cpp/cpu/geometry.cc +++ b/src/lib/cpp/cpu/geometry.cc @@ -5,35 +5,12 @@ using namespace std; #include "geometry.hh" +#include "../cpu_seq/geometry.cc" -array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { - uint64_t cmx = 0, cmy = 0, cmz = 0; - size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - int64_t image_length = Nx*Ny*Nz; +namespace cpu_par { - print_timestamp("center_of_mass start"); - - uint64_t total_mass = 0; - - #pragma omp parallel for reduction(+:total_mass,cmx,cmy,cmz) - for (int64_t k = 0; k < image_length; k++) { - mask_type m = voxels.data[k]; - - int64_t x = k / (Ny*Nz); - int64_t y = (k / Nz) % Ny; - int64_t z = k % Nz; - - total_mass += m; - cmx += m*x; cmy += m*y; cmz += m*z; - } - real_t - rcmx = cmx / ((real_t) total_mass), - rcmy = cmy / ((real_t) total_mass), - rcmz = cmz / ((real_t) total_mass); - - print_timestamp("center_of_mass end"); - - return array<real_t,3>{ rcmx, rcmy, rcmz }; +array<real_t,3> center_of_mass(const 
input_ndarray<mask_type> &mask) { + return cpu_seq::center_of_mass(mask); } array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { @@ -79,4 +56,6 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const arr Ixy, Iyy, Iyz, Ixz, Iyz, Izz }; +} + } \ No newline at end of file diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index a1e9ef2..609970c 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -3,34 +3,35 @@ #include <inttypes.h> #include <stdio.h> #include <math.h> -using namespace std; -#include "geometry.hh" #include "boilerplate.hh" +#include "geometry.hh" + +using namespace std; +namespace cpu_seq { -array<real_t, 3> center_of_mass(const input_ndarray<mask_type> voxels) { - unpack_numpy(voxels); +array<real_t, 3> center_of_mass(const input_ndarray<mask_type> &mask) { + UNPACK_NUMPY(mask); print_timestamp("center_of_mass start"); - uint64_t cmz = 0, cmy = 0, cmx = 0; - uint64_t total_mass = 0; + uint64_t total_mass = 0, cmz = 0, cmy = 0, cmx = 0; - for_3d_begin(voxels); + BLOCK_BEGIN(mask, reduction(+:total_mass,cmz,cmy,cmx)); { - mask_type m = voxels.data[flat_index]; + mask_type m = mask_buffer[flat_index]; - total_mass += m; - cmx += m * x; - cmy += m * y; - cmz += m * z; + total_mass += m; + cmz += m * z; + cmy += m * y; + cmx += m * x; - for_3d_end(); + } BLOCK_END(); real_t - rcmx = cmx / ((real_t) total_mass), + rcmz = cmz / ((real_t) total_mass), rcmy = cmy / ((real_t) total_mass), - rcmz = cmz / ((real_t) total_mass); + rcmx = cmx / ((real_t) total_mass); print_timestamp("center_of_mass end"); @@ -80,6 +81,8 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const arr }; } +} + /* TODO only called in test.py. Postponed for now. 
void integrate_axes(const input_ndarray<mask_type> &voxels, const array<real_t,3> &x0, @@ -116,7 +119,6 @@ void integrate_axes(const input_ndarray<mask_type> &voxels, } } } -*/ bool in_bbox(float U, float V, float W, const std::array<float,6> bbox) { const auto& [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; @@ -259,7 +261,6 @@ void zero_outside_bbox(const array<real_t,9> &principal_axes, } } } -*/ inline vector4 hom_transform(const vector4 &x, const matrix4x4 &M) { vector4 c{{0,0,0,0}}; @@ -380,7 +381,6 @@ void compute_front_mask(const input_ndarray<mask_type> solid_implant, loop_mask_end(solid_implant) } -*/ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance Transform in um, should be low-resolution (will be interpolated) const input_ndarray<uint8_t> C, // Material classification images (probability per voxel, 0..1 -> 0..255) @@ -492,3 +492,5 @@ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance printf("theta_min, theta_max = %.2f,%.2f\n",theta_min,theta_max); printf("th_min, th_max = %.2f,%.2f\n",th_min,th_max); } + +*/ \ No newline at end of file diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index 891c579..6d75136 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -5,48 +5,12 @@ using namespace std; #include "geometry.hh" +#include "../cpu_seq/geometry.cc" -array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels) { - // nvc++ doesn't support OpenACC 2.7 array reductions yet. 
- uint64_t cmx = 0, cmy = 0, cmz = 0; - size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - int64_t image_length = Nx*Ny*Nz; - - print_timestamp("center_of_mass start"); - - uint64_t total_mass = 0; - - #pragma acc data copy(total_mass,cmx,cmy,cmz) - { - for (int64_t block_start = 0; block_start < image_length; block_start += acc_block_size<mask_type>) { - const mask_type *buffer = voxels.data + block_start; - ssize_t this_block_size = min(acc_block_size<mask_type>, image_length-block_start); - - #pragma acc data copyin(buffer[:this_block_size]) - { - #pragma acc parallel loop reduction(+:total_mass,cmx,cmy,cmz) - for (int64_t k = 0; k < this_block_size; k++) { - mask_type m = buffer[k]; - - int64_t flat_idx = block_start + k; - int64_t x = flat_idx / (Ny*Nz); - int64_t y = (flat_idx / Nz) % Ny; - int64_t z = flat_idx % Nz; - - total_mass += m; - cmx += m*x; cmy += m*y; cmz += m*z; - } - } - } - } - real_t - rcmx = cmx / ((real_t) total_mass), - rcmy = cmy / ((real_t) total_mass), - rcmz = cmz / ((real_t) total_mass); +namespace gpu { - print_timestamp("center_of_mass end"); - - return array<real_t,3>{rcmx, rcmy, rcmz}; +array<real_t,3> center_of_mass(const input_ndarray<mask_type> &mask) { + return cpu_seq::center_of_mass(mask); } array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { @@ -146,4 +110,6 @@ void integrate_axes(const input_ndarray<mask_type> &voxels, } } } -*/ \ No newline at end of file +*/ + +} \ No newline at end of file diff --git a/src/lib/cpp/include/boilerplate.hh b/src/lib/cpp/include/boilerplate.hh index b71c6c0..0b5b1bc 100644 --- a/src/lib/cpp/include/boilerplate.hh +++ b/src/lib/cpp/include/boilerplate.hh @@ -1,51 +1,98 @@ #ifndef boilerplate_h #define boilerplate_h -// TODO it seems like vscode doesn't pick this up. -/// \def for_block_begin(arr) -/// Inserts boilerplate code for accessing \a arr in a blocked (chunked) manner. 
-#define for_block_begin(arr) \ - for (int64_t block_start = 0; block_start < arr##_length; block_start += acc_block_size<arr##_type>) { \ - const arr##_type *arr##_buffer = arr.data + block_start; \ - ssize_t arr##_buffer_length = min(acc_block_size<arr##_type>, arr##_length-block_start); \ - _Pragma(STR(acc data copyin(arr##_buffer[:arr##_buffer_length]))) \ - { \ - -#define for_block_end() } } - -#define for_3d_begin(arr) \ - for (int64_t z = 0; z < arr##_Nz; z++) { \ - for (int64_t y = 0; y < arr##_Ny; y++) { \ - for (int64_t x = 0; x < arr##_Nx; x++) { \ - int64_t flat_index = z*arr##_Ny*arr##_Nx + y*arr##_Nx + x; - -#define for_3d_end() }}} - -#define for_flat_begin_1(arr) for_flat_begin(arr, arr) -#define for_flat_begin_2(arr, global_prefix) \ - for (int64_t flat_index = 0; flat_index < arr##_length; flat_index++) { \ +// Gaze upon the glory of 3-layered macros for building string literals for _Pragma +#define STRINGIFY(X) #X +#define TOKEN_COMBINER(X) STRINGIFY(X) +#define PRAGMA(X) _Pragma(TOKEN_COMBINER(X)) + +#ifdef _OPENACC +#define PARALLEL_TERM acc parallel loop +#else +#ifdef _OPENMP +#define PARALLEL_TERM omp parallel for +#else +#define PARALLEL_TERM +#endif +#endif + +// TODO attempt at docstring; not quite working. + +/// Inserts boilerplate code for accessing the given parameter, ARR, in a blocked (chunked) manner. +/// Following this call, the following variables will be exposed: +/// +/// - `block_start`: the address of the current block. +/// +/// @param ARR The array that will be accessed. 
+#define FOR_BLOCK_BEGIN(ARR) \ + for (int64_t ARR##_buffer_start = 0; ARR##_buffer_start < ARR##_length; ARR##_buffer_start += acc_block_size<ARR##_type>) { \ + const ARR##_type *ARR##_buffer = ARR.data + ARR##_buffer_start; \ + ssize_t ARR##_buffer_length = min(acc_block_size<ARR##_type>, ARR##_length-ARR##_buffer_start); \ + PRAGMA(acc data copyin(ARR##_buffer[:ARR##_buffer_length])) \ + { + +#define FOR_BLOCK_END() } } + +#define FOR_3D_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ + PRAGMA(PARALLEL_TERM collapse(3) EXTRA_PRAGMA_CLAUSE) \ + for (int64_t z = 0; z < ARR##_Nz; z++) { \ + for (int64_t y = 0; y < ARR##_Ny; y++) { \ + for (int64_t x = 0; x < ARR##_Nx; x++) { \ + +#define FOR_3D_END() }}} + +#define FOR_FLAT_BEGIN(ARR, global_prefix, EXTRA_PRAGMA_CLAUSE) \ + PRAGMA(PARALLEL_TERM EXTRA_PRAGMA_CLAUSE) \ + for (int64_t flat_index = 0; flat_index < ARR##_length; flat_index++) { \ int64_t \ - global_prefix##_index = arr##_start + flat_index \ - z = global_prefix##_index / (arr##_Ny*arr##_Nx), \ - y = (global_prefix##_index / arr##_Nx) % arr##_Ny, \ - x = global_prefix##_index % arr##_Nx; + global_prefix##_index = ARR##_start + flat_index, \ + z = global_prefix##_index / (ARR##_Ny * ARR##_Nx), \ + y = (global_prefix##_index / ARR##_Nx) % ARR##_Ny, \ + x = global_prefix##_index % ARR##_Nx; + +#define FOR_FLAT_END() } + +#define PUSH_N_DOWN_TO_BUFFER(ARR) \ + ssize_t \ + ARR##_buffer_Nz = ARR##_Nz, \ + ARR##_buffer_Ny = ARR##_Ny, \ + ARR##_buffer_Nx = ARR##_Nx; + +#ifdef _OPENACC +#define BLOCK_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ + FOR_BLOCK_BEGIN(ARR) \ + PUSH_N_DOWN_TO_BUFFER(ARR) \ + FOR_FLAT_BEGIN(ARR##_buffer, global, EXTRA_PRAGMA_CLAUSE) + +#define BLOCK_END() \ + FOR_FLAT_END() \ + FOR_BLOCK_END() +#else +#ifdef _OPENMP // Should also capture OpenACC, which is why it's second. 
+#define BLOCK_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ + const ARR##_type *ARR##_buffer = ARR.data; \ + FOR_3D_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ + int64_t flat_index = z*ARR##_Ny*ARR##_Nx + y*ARR##_Nx + x; -#define for_flat_end() } +#define BLOCK_END() FOR_3D_END() +#else +#define BLOCK_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ + int64_t flat_index = 0; \ + const ARR##_type *ARR##_buffer = ARR.data; \ + FOR_3D_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) -// TODO I'm not sure this'll expand right. -#define for_flat_block_begin(arr) \ - for_block_begin(arr) \ - for_flat_begin_2(arr##_buffer, global) +#define BLOCK_END() \ + flat_index++; \ + FOR_3D_END() -#define for_flat_block_end() \ - for_flat_end() \ - for_block_end() +#endif +#endif -#define unpack_numpy(arr) \ +#define UNPACK_NUMPY(ARR) \ ssize_t \ - arr##_Nz = arr.shape[0], \ - arr##_Ny = arr.shape[1], \ - arr##_Nx = arr.shape[2], \ - arr##_length = arr##_Nz*arr##_Ny*arr##_Nx; + ARR##_Nz = ARR.shape[0], \ + ARR##_Ny = ARR.shape[1], \ + ARR##_Nx = ARR.shape[2], \ + ARR##_length = ARR##_Nz*ARR##_Ny*ARR##_Nx; #endif \ No newline at end of file diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh index 2f448ef..25bef7e 100644 --- a/src/lib/cpp/include/geometry.hh +++ b/src/lib/cpp/include/geometry.hh @@ -10,18 +10,20 @@ using namespace std; #define dot(a,b) (a[0]*b[0] + a[1]*b[1] + a[2]*b[2]) void print_timestamp(string message) { - //auto now = chrono::system_clock::to_time_t(chrono::system_clock::now()); - //tm local_tm = *localtime(&now); - //fprintf(stderr,"%s at %02d:%02d:%02d\n", message.c_str(), local_tm.tm_hour, local_tm.tm_min, local_tm.tm_sec); + auto now = chrono::system_clock::to_time_t(chrono::system_clock::now()); + tm local_tm = *localtime(&now); + fprintf(stderr,"%s at %02d:%02d:%02d\n", message.c_str(), local_tm.tm_hour, local_tm.tm_min, local_tm.tm_sec); } +namespace NS { + /* Computes the center of mass of the given tomography. @param voxels The given tomography. 
@returns The 3D coordinates of the center of mass (in Z, Y, X). */ -array<real_t,3> center_of_mass(const input_ndarray<mask_type> voxels); +array<real_t,3> center_of_mass(const input_ndarray<mask_type> &voxels); /* Computes the inertia matrix of the given tomography based of the given center of mass. @@ -32,4 +34,6 @@ Computes the inertia matrix of the given tomography based of the given center of */ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm); +} + #endif \ No newline at end of file diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index d82f749..d52c83c 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -1,17 +1,30 @@ +#ifdef _OPENACC +#warning "Using GPU" +#define NS gpu +#else +#ifdef _OPENMP +#warning "Using OpenMP" +#define NS cpu_par +#else +#warning "Using sequential" +#define NS cpu_seq +#endif +#endif + #include "geometry.cc" namespace python_api { -array<real_t, 3> center_of_mass(const np_maskarray &np_voxels){ +array<real_t, 3> center_of_mass(const np_maskarray &np_voxels) { auto voxels_info = np_voxels.request(); - return ::center_of_mass({voxels_info.ptr, voxels_info.shape}); + return NS::center_of_mass({voxels_info.ptr, voxels_info.shape}); } -array<real_t, 9> inertia_matrix(const np_maskarray &np_voxels, array<real_t, 3>& cm){ +array<real_t, 9> inertia_matrix(const np_maskarray &np_voxels, array<real_t, 3> &cm) { auto voxels_info = np_voxels.request(); - return ::inertia_matrix({voxels_info.ptr, voxels_info.shape}, cm); + return NS::inertia_matrix({voxels_info.ptr, voxels_info.shape}, cm); } /* From 51943cb9a2621c8931a5656a81552ddd48e37b5d Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 17 Feb 2023 16:41:58 +0100 Subject: [PATCH 084/136] #25 implemented single source inertia matrix --- src/lib/cpp/cpu/geometry.cc | 45 ++------------------------ src/lib/cpp/cpu_seq/geometry.cc | 48 +++++++++++++-------------- 
src/lib/cpp/gpu/geometry.cc | 57 ++------------------------------- 3 files changed, 27 insertions(+), 123 deletions(-) diff --git a/src/lib/cpp/cpu/geometry.cc b/src/lib/cpp/cpu/geometry.cc index 728e933..8da364d 100644 --- a/src/lib/cpp/cpu/geometry.cc +++ b/src/lib/cpp/cpu/geometry.cc @@ -13,49 +13,8 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> &mask) { return cpu_seq::center_of_mass(mask); } -array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { - real_t - Ixx = 0, Ixy = 0, Ixz = 0, - Iyy = 0, Iyz = 0, - Izz = 0; - - ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - - print_timestamp("inertia_matrix_serial start"); - - #pragma omp parallel for collapse(3) reduction(+:Ixx,Iyy,Izz) reduction(-:Ixy,Ixz,Iyz) - for (int64_t X = 0; X < Nx; X++) { - for (int64_t Y = 0; Y < Ny; Y++) { - for (int64_t Z = 0; Z < Nz; Z++) { - - // TODO shouldn't the loops be interchanged to match the access pattern? (Naming-wise, that is) - int64_t k = X*Ny*Nz + Y*Nz + Z; - mask_type m = voxels.data[k]; - - // m guards this, and this removes branches - // if (m != 0) - real_t - x = X - cm[0], - y = Y - cm[1], - z = Z - cm[2]; - - Ixx += m * (y*y + z*z); - Iyy += m * (x*x + z*z); - Izz += m * (x*x + y*y); - Ixy -= m * x*y; - Ixz -= m * x*z; - Iyz -= m * y*z; - } - } - } - - print_timestamp("inertia_matrix_serial end"); - - return array<real_t,9> { - Ixx, Ixy, Ixz, - Ixy, Iyy, Iyz, - Ixz, Iyz, Izz - }; +array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array<real_t,3> &cm) { + return cpu_seq::inertia_matrix(mask, cm); } } \ No newline at end of file diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 609970c..2901829 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -18,6 +18,7 @@ array<real_t, 3> center_of_mass(const input_ndarray<mask_type> &mask) { uint64_t total_mass = 0, cmz = 0, cmy = 0, cmx = 0; 
BLOCK_BEGIN(mask, reduction(+:total_mass,cmz,cmy,cmx)); { + // TODO James approves; now RUN! mask_type m = mask_buffer[flat_index]; @@ -38,39 +39,35 @@ array<real_t, 3> center_of_mass(const input_ndarray<mask_type> &mask) { return array<real_t, 3>{ rcmz, rcmy, rcmx }; } -array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { +array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array<real_t,3> &cm) { + UNPACK_NUMPY(mask); + real_t Ixx = 0, Ixy = 0, Ixz = 0, Iyy = 0, Iyz = 0, Izz = 0; - ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - print_timestamp("inertia_matrix_serial start"); - int64_t k = 0; - for (int64_t X = 0; X < Nx; X++) { - for (int64_t Y = 0; Y < Ny; Y++) { - for (int64_t Z = 0; Z < Nz; Z++) { - mask_type m = voxels.data[k]; - k++; + BLOCK_BEGIN(mask, reduction(+:Ixx, Iyy, Izz) reduction(-:Ixy,Ixz,Iyz)) { - // m guards this, and then branches are removed - //if (m != 0) - real_t - x = X - cm[0], - y = Y - cm[1], - z = Z - cm[2]; - - Ixx += m * (y*y + z*z); - Iyy += m * (x*x + z*z); - Izz += m * (x*x + y*y); - Ixy -= m * x*y; - Ixz -= m * x*z; - Iyz -= m * y*z; - } - } - } + mask_type m = mask_buffer[flat_index]; + + // m guards this, and then branches are removed + //if (m != 0) + real_t + X = x - cm[0], + Y = y - cm[1], + Z = z - cm[2]; + + Ixx += m * (Y*Y + Z*Z); + Iyy += m * (X*X + Z*Z); + Izz += m * (X*X + Y*Y); + Ixy -= m * X*Y; + Ixz -= m * X*Z; + Iyz -= m * Y*Z; + + } BLOCK_END(); print_timestamp("inertia_matrix_serial end"); @@ -79,6 +76,7 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const arr Ixy, Iyy, Iyz, Ixz, Iyz, Izz }; + } } diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index 6d75136..5d54e79 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -13,61 +13,8 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> &mask) { return cpu_seq::center_of_mass(mask); } 
-array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm) { - // nvc++ doesn't support OpenACC 2.7 array reductions yet, so must name each element. - real_t - Ixx = 0, Ixy = 0, Ixz = 0, - Iyy = 0, Iyz = 0, - Izz = 0; - - size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - ssize_t image_length = Nx*Ny*Nz; - - print_timestamp("inertia_matrix start"); - - #pragma acc data copy(Ixx, Ixy, Ixz, Iyy, Iyz, Izz) - { - for (ssize_t block_start = 0; block_start < image_length; block_start += acc_block_size<mask_type>) { - const mask_type *buffer = voxels.data + block_start; - ssize_t this_block_size = min(acc_block_size<mask_type>, image_length - block_start); - - #pragma acc data copyin(buffer[:this_block_size]) - { - #pragma acc parallel loop reduction(+:Ixx,Iyy,Izz) reduction(-:Ixy,Ixz,Iyz) - for (int64_t k = 0; k < this_block_size; k++) { //\if (buffer[k] != 0) - mask_type m = buffer[k]; - - // m guards this, and GPUs doesn't like branches - //if (m != 0) - int64_t - flat_idx = block_start + k, - X = flat_idx / (Ny * Nz), - Y = ((flat_idx) / Nz) % Ny, - Z = flat_idx % Nz; - - real_t - x = X - cm[0], - y = Y - cm[1], - z = Z - cm[2]; - - Ixx += m * (y*y + z*z); - Iyy += m * (x*x + z*z); - Izz += m * (x*x + y*y); - Ixy -= m * x*y; - Ixz -= m * x*z; - Iyz -= m * y*z; - } - } - } - } - - print_timestamp("inertia_matrix end"); - - return array<real_t,9> { - Ixx, Ixy, Ixz, - Ixy, Iyy, Iyz, - Ixz, Iyz, Izz - }; +array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array<real_t,3> &cm) { + return cpu_seq::inertia_matrix(mask, cm); } /* TODO Only called in test.py. Postponed for now. 
From 0f39d2719fa2f5dd027709d083a11c4f851c4d58 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Sun, 19 Feb 2023 14:09:32 +0100 Subject: [PATCH 085/136] #25 Moved geometry::in_bbox --- src/lib/cpp/cpu/geometry.cc | 4 +++ src/lib/cpp/cpu_seq/geometry.cc | 64 ++++++++++----------------------- src/lib/cpp/gpu/geometry.cc | 4 +++ src/lib/cpp/include/geometry.hh | 2 ++ 4 files changed, 28 insertions(+), 46 deletions(-) diff --git a/src/lib/cpp/cpu/geometry.cc b/src/lib/cpp/cpu/geometry.cc index 8da364d..6a6e615 100644 --- a/src/lib/cpp/cpu/geometry.cc +++ b/src/lib/cpp/cpu/geometry.cc @@ -13,6 +13,10 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> &mask) { return cpu_seq::center_of_mass(mask); } +bool in_bbox(float U, float V, float W, const std::array<float, 6> &bbox) { + return cpu_seq::in_bbox(U, V, W, bbox); +} + array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array<real_t,3> &cm) { return cpu_seq::inertia_matrix(mask, cm); } diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 2901829..08dfbea 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -39,6 +39,23 @@ array<real_t, 3> center_of_mass(const input_ndarray<mask_type> &mask) { return array<real_t, 3>{ rcmz, rcmy, rcmx }; } +bool in_bbox(float U, float V, float W, const std::array<float, 6> &bbox) { + const auto& [U_min, U_max, V_min, V_max, W_min, W_max] = bbox; + + bool inside = + U >= U_min && + U <= U_max && + V >= V_min && + V <= V_max && + W >= W_min && + W <= W_max; + + // printf("in_bbox: (%.1f,%.1f,%.1f) \in ([%.1f,%.1f],[%.1f,%.1f],[%.1f,%.1f]) == %d\n", + // U,V,W,U_min,U_max,V_min,V_max,U_min,U_max,inside); + + return inside; +} + array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array<real_t,3> &cm) { UNPACK_NUMPY(mask); @@ -81,52 +98,7 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array } -/* TODO only called 
in test.py. Postponed for now. -void integrate_axes(const input_ndarray<mask_type> &voxels, - const array<real_t,3> &x0, - const array<real_t,3> &v_axis, - const array<real_t,3> &w_axis, - const real_t v_min, const real_t w_min, - output_ndarray<real_t> output) { - ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - ssize_t Nv = output.shape[0], Nw = output.shape[1]; - int64_t image_length = Nx*Ny*Nz; - real_t *output_data = output.data; - - // TODO: Check v_axis & w_axis projections to certify bounds and get rid of runtime check - int64_t k = 0: - for (int64_t X = 0; X < Nx; X++) { - for (int64_t Y = 0; Y < Ny; Y++) { - for (int64_t Z = 0; Z < Nz; Z++) { - if (buffer[k] != 0) { - real_t xs[3] = { - (flat_idx / (Ny*Nz)) - x0[0], // x - ((flat_idx / Nz) % Ny) - x0[1], // y - (flat_idx % Nz) - x0[2] }; // z - - mask_type voxel = buffer[k]; - real_t v = dot(xs, v_axis), w = dot(xs,w_axis); - int64_t i_v = round(v-v_min), j_w = round(w-w_min); - - if (i_v >= 0 && j_w >= 0 && i_v < Nv && j_w < Nw) { - output_data[i_v*Nw + j_w] += voxel; - } - } - k++; - } - } - } -} - -bool in_bbox(float U, float V, float W, const std::array<float,6> bbox) { - const auto& [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; - - bool inside = U>=U_min && U<=U_max && V>=V_min && V<=V_max && W>=W_min && W<=W_max; - - // printf("in_bbox: (%.1f,%.1f,%.1f) \in ([%.1f,%.1f],[%.1f,%.1f],[%.1f,%.1f]) == %d\n", - // U,V,W,U_min,U_max,V_min,V_max,U_min,U_max,inside); - return inside; -} +/* template<typename field_type> float resample2x2x2(const field_type *voxels, const array<ssize_t,3> &shape, diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index 5d54e79..2192c7f 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -13,6 +13,10 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> &mask) { return cpu_seq::center_of_mass(mask); } +bool in_bbox(float U, float V, float W, const std::array<float, 6> &bbox) { + return 
cpu_seq::in_bbox(U, V, W, bbox); +} + array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array<real_t,3> &cm) { return cpu_seq::inertia_matrix(mask, cm); } diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh index 25bef7e..07cb1dd 100644 --- a/src/lib/cpp/include/geometry.hh +++ b/src/lib/cpp/include/geometry.hh @@ -25,6 +25,8 @@ Computes the center of mass of the given tomography. */ array<real_t,3> center_of_mass(const input_ndarray<mask_type> &voxels); +bool in_bbox(float U, float V, float W, const std::array<float, 6> &bbox); + /* Computes the inertia matrix of the given tomography based of the given center of mass. From 4080522092c109fbe46ac3879f93a05148acf98a Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 3 Mar 2023 13:52:14 +0100 Subject: [PATCH 086/136] #25 Moved geometry::sample_plane and resample2x2x2 --- src/lib/cpp/cpu/geometry.cc | 19 ++++++ src/lib/cpp/cpu_seq/geometry.cc | 82 ++++++++++++++---------- src/lib/cpp/gpu/geometry.cc | 18 ++++++ src/lib/cpp/include/geometry.hh | 14 ++++ src/processing_steps/0700_implant_FoR.py | 2 +- src/pybind/geometry-pybind.cc | 20 +++--- 6 files changed, 111 insertions(+), 44 deletions(-) diff --git a/src/lib/cpp/cpu/geometry.cc b/src/lib/cpp/cpu/geometry.cc index 6a6e615..4548dd7 100644 --- a/src/lib/cpp/cpu/geometry.cc +++ b/src/lib/cpp/cpu/geometry.cc @@ -9,6 +9,7 @@ using namespace std; namespace cpu_par { +// TODO look at function aliasing. Currently doesn't work, as it clashes with the header file prototype. 
array<real_t,3> center_of_mass(const input_ndarray<mask_type> &mask) { return cpu_seq::center_of_mass(mask); } @@ -21,4 +22,22 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array return cpu_seq::inertia_matrix(mask, cm); } +template <typename T> +float resample2x2x2(const T *voxels, + const array<ssize_t, 3> &shape, + const array<float, 3> &X) { + return cpu_seq::resample2x2x2(voxels, shape, X); +} + +template <typename T> +void sample_plane(const input_ndarray<T> &voxels, + const real_t voxel_size, // In micrometers + const array<real_t, 3> cm, + const array<real_t, 3> u_axis, + const array<real_t, 3> v_axis, + const array<real_t, 4> bbox, // [umin,umax,vmin,vmax] in micrometers + output_ndarray<real_t> plane_samples) { + return cpu_seq::sample_plane(voxels, voxel_size, cm, u_axis, v_axis, bbox, plane_samples); +} + } \ No newline at end of file diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 08dfbea..c79ca2d 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -93,41 +93,38 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array Ixy, Iyy, Iyz, Ixz, Iyz, Izz }; - -} - } -/* +template <typename T> +float resample2x2x2(const T *voxels, + const array<ssize_t, 3> &shape, + const array<float, 3> &X) { + auto [Nz,Ny,Nx] = shape; -template<typename field_type> float resample2x2x2(const field_type *voxels, - const array<ssize_t,3> &shape, - const array<float,3> &X) { - auto [Nx,Ny,Nz] = shape; // Eller omvendt? 
- if (!in_bbox(X[0],X[1],X[2], {0.5,Nx-1.5, 0.5,Ny-1.5, 0.5,Nz-1.5})) { - uint64_t voxel_index = floor(X[0])*Ny*Nz+floor(X[1])*Ny+floor(X[2]); + if (!in_bbox(X[0], X[1], X[2], {0.5f, Nx-0.5f, 0.5f, Ny-0.5f, 0.5f, Nz-0.5f})) { + uint64_t voxel_index = floor(X[0])*Ny*Nz + floor(X[1])*Ny + floor(X[2]); return voxels[voxel_index]; } + float Xfrac[2][3]; // {Xminus[3], Xplus[3]} - int64_t Xint[2][3]; // {Iminus[3], Iplus[3]} + int64_t Xint[2][3]; // {Iminus[3], Iplus[3]} float value = 0; for (int i = 0; i < 3; i++) { - double Iminus, Iplus; - Xfrac[0][i] = 1-modf(X[i]-0.5, &Iminus); // 1-{X[i]-1/2}, floor(X[i]-1/2) - Xfrac[1][i] = modf(X[i]+0.5, &Iplus); // {X[i]+1/2}, floor(X[i]+1/2) + float Iminus, Iplus; + Xfrac[0][i] = 1-modf(X[i]-0.5f, &Iminus); // 1-{X[i]-1/2}, floor(X[i]-1/2) + Xfrac[1][i] = modf(X[i]+0.5f, &Iplus); // {X[i]+1/2}, floor(X[i]+1/2) Xint[0][i] = Iminus; Xint[1][i] = Iplus; } - for (int ijk = 0; ijk <= 7; ijk++) { float weight = 1; int64_t IJK[3] = {0,0,0}; for (int axis = 0; axis < 3; axis++) { // x-1/2 or x+1/2 - int pm = (ijk>>axis) & 1; + int pm = (ijk >> axis) & 1; IJK[axis] = Xint[pm][axis]; weight *= Xfrac[pm][axis]; } @@ -144,28 +141,36 @@ template<typename field_type> float resample2x2x2(const field_type *voxels, uint64_t voxel_index = I*Ny*Nz+J*Ny+K; assert(I>=0 && J>=0 && K>=0); assert(I<Nx && J<Ny && K<Nz); - field_type voxel = voxels[voxel_index]; + float voxel = (float) voxels[voxel_index]; value += voxel*weight; } + return value; } -template <typename voxel_type> void sample_plane(const input_ndarray<voxel_type> &voxels, - const real_t voxel_size, // In micrometers - const array<real_t,3> cm, - const array<real_t,3> u_axis, - const array<real_t,3> v_axis, - const array<real_t,4> bbox, // [umin,umax,vmin,vmax] in micrometers - output_ndarray<real_t> plane_samples) { +template <typename T> +void sample_plane(const input_ndarray<T> &voxels, + const real_t voxel_size, // In micrometers + const array<real_t, 3> cm, + const array<real_t, 3> 
u_axis, + const array<real_t, 3> v_axis, + const array<real_t, 4> bbox, // [umin,umax,vmin,vmax] in micrometers + output_ndarray<real_t> plane_samples) { const auto& [umin,umax,vmin,vmax] = bbox; // In micrometers - ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - ssize_t nu = plane_samples.shape[0], nv = plane_samples.shape[1]; - real_t du = (umax-umin)/nu, dv = (vmax-vmin)/nv; + UNPACK_NUMPY(voxels); + ssize_t + nu = plane_samples.shape[0], + nv = plane_samples.shape[1]; + real_t + du = (umax - umin) / nu, + dv = (vmax - vmin) / nv; - #pragma omp parallel for collapse(2) - for (ssize_t ui=0;ui<nu;ui++) { - for (ssize_t vj=0;vj<nv;vj++) { - const real_t u = umin + ui*du, v = vmin + vj*dv; + //#pragma omp parallel for collapse(2) + for (ssize_t ui = 0; ui < nu; ui++) { + for (ssize_t vj = 0; vj < nv; vj++) { + const real_t + u = umin + ui*du, + v = vmin + vj*dv; // X,Y,Z in micrometers; x,y,z in voxel index space const real_t @@ -173,13 +178,17 @@ template <typename voxel_type> void sample_plane(const input_ndarray<voxel_type> Y = cm[1] + u*u_axis[1] + v*v_axis[1], Z = cm[2] + u*u_axis[2] + v*v_axis[2]; - const real_t x = X/voxel_size, y = Y/voxel_size, z = Z/voxel_size; + const real_t + x = X / voxel_size, + y = Y / voxel_size, + z = Z / voxel_size; // printf("u,v = %g,%g -> %.1f,%.1f,%.1f -> %d, %d, %d\n",u,v,X,Y,Z,int(round(x)),int(round(y)),int(round(z))); - voxel_type value = 0; - if (in_bbox(x,y,z,{0.5,Nx-0.5, 0.5,Ny-0.5, 0.5,Nz-0.5})) - value = resample2x2x2<voxel_type>(voxels.data,{Nx,Ny,Nz},{x,y,z}); + T value = 0; + std::array<float, 6> bbox = {0.5f, voxels_Nx-0.5f, 0.5f, voxels_Ny-0.5f, 0.5f, voxels_Nz-0.5f}; + if (in_bbox(x,y,z, bbox)) + value = (T) floor(resample2x2x2<T>(voxels.data, {voxels_Nx, voxels_Ny, voxels_Nz}, {x, y, z})); // else // fprintf(stderr,"Sampling outside image: x,y,z = %.1f,%.1f,%.1f, Nx,Ny,Nz = %ld,%ld,%ld\n",x,y,z,Nx,Ny,Nz); @@ -188,6 +197,9 @@ template <typename voxel_type> void sample_plane(const 
input_ndarray<voxel_type> } } +} + +/* /* TODO only called in test.py. Postpone for now. // NB: xyz are in indices, not micrometers void zero_outside_bbox(const array<real_t,9> &principal_axes, diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index 2192c7f..dc6448d 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -21,6 +21,24 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array return cpu_seq::inertia_matrix(mask, cm); } +template <typename T> +float resample2x2x2(const T *voxels, + const array<ssize_t, 3> &shape, + const array<float, 3> &X) { + return cpu_seq::resample2x2x2(voxels, shape, X); +} + +template <typename T> +void sample_plane(const input_ndarray<T> &voxels, + const real_t voxel_size, // In micrometers + const array<real_t, 3> cm, + const array<real_t, 3> u_axis, + const array<real_t, 3> v_axis, + const array<real_t, 4> bbox, // [umin,umax,vmin,vmax] in micrometers + output_ndarray<real_t> plane_samples) { + return cpu_seq::sample_plane(voxels, voxel_size, cm, u_axis, v_axis, bbox, plane_samples); +} + /* TODO Only called in test.py. Postponed for now. 
void integrate_axes(const input_ndarray<mask_type> &voxels, const array<real_t,3> &x0, diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh index 07cb1dd..237844b 100644 --- a/src/lib/cpp/include/geometry.hh +++ b/src/lib/cpp/include/geometry.hh @@ -36,6 +36,20 @@ Computes the inertia matrix of the given tomography based of the given center of */ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm); +template <typename T> +float resample2x2x2(const T *voxels, + const array<ssize_t,3> &shape, + const array<float,3> &X); + +template <typename T> +void sample_plane(const input_ndarray<T> &voxels, + const real_t voxel_size, // In micrometers + const array<real_t, 3> cm, + const array<real_t, 3> u_axis, + const array<real_t, 3> v_axis, + const array<real_t, 4> bbox, // [umin,umax,vmin,vmax] in micrometers + output_ndarray<real_t> plane_samples); + } #endif \ No newline at end of file diff --git a/src/processing_steps/0700_implant_FoR.py b/src/processing_steps/0700_implant_FoR.py index 0ed5ae1..dd65424 100644 --- a/src/processing_steps/0700_implant_FoR.py +++ b/src/processing_steps/0700_implant_FoR.py @@ -2,7 +2,7 @@ sys.path.append(sys.path[0]+"/../") from config.constants import * from config.paths import hdf5_root, binary_root -from lib.cpp.cpu_seq.geometry import center_of_mass, inertia_matrix, integrate_axes, sample_plane +from lib.cpp.cpu_seq.geometry import center_of_mass, inertia_matrix, sample_plane from lib.cpp.gpu.morphology import erode_3d_sphere as erode_3d, dilate_3d_sphere as dilate_3d import matplotlib.pyplot as plt from matplotlib.colors import colorConverter diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index d52c83c..3e9be7c 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -27,24 +27,28 @@ array<real_t, 9> inertia_matrix(const np_maskarray &np_voxels, array<real_t, 3> return NS::inertia_matrix({voxels_info.ptr, 
voxels_info.shape}, cm); } -/* - -template <typename voxel_type> -void sample_plane(const np_array<voxel_type> &np_voxels, +template <typename T> +void sample_plane(const np_array<T> &np_voxels, const real_t voxel_size, // In micrometers const array<real_t,3> cm, const array<real_t,3> u_axis, const array<real_t,3> v_axis, const array<real_t,4> bbox, // [umin,umax,vmin,vmax] in micrometers - np_array<float> np_plane_samples) { + np_array<real_t> np_plane_samples) { auto voxels_info = np_voxels.request(); auto plane_samples_info = np_plane_samples.request(); - sample_plane<voxel_type>({voxels_info.ptr, voxels_info.shape}, voxel_size, + NS::sample_plane<T>({voxels_info.ptr, voxels_info.shape}, voxel_size, cm,u_axis,v_axis,bbox, {plane_samples_info.ptr, plane_samples_info.shape}); } +real_t resample2x2x2(const np_array<uint8_t> &np_voxels) { + auto voxels_info = np_voxels.request(); + return 0.0f; +} + +/* void integrate_axes(const np_maskarray &np_voxels, const array<real_t,3> &x0, const array<real_t,3> &v_axis, @@ -140,7 +144,7 @@ PYBIND11_MODULE(geometry, m) { //m.def("zero_outside_bbox", &python_api::zero_outside_bbox); //m.def("fill_implant_mask", &python_api::fill_implant_mask); //m.def("cylinder_projection", &python_api::cylinder_projection); - //m.def("sample_plane", &python_api::sample_plane<uint16_t>); - //m.def("sample_plane", &python_api::sample_plane<uint8_t>); + m.def("sample_plane", &python_api::sample_plane<uint16_t>); + m.def("sample_plane", &python_api::sample_plane<uint8_t>); //m.def("compute_front_mask", &python_api::compute_front_mask); } From 1405c48e4302cabbab27a3670e140828eda85933 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 3 Mar 2023 13:53:09 +0100 Subject: [PATCH 087/136] #25 Added functionality to the geometry test helper functions --- src/test/test_geometry.py | 52 +++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/src/test/test_geometry.py 
b/src/test/test_geometry.py index 6cc9f37..6871ce2 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -16,36 +16,52 @@ #n = 2344 # ~12 GB, used for testing whether blocked works. n = 128 -def assert_with_print(a, b): - all_close = np.allclose(a, b) +def assert_with_print(a, b, tolerance=1e-7, names=None): + na, nb = np.array(a), np.array(b) + nabs = np.abs(na - nb) + all_close = np.alltrue(nabs < tolerance) if not all_close: - na, nb = np.array(a), np.array(b) - print (na) - print (nb) - nabs = np.abs(na - nb) - print (nabs) - print (np.sum(nabs)) + print ('a', na) + print ('b', nb) + print ('absolute error (AE) (abs(a-b))', nabs) + print ('AE sum', np.sum(nabs)) + diffs = np.argwhere(nabs > tolerance) + print (f'differing on {diffs.shape} elements') + for i in diffs[:5]: # Only print 5 first + print ('differing index (i), a[i], b[i] =', i, a[i[0], i[1]], b[i[0], i[1]]) + if not names is None: + print (names) assert all_close -def run_with_warmup(f): - f() +def run_with_warmup(f, allocate_result=None): + ''' + Runs the given function and returns the result and how long time it took to run. + + @param allocate_result Defines whether the memory for the result should be allocated before running. If it should, it should be a tuple of the shape and the dtype of the array. None otherwise. 
+ ''' + alloc = lambda x: np.zeros(x[0], x[1]) + f() if allocate_result is None else f(alloc(allocate_result)) + result = alloc(allocate_result) start = datetime.datetime.now() - result = f() + if allocate_result is None: + result = f() + else: + f(result) end = datetime.datetime.now() return result, end - start -def compare_fs(func, baseline_f, cpu_f, gpu_f, should_assert=True): - baseline, baseline_t = run_with_warmup(baseline_f) +def compare_fs(func, baseline_f, cpu_f, gpu_f, should_assert=True, tolerance=1e-7, + allocate_result: tuple[tuple[int],np.dtype]=None): + baseline, baseline_t = run_with_warmup(baseline_f, allocate_result) print (f'({func}) Sequential ran in {baseline_t}') - cpu, cpu_t = run_with_warmup(cpu_f) + cpu, cpu_t = run_with_warmup(cpu_f, allocate_result) print (f'({func}) Parallel CPU ran in {cpu_t}, which is {baseline_t / cpu_t} times faster than sequential') - if should_assert: assert_with_print(baseline, cpu) + if should_assert: assert_with_print(baseline, cpu, tolerance, 'cpu_seq vs cpu') - gpu, gpu_t = run_with_warmup(gpu_f) + gpu, gpu_t = run_with_warmup(gpu_f, allocate_result) print (f'({func}) GPU ran in {gpu_t}, which is {baseline_t / gpu_t} times faster than sequential') - if should_assert: assert_with_print(baseline, gpu) - + if should_assert: assert_with_print(baseline, gpu, tolerance, 'cpu_seq vs gpu') def test_center_of_mass(): voxels = np.random.randint(0, 256, (n,n,n), np.uint8) From 1cedce1459552845c50360f0189e59a73d59bc70 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 3 Mar 2023 14:06:58 +0100 Subject: [PATCH 088/136] #25 Added launch configuration for geometry test --- .vscode/launch.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.vscode/launch.json b/.vscode/launch.json index aae8ffc..b48c6cc 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -4,6 +4,15 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", 
"configurations": [ + { + "name": "Python: Test geometry", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/src/test/test_geometry.py", + "console": "integratedTerminal", + "args": [], + "justMyCode": false + }, { "name": "Python: 0400_h5tobin", "type": "python", From 3fb1ae6ff92e61b91cdb97f8d1cae8b41ff73ff3 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 3 Mar 2023 14:07:29 +0100 Subject: [PATCH 089/136] #25 The geometry tests wasn't as flexible as first assumed. --- src/test/test_geometry.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index 6871ce2..a3e6530 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -41,11 +41,12 @@ def run_with_warmup(f, allocate_result=None): ''' alloc = lambda x: np.zeros(x[0], x[1]) f() if allocate_result is None else f(alloc(allocate_result)) - result = alloc(allocate_result) - start = datetime.datetime.now() if allocate_result is None: + start = datetime.datetime.now() result = f() else: + result = alloc(allocate_result) + start = datetime.datetime.now() f(result) end = datetime.datetime.now() return result, end - start @@ -66,20 +67,21 @@ def compare_fs(func, baseline_f, cpu_f, gpu_f, should_assert=True, tolerance=1e- def test_center_of_mass(): voxels = np.random.randint(0, 256, (n,n,n), np.uint8) - baseline = partial(m_cpu_seq.center_of_mass, voxels) - cpu = partial(m_cpu.center_of_mass, voxels) - gpu = partial(m_gpu.center_of_mass, voxels) - - compare_fs('center_of_mass', baseline, cpu, gpu) + baseline, cpu, gpu = [ + partial(impl.center_of_mass, voxels) + for impl in [m_cpu_seq, m_cpu, m_gpu] + ] + compare_fs('center_of_mass', baseline, cpu, gpu, tolerance=1e-5) def test_inertia_matrix(): voxels = np.random.randint(0, 2, (n,n,n), np.uint8) cm = m_gpu.center_of_mass(voxels) - baseline = partial(m_cpu_seq.inertia_matrix, voxels, cm) - cpu = 
partial(m_cpu.inertia_matrix, voxels, cm) - gpu = partial(m_gpu.inertia_matrix, voxels, cm) + baseline, cpu, gpu = [ + partial(impl.inertia_matrix, voxels, cm) + for impl in [m_cpu_seq, m_cpu, m_gpu] + ] # TODO assert disabled due to floating point associativity error accumulation compare_fs('inertia_matrix', baseline, cpu, gpu, should_assert=False) From 530cde9476e37bb136704ce4f2cf5eb1f783191b Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 3 Mar 2023 14:07:53 +0100 Subject: [PATCH 090/136] #25 Added test for geometry::sample_plane --- src/test/test_geometry.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index a3e6530..3d0a3ae 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -86,6 +86,29 @@ def test_inertia_matrix(): # TODO assert disabled due to floating point associativity error accumulation compare_fs('inertia_matrix', baseline, cpu, gpu, should_assert=False) +@pytest.mark.parametrize("dtype", [np.uint8, np.uint16]) +def test_sample_plane(dtype): + # TODO something that isn't just random data? + n = 128 + voxels = np.random.randint(0, np.iinfo(dtype).max, (n,n,n), dtype) + voxel_size = 42 + cm = m_cpu.center_of_mass(voxels) + im = np.array(m_cpu.inertia_matrix(voxels, cm)).reshape((3,3)) + ls,E = np.linalg.eigh(im) + E[:,0] *= -1 + ix = np.argsort(np.abs(ls)) + ls, E = ls[ix], E[:,ix] + UVW = E.T + _, v_vec, w_vec = UVW + cpu_seq, cpu, gpu = [ + partial(impl.sample_plane, voxels, voxel_size, cm, v_vec, w_vec, [0, 1024, 0, 1024]) + for impl in [m_cpu_seq, m_cpu, m_gpu] + ] + + # TODO the function is unstable, even when they're all calling the sequential implementation, t least when comparing gcc against nvcc, but it differs at most with 1. Hence the higher tolerance for this test. 
Can be tested with something like for i in range(10000): + compare_fs('sample_plane', cpu_seq, cpu, gpu, True, 1.1, ((800,800), np.float32)) + if __name__ == '__main__': test_center_of_mass() - test_inertia_matrix() \ No newline at end of file + test_inertia_matrix() + test_sample_plane(np.uint8) \ No newline at end of file From 701614a7a4d3ddb30d938539782e2ca520d19027 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 3 Mar 2023 15:05:05 +0100 Subject: [PATCH 091/136] #25 Added additional C++ warnings. Removed the ones from python / pybind --- src/Makefile | 10 ++++++---- src/pybind/geometry-pybind.cc | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Makefile b/src/Makefile index 7597ea2..71ba57c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,6 +1,7 @@ # Define constants and collections PYTHON = python3.10 -PYBIND_FLAGS += $(shell $(PYTHON) -m pybind11 --include) -march=native -Wall -shared -fPIC -g -std=c++17 -O3 +PYBIND_FLAGS = $(shell $(PYTHON) -m pybind11 --include) +CXXFLAGS += $(subst -I,-isystem ,$(PYBIND_FLAGS)) # We don't care about warnings from the python headers PYBIND_SUFFIX = $(shell $(PYTHON)-config --extension-suffix) # Detect OS for OS specific changes @@ -12,7 +13,7 @@ endif CPP_FOLDER=lib/cpp #CXXFLAGS += -I../contrib/cpptqdm/ -Iinclude -CXXFLAGS += -I$(CPP_FOLDER)/include +CXXFLAGS += -I$(CPP_FOLDER)/include -march=native -Wall -Wextra -Wfloat-equal -Wundef -Wshadow -shared -fPIC -g -std=c++17 -O3 PLATFORMS=cpu_seq cpu cpu_seq_CXX=$(CXX) cpu_CXX=$(cpu_seq_CXX) @@ -26,7 +27,8 @@ CLEANUP = $(TARGETS) $(foreach PLATFORM, $(PLATFORMS), $(CPP_FOLDER)/$(PLATFORM) ifneq (, $(shell which nvc++)) PLATFORMS += gpu gpu_CXX = nvc++ -gpu_FLAGS = -acc=gpu -Minfo=accel -tp=native +gpu_FLAGS = -acc=gpu -tp=native -Xcudafe --display_error_number #-Minfo=accel +gpu_FLAGS += --diag_suppress 1626 # Remove the annoying pybind warning that routine is both inline and noinline else $(info OpenACC compiler 
nvc++ not found. Compiling without.) endif @@ -35,7 +37,7 @@ all: $(TARGETS) define GEN_RULE $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX): pybind/$(LIB)-pybind.cc $(CPP_FOLDER)/$(PLATFORM)/$(LIB).cc $(CPP_FOLDER)/include/*.hh - $($(PLATFORM)_CXX) $($(PLATFORM)_FLAGS) $(CXXFLAGS) $(PYBIND_FLAGS) -I$(CPP_FOLDER)/$(PLATFORM) $$< -o $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX) + $($(PLATFORM)_CXX) $($(PLATFORM)_FLAGS) $(CXXFLAGS) -I$(CPP_FOLDER)/$(PLATFORM) $$< -o $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX) endef $(foreach PLATFORM, $(PLATFORMS), \ diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index 3e9be7c..ade209f 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -1,12 +1,12 @@ #ifdef _OPENACC -#warning "Using GPU" +//#warning "Using GPU" #define NS gpu #else #ifdef _OPENMP -#warning "Using OpenMP" +//#warning "Using OpenMP" #define NS cpu_par #else -#warning "Using sequential" +//#warning "Using sequential" #define NS cpu_seq #endif #endif From e6bb55c03c7d5686ccd59d2360cf5a7e820a5b31 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 3 Mar 2023 16:07:13 +0100 Subject: [PATCH 092/136] #25 Added more warning flags --- src/Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Makefile b/src/Makefile index 71ba57c..2037c3c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -13,11 +13,12 @@ endif CPP_FOLDER=lib/cpp #CXXFLAGS += -I../contrib/cpptqdm/ -Iinclude -CXXFLAGS += -I$(CPP_FOLDER)/include -march=native -Wall -Wextra -Wfloat-equal -Wundef -Wshadow -shared -fPIC -g -std=c++17 -O3 +CXXFLAGS += -I$(CPP_FOLDER)/include -march=native -Wall -Wextra -Wfloat-equal -Wundef -Wshadow -Wuninitialized -Winit-self -shared -fPIC -g -std=c++17 -O3 PLATFORMS=cpu_seq cpu cpu_seq_CXX=$(CXX) +cpu_seq_FLAGS=-Wno-unknown-pragmas -Wno-comment -Wconversion -Weffc++ cpu_CXX=$(cpu_seq_CXX) -cpu_FLAGS=-fopenmp +cpu_FLAGS=$(cpu_seq_FLAGS) -fopenmp LIBS=io 
geometry morphology TARGETS = $(foreach PLATFORM, $(PLATFORMS), $(foreach LIB, $(LIBS), $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX))) @@ -29,6 +30,8 @@ PLATFORMS += gpu gpu_CXX = nvc++ gpu_FLAGS = -acc=gpu -tp=native -Xcudafe --display_error_number #-Minfo=accel gpu_FLAGS += --diag_suppress 1626 # Remove the annoying pybind warning that routine is both inline and noinline +gpu_FLAGS += --diag_suppress 9 # Remove the warning about nested comments +gpu_FLAGS += -Wnvlink,-w # Disable nvlink warnings else $(info OpenACC compiler nvc++ not found. Compiling without.) endif From 3c81ebb497fd62e318cbcbc354fecc63a1f9ef95 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 3 Mar 2023 16:07:41 +0100 Subject: [PATCH 093/136] #25 Handled shadow warning for ndarray struct --- src/lib/cpp/include/datatypes.hh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lib/cpp/include/datatypes.hh b/src/lib/cpp/include/datatypes.hh index cbe1213..2171b86 100644 --- a/src/lib/cpp/include/datatypes.hh +++ b/src/lib/cpp/include/datatypes.hh @@ -19,8 +19,8 @@ template <typename voxel_type> using np_array = py::array_t<voxel_type, py::array::c_style | py::array::forcecast>; typedef py::array_t<mask_type, py::array::c_style | py::array::forcecast> np_maskarray; -typedef py::array_t<real_t, py::array::c_style | py::array::forcecast> np_realarray; -typedef py::array_t<uint8_t, py::array::c_style | py::array::forcecast> np_bytearray; +typedef py::array_t<real_t, py::array::c_style | py::array::forcecast> np_realarray; +typedef py::array_t<uint8_t, py::array::c_style | py::array::forcecast> np_bytearray; template <typename T> constexpr ssize_t acc_block_size = 1024 * 1024 * 1024 / sizeof(T); // 1 GB @@ -33,16 +33,16 @@ template <typename T> struct input_ndarray { const T *data; const vector<ssize_t> shape; - input_ndarray(const T *data, const vector<ssize_t> &shape): data(data), shape(shape) {} - input_ndarray(const void *data, const 
vector<ssize_t> &shape): data(static_cast<const T*>(data)), shape(shape) {} + input_ndarray(const T *arg_data, const vector<ssize_t> &arg_shape): data(arg_data), shape(arg_shape) {} + input_ndarray(const void *arg_data, const vector<ssize_t> &arg_shape): data(static_cast<const T*>(arg_data)), shape(arg_shape) {} }; template <typename T> struct output_ndarray { T *data; const vector<ssize_t> shape; - output_ndarray(T *data, const vector<ssize_t> &shape): data(data), shape(shape) {} - output_ndarray(void *data, const vector<ssize_t> &shape): data(static_cast<T*>(data)), shape(shape) {} + output_ndarray(T *arg_data, const vector<ssize_t> &arg_shape): data(arg_data), shape(arg_shape) {} + output_ndarray(void *arg_data, const vector<ssize_t> &arg_shape): data(static_cast<T*>(arg_data)), shape(arg_shape) {} }; typedef std::array<real_t,16> matrix4x4; From 386153935352658fcad02695c24d0c61090f6d1f Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 3 Mar 2023 16:08:01 +0100 Subject: [PATCH 094/136] #25 Handled unused variable warning for macro generated code --- src/lib/cpp/include/boilerplate.hh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/lib/cpp/include/boilerplate.hh b/src/lib/cpp/include/boilerplate.hh index 0b5b1bc..54852fb 100644 --- a/src/lib/cpp/include/boilerplate.hh +++ b/src/lib/cpp/include/boilerplate.hh @@ -54,9 +54,9 @@ #define PUSH_N_DOWN_TO_BUFFER(ARR) \ ssize_t \ - ARR##_buffer_Nz = ARR##_Nz, \ - ARR##_buffer_Ny = ARR##_Ny, \ - ARR##_buffer_Nx = ARR##_Nx; + __attribute__((unused)) ARR##_buffer_Nz = ARR##_Nz, \ + __attribute__((unused)) ARR##_buffer_Ny = ARR##_Ny, \ + __attribute__((unused)) ARR##_buffer_Nx = ARR##_Nx; #ifdef _OPENACC #define BLOCK_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ @@ -90,9 +90,9 @@ #define UNPACK_NUMPY(ARR) \ ssize_t \ - ARR##_Nz = ARR.shape[0], \ - ARR##_Ny = ARR.shape[1], \ - ARR##_Nx = ARR.shape[2], \ - ARR##_length = ARR##_Nz*ARR##_Ny*ARR##_Nx; + 
__attribute__((unused)) ARR##_Nz = ARR.shape[0], \ + __attribute__((unused)) ARR##_Ny = ARR.shape[1], \ + __attribute__((unused)) ARR##_Nx = ARR.shape[2], \ + __attribute__((unused)) ARR##_length = ARR##_Nz*ARR##_Ny*ARR##_Nx; #endif \ No newline at end of file From 0ccf64713755ac2aff5e9f09f62171f16d2b0816 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 3 Mar 2023 16:08:16 +0100 Subject: [PATCH 095/136] #25 Handled shadow warning --- src/lib/cpp/cpu_seq/geometry.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index c79ca2d..78858cb 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -186,8 +186,8 @@ void sample_plane(const input_ndarray<T> &voxels, // printf("u,v = %g,%g -> %.1f,%.1f,%.1f -> %d, %d, %d\n",u,v,X,Y,Z,int(round(x)),int(round(y)),int(round(z))); T value = 0; - std::array<float, 6> bbox = {0.5f, voxels_Nx-0.5f, 0.5f, voxels_Ny-0.5f, 0.5f, voxels_Nz-0.5f}; - if (in_bbox(x,y,z, bbox)) + std::array<float, 6> local_bbox = {0.5f, voxels_Nx-0.5f, 0.5f, voxels_Ny-0.5f, 0.5f, voxels_Nz-0.5f}; + if (in_bbox(x,y,z, local_bbox)) value = (T) floor(resample2x2x2<T>(voxels.data, {voxels_Nx, voxels_Ny, voxels_Nz}, {x, y, z})); // else // fprintf(stderr,"Sampling outside image: x,y,z = %.1f,%.1f,%.1f, Nx,Ny,Nz = %ld,%ld,%ld\n",x,y,z,Nx,Ny,Nz); From c159aa9b3d4ef719c9d191987b864e496f58af5e Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 3 Mar 2023 16:37:27 +0100 Subject: [PATCH 096/136] #25 Added explicit type conversions --- src/lib/cpp/cpu_seq/geometry.cc | 30 +++++++++++++++--------------- src/pybind/morphology-pybind.cc | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 78858cb..5f9d560 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -30,9 +30,9 @@ 
array<real_t, 3> center_of_mass(const input_ndarray<mask_type> &mask) { } BLOCK_END(); real_t - rcmz = cmz / ((real_t) total_mass), - rcmy = cmy / ((real_t) total_mass), - rcmx = cmx / ((real_t) total_mass); + rcmz = real_t(cmz) / real_t(total_mass), + rcmy = real_t(cmy) / real_t(total_mass), + rcmx = real_t(cmx) / real_t(total_mass); print_timestamp("center_of_mass end"); @@ -73,9 +73,9 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array // m guards this, and then branches are removed //if (m != 0) real_t - X = x - cm[0], - Y = y - cm[1], - Z = z - cm[2]; + X = real_t(x) - cm[0], + Y = real_t(y) - cm[1], + Z = real_t(z) - cm[2]; Ixx += m * (Y*Y + Z*Z); Iyy += m * (X*X + Z*Z); @@ -101,8 +101,8 @@ float resample2x2x2(const T *voxels, const array<float, 3> &X) { auto [Nz,Ny,Nx] = shape; - if (!in_bbox(X[0], X[1], X[2], {0.5f, Nx-0.5f, 0.5f, Ny-0.5f, 0.5f, Nz-0.5f})) { - uint64_t voxel_index = floor(X[0])*Ny*Nz + floor(X[1])*Ny + floor(X[2]); + if (!in_bbox(X[0], X[1], X[2], {0.5f, float(Nx)-0.5f, 0.5f, float(Ny)-0.5f, 0.5f, float(Nz)-0.5f})) { + uint64_t voxel_index = uint64_t(floor(X[0]))*Ny*Nz + uint64_t(floor(X[1]))*Ny + uint64_t(floor(X[2])); return voxels[voxel_index]; } @@ -115,8 +115,8 @@ float resample2x2x2(const T *voxels, Xfrac[0][i] = 1-modf(X[i]-0.5f, &Iminus); // 1-{X[i]-1/2}, floor(X[i]-1/2) Xfrac[1][i] = modf(X[i]+0.5f, &Iplus); // {X[i]+1/2}, floor(X[i]+1/2) - Xint[0][i] = Iminus; - Xint[1][i] = Iplus; + Xint[0][i] = (int64_t) Iminus; + Xint[1][i] = (int64_t) Iplus; } for (int ijk = 0; ijk <= 7; ijk++) { @@ -162,15 +162,15 @@ void sample_plane(const input_ndarray<T> &voxels, nu = plane_samples.shape[0], nv = plane_samples.shape[1]; real_t - du = (umax - umin) / nu, - dv = (vmax - vmin) / nv; + du = (umax - umin) / real_t(nu), + dv = (vmax - vmin) / real_t(nv); //#pragma omp parallel for collapse(2) for (ssize_t ui = 0; ui < nu; ui++) { for (ssize_t vj = 0; vj < nv; vj++) { const real_t - u = umin + ui*du, - v = vmin + 
vj*dv; + u = umin + real_t(ui)*du, + v = vmin + real_t(vj)*dv; // X,Y,Z in micrometers; x,y,z in voxel index space const real_t @@ -186,7 +186,7 @@ void sample_plane(const input_ndarray<T> &voxels, // printf("u,v = %g,%g -> %.1f,%.1f,%.1f -> %d, %d, %d\n",u,v,X,Y,Z,int(round(x)),int(round(y)),int(round(z))); T value = 0; - std::array<float, 6> local_bbox = {0.5f, voxels_Nx-0.5f, 0.5f, voxels_Ny-0.5f, 0.5f, voxels_Nz-0.5f}; + std::array<float, 6> local_bbox = {0.5f, float(voxels_Nx)-0.5f, 0.5f, float(voxels_Ny)-0.5f, 0.5f, float(voxels_Nz)-0.5f}; if (in_bbox(x,y,z, local_bbox)) value = (T) floor(resample2x2x2<T>(voxels.data, {voxels_Nx, voxels_Ny, voxels_Nz}, {x, y, z})); // else diff --git a/src/pybind/morphology-pybind.cc b/src/pybind/morphology-pybind.cc index f9c7891..b8547e7 100644 --- a/src/pybind/morphology-pybind.cc +++ b/src/pybind/morphology-pybind.cc @@ -16,7 +16,7 @@ void morphology_3d_sphere_wrapper( voxels_info = np_voxels.request(), result_info = np_result.request(); - int32_t Nz = voxels_info.shape[0], Ny = voxels_info.shape[1], Nx = voxels_info.shape[2]; + int64_t Nz = voxels_info.shape[0], Ny = voxels_info.shape[1], Nx = voxels_info.shape[2]; int64_t N[3] = {Nz, Ny, Nx}; int64_t strides[3] = {Ny*Nx, Nx, 1}; From 1daccb7fa1caa20a61528d8c08add0925ea7287f Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 3 Mar 2023 16:38:12 +0100 Subject: [PATCH 097/136] #25 Made the non-seq io functions call the seq one --- src/Makefile | 2 +- src/lib/cpp/cpu/io.cc | 8 ++++++-- src/lib/cpp/cpu_seq/io.cc | 2 ++ src/lib/cpp/gpu/io.cc | 8 ++++++-- src/lib/cpp/include/datatypes.hh | 14 +++++++++++++ src/lib/cpp/include/io.hh | 10 ++++++++-- src/pybind/io-pybind.cc | 34 ++++++++++++++++++-------------- 7 files changed, 56 insertions(+), 22 deletions(-) diff --git a/src/Makefile b/src/Makefile index 2037c3c..463d674 100644 --- a/src/Makefile +++ b/src/Makefile @@ -16,7 +16,7 @@ CPP_FOLDER=lib/cpp CXXFLAGS += -I$(CPP_FOLDER)/include 
-march=native -Wall -Wextra -Wfloat-equal -Wundef -Wshadow -Wuninitialized -Winit-self -shared -fPIC -g -std=c++17 -O3 PLATFORMS=cpu_seq cpu cpu_seq_CXX=$(CXX) -cpu_seq_FLAGS=-Wno-unknown-pragmas -Wno-comment -Wconversion -Weffc++ +cpu_seq_FLAGS=-Wno-unknown-pragmas -Wno-comment -Wconversion #-Weffc++ cpu_CXX=$(cpu_seq_CXX) cpu_FLAGS=$(cpu_seq_FLAGS) -fopenmp diff --git a/src/lib/cpp/cpu/io.cc b/src/lib/cpp/cpu/io.cc index 41b56ec..62be52c 100644 --- a/src/lib/cpp/cpu/io.cc +++ b/src/lib/cpp/cpu/io.cc @@ -2,15 +2,17 @@ #include <fstream> #include "io.hh" +#include "../cpu_seq/io.cc" using namespace std; +namespace cpu { template <typename T> void load_contiguous_slice(T *data, const string filename, const uint64_t offset, const uint64_t size) { - throw runtime_error(string("Library doesn't have a parallel cpu implementation of ") + __FUNCTION__); + cpu_seq::load_contiguous_slice(data, filename, offset, size); } template <typename T> @@ -18,5 +20,7 @@ void write_contiguous_slice(const T *data, const string filename, const uint64_t offset, const uint64_t size) { - throw runtime_error(string("Library doesn't have a parallel cpu implementation of ") + __FUNCTION__); + cpu_seq::write_contiguous_slice(data, filename, offset, size); +} + } diff --git a/src/lib/cpp/cpu_seq/io.cc b/src/lib/cpp/cpu_seq/io.cc index 2d30477..bf771f2 100644 --- a/src/lib/cpp/cpu_seq/io.cc +++ b/src/lib/cpp/cpu_seq/io.cc @@ -4,6 +4,7 @@ #include "io.hh" using namespace std; +namespace cpu_seq { template <typename T> void load_contiguous_slice(T *data, @@ -39,3 +40,4 @@ void write_contiguous_slice(const T *data, } // TODO non-contiguous +} diff --git a/src/lib/cpp/gpu/io.cc b/src/lib/cpp/gpu/io.cc index 4eb196a..992209a 100644 --- a/src/lib/cpp/gpu/io.cc +++ b/src/lib/cpp/gpu/io.cc @@ -2,15 +2,17 @@ #include <fstream> #include "io.hh" +#include "../cpu_seq/io.cc" using namespace std; +namespace gpu { template <typename T> void load_contiguous_slice(T *data, const string filename, const uint64_t 
offset, const uint64_t size) { - throw runtime_error(string("Library doesn't have a gpu implementation of ") + __FUNCTION__); + cpu_seq::load_contiguous_slice(data, filename, offset, size); } template <typename T> @@ -18,5 +20,7 @@ void write_contiguous_slice(const T *data, const string filename, const uint64_t offset, const uint64_t size) { - throw runtime_error(string("Library doesn't have a gpu implementation of ") + __FUNCTION__); + cpu_seq::write_contiguous_slice(data, filename, offset, size); } + +} \ No newline at end of file diff --git a/src/lib/cpp/include/datatypes.hh b/src/lib/cpp/include/datatypes.hh index 2171b86..b91fc78 100644 --- a/src/lib/cpp/include/datatypes.hh +++ b/src/lib/cpp/include/datatypes.hh @@ -1,5 +1,19 @@ #ifndef datatypes_h #define datatypes_h + +#ifdef _OPENACC +//#warning "Using GPU" +#define NS gpu +#else +#ifdef _OPENMP +//#warning "Using OpenMP" +#define NS cpu_par +#else +//#warning "Using sequential" +#define NS cpu_seq +#endif +#endif + #include <array> #include <vector> #include <pybind11/pybind11.h> diff --git a/src/lib/cpp/include/io.hh b/src/lib/cpp/include/io.hh index a28da76..5720a20 100644 --- a/src/lib/cpp/include/io.hh +++ b/src/lib/cpp/include/io.hh @@ -1,9 +1,15 @@ #ifndef io_h #define io_h +#include "datatypes.hh" + +namespace NS { + template <typename T> -void load_contiguous_slice(T *data, const string filename, const uint64_t offset, const uint64_t size); +void load_contiguous_slice(const T *data, const string filename, const uint64_t offset, const uint64_t size); template <typename T> -void write_contiguous_slice(T *np_data, const string filename, const uint64_t offset, const uint64_t size); +void write_contiguous_slice(const T *np_data, const string filename, const uint64_t offset, const uint64_t size); + +} #endif \ No newline at end of file diff --git a/src/pybind/io-pybind.cc b/src/pybind/io-pybind.cc index 496b990..e15e1fe 100644 --- a/src/pybind/io-pybind.cc +++ b/src/pybind/io-pybind.cc @@ -7,6 +7,8 @@ 
namespace py = pybind11; #include "datatypes.hh" #include "io.cc" +namespace python_api { + template <typename T> void load_slice(py::array_t<T> &np_data, const string filename, const tuple<uint64_t, uint64_t, uint64_t> offset, @@ -16,7 +18,7 @@ void load_slice(py::array_t<T> &np_data, const string filename, auto [Nz, Ny, Nx] = shape; auto [oz, oy, ox] = offset; uint64_t flat_offset = oz*Ny*Nx + oy*Nx + ox; - load_contiguous_slice<T>(data, filename, flat_offset, data_info.size); + NS::load_contiguous_slice<T>(data, filename, flat_offset, data_info.size); } template <typename T> @@ -29,22 +31,24 @@ void write_slice(const py::array_t<T> &np_data, auto [Nz, Ny, Nx] = shape; auto [oz, oy, ox] = offset; uint64_t flat_offset = oz*Ny*Nx + oy*Nx + ox; - write_contiguous_slice<T>(data, filename, flat_offset, data_info.size); + NS::write_contiguous_slice<T>(data, filename, flat_offset, data_info.size); +} + } PYBIND11_MODULE(io, m) { m.doc() = "I/O functions for handling flat binary format files."; // optional module docstring - m.def("load_slice", &load_slice<uint8_t>); - m.def("load_slice", &load_slice<uint16_t>); - m.def("load_slice", &load_slice<uint32_t>); - m.def("load_slice", &load_slice<uint64_t>); - m.def("load_slice", &load_slice<float>); - m.def("load_slice", &load_slice<double>); - - m.def("write_slice", &write_slice<uint8_t>); - m.def("write_slice", &write_slice<uint16_t>); - m.def("write_slice", &write_slice<uint32_t>); - m.def("write_slice", &write_slice<uint64_t>); - m.def("write_slice", &write_slice<float>); - m.def("write_slice", &write_slice<double>); + m.def("load_slice", &python_api::load_slice<uint8_t>); + m.def("load_slice", &python_api::load_slice<uint16_t>); + m.def("load_slice", &python_api::load_slice<uint32_t>); + m.def("load_slice", &python_api::load_slice<uint64_t>); + m.def("load_slice", &python_api::load_slice<float>); + m.def("load_slice", &python_api::load_slice<double>); + + m.def("write_slice", &python_api::write_slice<uint8_t>); + 
m.def("write_slice", &python_api::write_slice<uint16_t>); + m.def("write_slice", &python_api::write_slice<uint32_t>); + m.def("write_slice", &python_api::write_slice<uint64_t>); + m.def("write_slice", &python_api::write_slice<float>); + m.def("write_slice", &python_api::write_slice<double>); } \ No newline at end of file From 7631ce944fea99b7de4669fae37c770f9f27eae5 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 6 Mar 2023 08:35:32 +0100 Subject: [PATCH 098/136] #25 Incorrect namespace in io --- src/lib/cpp/cpu/io.cc | 2 +- src/pybind/io-pybind.cc | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib/cpp/cpu/io.cc b/src/lib/cpp/cpu/io.cc index 62be52c..098cde0 100644 --- a/src/lib/cpp/cpu/io.cc +++ b/src/lib/cpp/cpu/io.cc @@ -5,7 +5,7 @@ #include "../cpu_seq/io.cc" using namespace std; -namespace cpu { +namespace cpu_par { template <typename T> void load_contiguous_slice(T *data, diff --git a/src/pybind/io-pybind.cc b/src/pybind/io-pybind.cc index e15e1fe..060d9d9 100644 --- a/src/pybind/io-pybind.cc +++ b/src/pybind/io-pybind.cc @@ -4,7 +4,6 @@ using namespace std; namespace py = pybind11; -#include "datatypes.hh" #include "io.cc" namespace python_api { From ec85d405f95e159bff72d855cd05db770f069c75 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 6 Mar 2023 08:35:54 +0100 Subject: [PATCH 099/136] #25 Added test for the other io implementations --- src/test/test_io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/test_io.py b/src/test/test_io.py index 199257d..09ad43a 100644 --- a/src/test/test_io.py +++ b/src/test/test_io.py @@ -19,7 +19,7 @@ dim_size = 128 dim_shape = (dim_size, dim_size, dim_size) partial_factor = 4 -impls = [io_cpu_seq] #, io_cpu, io_gpu] +impls = [io_cpu_seq, io_cpu, io_gpu] def random(shape, dtype): rnds = np.random.random(shape) * 100 @@ -63,7 +63,7 @@ def test_dtype(impl, dtype): for i in range(partial_factor+1): 
impl.load_slice(read_data, individual_tmp_file, (i*partial,0,0), read_data.shape) assert np.allclose(data[i*partial:(i+1)*partial], read_data) - + # Write past where the file ends impl.write_slice(data, individual_tmp_file, (data.shape[0]*2,0,0), data.shape) assert os.path.getsize(individual_tmp_file) == 3 * data.nbytes From 127428b574b182596a83ed59146143decd6dc48e Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 6 Mar 2023 08:44:43 +0100 Subject: [PATCH 100/136] #25 GPU debugging. Device code cannot have assert --- src/lib/cpp/cpu_seq/geometry.cc | 27 ++++----------------------- src/pybind/geometry-pybind.cc | 13 ------------- src/test/test_geometry.py | 8 ++++---- 3 files changed, 8 insertions(+), 40 deletions(-) diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 5f9d560..cf5f8cb 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -139,8 +139,8 @@ float resample2x2x2(const T *voxels, // abort(); // } uint64_t voxel_index = I*Ny*Nz+J*Ny+K; - assert(I>=0 && J>=0 && K>=0); - assert(I<Nx && J<Ny && K<Nz); + //assert(I>=0 && J>=0 && K>=0); + //assert(I<Nx && J<Ny && K<Nz); float voxel = (float) voxels[voxel_index]; value += voxel*weight; } @@ -200,7 +200,7 @@ void sample_plane(const input_ndarray<T> &voxels, } /* -/* TODO only called in test.py. Postpone for now. +// TODO only called in test.py. Postpone for now. 
// NB: xyz are in indices, not micrometers void zero_outside_bbox(const array<real_t,9> &principal_axes, const array<real_t,6> ¶meter_ranges, @@ -257,25 +257,6 @@ inline vector4 hom_transform(const vector4 &x, const matrix4x4 &M) { return c; } -#define loop_mask_start(mask_in,mask_out,COPY) { \ - ssize_t Mx = mask_in.shape[0], My = mask_in.shape[1], Mz = mask_in.shape[2]; \ - ssize_t mask_length = Mx*My*Mz; \ - \ - for (ssize_t block_start = 0; block_start < mask_length; block_start += acc_block_size) { \ - const mask_type *maskin_buffer = mask_in.data + block_start; \ - mask_type *maskout_buffer = mask_out.data + block_start; \ - ssize_t this_block_length = min(acc_block_size, mask_length-block_start); \ - \ - _Pragma(STR(acc parallel loop copy(maskin_buffer[:this_block_length], maskout_buffer[:this_block_length]) copy COPY)) \ - for (int64_t k = 0; k < this_block_length; k++) { \ - int64_t flat_idx = block_start + k; \ - int64_t X = (flat_idx / (My*Mz)), Y = (flat_idx / Mz) % My, Z = flat_idx % Mz; \ - std::array<real_t,4> Xs = { X*voxel_size, Y*voxel_size, Z*voxel_size, 1 }; \ - bool mask_value = maskin_buffer[k]; - -#define loop_mask_end(mask) }}} - -/* void fill_implant_mask(const input_ndarray<mask_type> implant_mask, float voxel_size, const array<float,6> &bbox, @@ -425,7 +406,7 @@ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance abort(); } - //****** MEAT OF THE IMPLEMENTATION IS HERE ****** + // ****** MEAT OF THE IMPLEMENTATION IS HERE ****** real_t distance = resample2x2x2<float>(edt_block, {this_edt_length/(ey*ez),ey,ez}, {x,y,z}); if (distance > d_min && distance <= d_max) { // TODO: and W>w_min diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index ade209f..3051f69 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -1,16 +1,3 @@ -#ifdef _OPENACC -//#warning "Using GPU" -#define NS gpu -#else -#ifdef _OPENMP -//#warning "Using OpenMP" -#define NS cpu_par -#else 
-//#warning "Using sequential" -#define NS cpu_seq -#endif -#endif - #include "geometry.cc" namespace python_api { diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index 3d0a3ae..9426e7f 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -57,11 +57,11 @@ def compare_fs(func, baseline_f, cpu_f, gpu_f, should_assert=True, tolerance=1e- print (f'({func}) Sequential ran in {baseline_t}') cpu, cpu_t = run_with_warmup(cpu_f, allocate_result) - print (f'({func}) Parallel CPU ran in {cpu_t}, which is {baseline_t / cpu_t} times faster than sequential') + print (f'({func}) Parallel CPU ran in {cpu_t}, which is {baseline_t / cpu_t:.02f} times faster than sequential') if should_assert: assert_with_print(baseline, cpu, tolerance, 'cpu_seq vs cpu') gpu, gpu_t = run_with_warmup(gpu_f, allocate_result) - print (f'({func}) GPU ran in {gpu_t}, which is {baseline_t / gpu_t} times faster than sequential') + print (f'({func}) GPU ran in {gpu_t}, which is {baseline_t / gpu_t:.02f} times faster than sequential') if should_assert: assert_with_print(baseline, gpu, tolerance, 'cpu_seq vs gpu') def test_center_of_mass(): @@ -101,12 +101,12 @@ def test_sample_plane(dtype): UVW = E.T _, v_vec, w_vec = UVW cpu_seq, cpu, gpu = [ - partial(impl.sample_plane, voxels, voxel_size, cm, v_vec, w_vec, [0, 1024, 0, 1024]) + partial(impl.sample_plane, voxels, voxel_size, cm, v_vec, w_vec, [0, 128, 0, 128]) for impl in [m_cpu_seq, m_cpu, m_gpu] ] # TODO the function is unstable, even when they're all calling the sequential implementation, t least when comparing gcc against nvcc, but it differs at most with 1. Hence the higher tolerance for this test. 
Can be tested with something like for i in range(10000): - compare_fs('sample_plane', cpu_seq, cpu, gpu, True, 1.1, ((800,800), np.float32)) + compare_fs('sample_plane', cpu_seq, cpu, gpu, True, 1.1, ((64,64), np.float32)) if __name__ == '__main__': test_center_of_mass() From 5fc5448bce895b20473309573502fe37154de77e Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 6 Mar 2023 08:45:11 +0100 Subject: [PATCH 101/136] #25 Plane samples is working for all implementations --- src/Makefile | 3 ++- src/lib/cpp/cpu_seq/geometry.cc | 11 ++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Makefile b/src/Makefile index 463d674..f64e876 100644 --- a/src/Makefile +++ b/src/Makefile @@ -28,7 +28,8 @@ CLEANUP = $(TARGETS) $(foreach PLATFORM, $(PLATFORMS), $(CPP_FOLDER)/$(PLATFORM) ifneq (, $(shell which nvc++)) PLATFORMS += gpu gpu_CXX = nvc++ -gpu_FLAGS = -acc=gpu -tp=native -Xcudafe --display_error_number #-Minfo=accel +gpu_FLAGS = -acc=gpu -tp=native #-Minfo=accel +gpu_FLAGS += -Xcudafe --display_error_number # Getting the warning codes for later suppression gpu_FLAGS += --diag_suppress 1626 # Remove the annoying pybind warning that routine is both inline and noinline gpu_FLAGS += --diag_suppress 9 # Remove the warning about nested comments gpu_FLAGS += -Wnvlink,-w # Disable nvlink warnings diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index cf5f8cb..69f0578 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -165,7 +165,11 @@ void sample_plane(const input_ndarray<T> &voxels, du = (umax - umin) / real_t(nu), dv = (vmax - vmin) / real_t(nv); - //#pragma omp parallel for collapse(2) + real_t *dat = plane_samples.data; + + #pragma acc data copyin(voxels, voxels.data[:voxels_Nz*voxels_Ny*voxels_Nx], voxels_Nz, voxels_Ny, voxels_Nx) create(dat[:nu*nv]) copyout(dat[:nu*nv]) + { + PRAGMA(PARALLEL_TERM collapse(2)) for (ssize_t ui = 0; ui < nu; ui++) { for 
(ssize_t vj = 0; vj < nv; vj++) { const real_t @@ -188,13 +192,14 @@ void sample_plane(const input_ndarray<T> &voxels, T value = 0; std::array<float, 6> local_bbox = {0.5f, float(voxels_Nx)-0.5f, 0.5f, float(voxels_Ny)-0.5f, 0.5f, float(voxels_Nz)-0.5f}; if (in_bbox(x,y,z, local_bbox)) - value = (T) floor(resample2x2x2<T>(voxels.data, {voxels_Nx, voxels_Ny, voxels_Nz}, {x, y, z})); + value = (T) round(resample2x2x2<T>(voxels.data, {voxels_Nx, voxels_Ny, voxels_Nz}, {x, y, z})); // else // fprintf(stderr,"Sampling outside image: x,y,z = %.1f,%.1f,%.1f, Nx,Ny,Nz = %ld,%ld,%ld\n",x,y,z,Nx,Ny,Nz); - plane_samples.data[ui*nv + vj] = value; + dat[ui*nv + vj] = value; } } + } } } From df57aa085748aec203c92dceceb7905dc3abbe70 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 6 Mar 2023 08:45:52 +0100 Subject: [PATCH 102/136] #25 Added cuda generated file to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a724c3c..6fc3a6a 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ src/meow/runner_processing/* *.so *.so.dSYM a.out +cudafe # Ignore the $BONE_DATA symlinks, as they're only there for convinience in vscode data_* \ No newline at end of file From 6b9d1ddefb0bfc1e7565e3b51f8218c4fd7c85d4 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 6 Mar 2023 09:42:54 +0100 Subject: [PATCH 103/136] #25 Implemented geometry::integrate_axes --- src/lib/cpp/cpu/geometry.cc | 9 ++++++++ src/lib/cpp/cpu_seq/geometry.cc | 36 ++++++++++++++++++++++++++++++ src/lib/cpp/gpu/geometry.cc | 9 ++++++++ src/lib/cpp/include/boilerplate.hh | 6 +++++ src/pybind/geometry-pybind.cc | 10 ++++----- 5 files changed, 65 insertions(+), 5 deletions(-) diff --git a/src/lib/cpp/cpu/geometry.cc b/src/lib/cpp/cpu/geometry.cc index 4548dd7..7bf6ad9 100644 --- a/src/lib/cpp/cpu/geometry.cc +++ b/src/lib/cpp/cpu/geometry.cc @@ -22,6 +22,15 @@ array<real_t,9> inertia_matrix(const 
input_ndarray<mask_type> &mask, const array return cpu_seq::inertia_matrix(mask, cm); } +void integrate_axes(const input_ndarray<mask_type> &mask, + const array<real_t,3> &x0, + const array<real_t,3> &v_axis, + const array<real_t,3> &w_axis, + const real_t v_min, const real_t w_min, + output_ndarray<real_t> output) { + return cpu_seq::integrate_axes(mask, x0, v_axis, w_axis, v_min, w_min, output); +} + template <typename T> float resample2x2x2(const T *voxels, const array<ssize_t, 3> &shape, diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 69f0578..4709555 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -202,6 +202,42 @@ void sample_plane(const input_ndarray<T> &voxels, } } +void integrate_axes(const input_ndarray<mask_type> &mask, + const array<real_t,3> &x0, + const array<real_t,3> &v_axis, + const array<real_t,3> &w_axis, + const real_t v_min, const real_t w_min, + output_ndarray<real_t> output) { + UNPACK_NUMPY(mask); + ssize_t Nv = output.shape[0], Nw = output.shape[1]; + real_t *output_data = output.data; + + // TODO: Check v_axis & w_axis projections to certify bounds and get rid of runtime check + #pragma acc data create(output_data[:Nv*Nw]) copyout(output_data[:Nv*Nw]) + { + BLOCK_BEGIN(mask, ) { + + mask_type voxel = mask_buffer[flat_index]; + if (voxel != 0) { + real_t xs[3] = { + real_t(x) - x0[0], + real_t(y) - x0[1], + real_t(z) - x0[2] + }; + + real_t v = dot(xs,v_axis), w = dot(xs,w_axis); + int64_t i_v = int64_t(round(v-v_min)), j_w = int64_t(round(w-w_min)); + + if(i_v >= 0 && j_w >= 0 && i_v < Nv && j_w < Nw){ + ATOMIC() + output_data[i_v*Nw + j_w] += voxel; + } + } + + BLOCK_END() } + } +} + } /* diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index dc6448d..317f572 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -21,6 +21,15 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array return 
cpu_seq::inertia_matrix(mask, cm); } +void integrate_axes(const input_ndarray<mask_type> &mask, + const array<real_t,3> &x0, + const array<real_t,3> &v_axis, + const array<real_t,3> &w_axis, + const real_t v_min, const real_t w_min, + output_ndarray<real_t> output) { + return cpu_seq::integrate_axes(mask, x0, v_axis, w_axis, v_min, w_min, output); +} + template <typename T> float resample2x2x2(const T *voxels, const array<ssize_t, 3> &shape, diff --git a/src/lib/cpp/include/boilerplate.hh b/src/lib/cpp/include/boilerplate.hh index 54852fb..8eb5dab 100644 --- a/src/lib/cpp/include/boilerplate.hh +++ b/src/lib/cpp/include/boilerplate.hh @@ -16,6 +16,12 @@ #endif #endif +#ifdef _OPENACC +#define ATOMIC() PRAGMA(acc atomic) +#else +#define ATOMIC() PRAGMA(omp atomic) +#endif + // TODO attempt at docstring; not quite working. /// Inserts boilerplate code for accessing the given parameter, ARR, in a blocked (chunked) manner. diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index 3051f69..469ff2d 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -30,12 +30,11 @@ void sample_plane(const np_array<T> &np_voxels, {plane_samples_info.ptr, plane_samples_info.shape}); } -real_t resample2x2x2(const np_array<uint8_t> &np_voxels) { +/*real_t resample2x2x2(const np_array<uint8_t> &np_voxels) { auto voxels_info = np_voxels.request(); return 0.0f; -} +}*/ -/* void integrate_axes(const np_maskarray &np_voxels, const array<real_t,3> &x0, const array<real_t,3> &v_axis, @@ -45,12 +44,13 @@ void integrate_axes(const np_maskarray &np_voxels, auto voxels_info = np_voxels.request(); auto output_info = output.request(); - integrate_axes({voxels_info.ptr, voxels_info.shape}, + NS::integrate_axes({voxels_info.ptr, voxels_info.shape}, x0,v_axis,w_axis, v_min, w_min, {output_info.ptr, output_info.shape}); } +/* void zero_outside_bbox(const array<real_t,9> &principal_axes, const array<real_t,6> ¶meter_ranges, const array<real_t,3> &cm, // 
TOOD: Med eller uden voxelsize? @@ -127,7 +127,7 @@ PYBIND11_MODULE(geometry, m) { m.def("center_of_mass", &python_api::center_of_mass); m.def("inertia_matrix", &python_api::inertia_matrix); - //m.def("integrate_axes", &python_api::integrate_axes); + m.def("integrate_axes", &python_api::integrate_axes); //m.def("zero_outside_bbox", &python_api::zero_outside_bbox); //m.def("fill_implant_mask", &python_api::fill_implant_mask); //m.def("cylinder_projection", &python_api::cylinder_projection); From 9d4143780c355ecea5093aa7be66bc4ca1be0d0b Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 6 Mar 2023 09:43:31 +0100 Subject: [PATCH 104/136] #25 Added test for integrate_axes --- src/test/test_geometry.py | 40 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index 9426e7f..36152c9 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -108,7 +108,45 @@ def test_sample_plane(dtype): # TODO the function is unstable, even when they're all calling the sequential implementation, t least when comparing gcc against nvcc, but it differs at most with 1. Hence the higher tolerance for this test. 
Can be tested with something like for i in range(10000): compare_fs('sample_plane', cpu_seq, cpu, gpu, True, 1.1, ((64,64), np.float32)) +def test_integrate_axes(): + n = 128 + dtype = np.uint8 + voxels = np.random.randint(0, np.iinfo(dtype).max, (n,n,n), dtype) + cm = m_cpu.center_of_mass(voxels) + M = np.array(m_cpu.inertia_matrix(voxels, cm)).reshape(3,3) + + lam, E = np.linalg.eigh(M) + ix = np.argsort(np.abs(lam)) + lam, E = np.array(lam)[ix], np.array(E)[:,ix] + + v_axis, w_axis = E[:,1], E[:,2] + + (vmin,vmax), _ = axis_parameter_bounds(voxels.shape, cm, v_axis) + (wmin,wmax), _ = axis_parameter_bounds(voxels.shape, cm, w_axis) + + cpu_seq, cpu, gpu = [ + partial(impl.integrate_axes, voxels, cm, v_axis, w_axis, vmin, wmin) + for impl in [m_cpu_seq, m_cpu, m_gpu] + ] + + compare_fs('integrate_axes', cpu_seq, cpu, gpu, True, 1e-7, ((int(vmax-vmin+2),int(wmax-wmin+2)), float)) + +def axis_parameter_bounds(shape, center, axis): + d = len(axis) + signs = np.sign(axis) + + # (0,0,..,0) corner and furthest corner of grid, relative to center +# print(center) + x0 = -np.array(center) + x1 = np.array(shape)[::-1]-center # Data has z,y,x-order, but we keep x,y,z in geometry calc + + xmin = (signs==1)*x0 + (signs==-1)*x1 # minimizes dot(x,axis) + xmax = (signs==1)*x1 + (signs==-1)*x0 # maximizes dot(x,axis) + + return (np.dot(xmin,axis), np.dot(xmax,axis)), (xmin,xmax) + if __name__ == '__main__': test_center_of_mass() test_inertia_matrix() - test_sample_plane(np.uint8) \ No newline at end of file + test_sample_plane(np.uint8) + test_integrate_axes() \ No newline at end of file From 23d23934787011aedfd140eeae5faebe5e849515 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 6 Mar 2023 09:43:45 +0100 Subject: [PATCH 105/136] #25 Removed unused imports --- src/processing_steps/0600_segment_implant_cc.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/processing_steps/0600_segment_implant_cc.py 
b/src/processing_steps/0600_segment_implant_cc.py index 1f09060..b3a0d99 100644 --- a/src/processing_steps/0600_segment_implant_cc.py +++ b/src/processing_steps/0600_segment_implant_cc.py @@ -2,14 +2,13 @@ sys.path.append(sys.path[0]+"/../") from config.constants import * from config.paths import hdf5_root, binary_root -from lib.py.helpers import commandline_args, update_hdf5, update_hdf5_mask -from lib.cpp.cpu_seq.geometry import center_of_mass, inertia_matrix, integrate_axes, sample_plane +from lib.py.helpers import commandline_args, update_hdf5_mask from lib.cpp.cpu.io import load_slice NA = np.newaxis sample, scale, chunk_size, verbose = commandline_args({"sample" : "<required>", - "scale" : 8, + "scale" : 8, "chunk_size" : 256, "verbose" : 1}) @@ -45,8 +44,8 @@ load_slice(voxel_chunk, f"{binary_root}/voxels/{scale}x/{sample}.uint16", (z,0,0), (nz,ny,nx)) noisy_implant[z:z+chunk_length] = voxel_chunk[:chunk_length] >= implant_threshold_u16 - - + + if verbose >= 1: print(f"Computing connected components") label, n_features = ndi.label(noisy_implant) if verbose >= 1: print(f"Counting component volumes") From de16c73922cda9f033376d368addb0c71fa66883 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 6 Mar 2023 10:28:46 +0100 Subject: [PATCH 106/136] #25 Implemented geometry::zero_outside_bbox --- src/lib/cpp/cpu/geometry.cc | 7 ++++ src/lib/cpp/cpu_seq/geometry.cc | 52 +++++++++++++----------------- src/lib/cpp/gpu/geometry.cc | 45 +++----------------------- src/lib/cpp/include/boilerplate.hh | 8 ++--- src/lib/cpp/include/geometry.hh | 11 +++++++ src/pybind/geometry-pybind.cc | 11 ++----- 6 files changed, 52 insertions(+), 82 deletions(-) diff --git a/src/lib/cpp/cpu/geometry.cc b/src/lib/cpp/cpu/geometry.cc index 7bf6ad9..c3f7cac 100644 --- a/src/lib/cpp/cpu/geometry.cc +++ b/src/lib/cpp/cpu/geometry.cc @@ -49,4 +49,11 @@ void sample_plane(const input_ndarray<T> &voxels, return cpu_seq::sample_plane(voxels, voxel_size, cm, u_axis, 
v_axis, bbox, plane_samples); } +void zero_outside_bbox(const array<real_t,9> &principal_axes, + const array<real_t,6> ¶meter_ranges, + const array<real_t,3> &cm, + output_ndarray<mask_type> voxels) { + return cpu_seq::zero_outside_bbox(principal_axes, parameter_ranges, cm, voxels); +} + } \ No newline at end of file diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 4709555..f756136 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -238,53 +238,45 @@ void integrate_axes(const input_ndarray<mask_type> &mask, } } -} - -/* -// TODO only called in test.py. Postpone for now. // NB: xyz are in indices, not micrometers void zero_outside_bbox(const array<real_t,9> &principal_axes, const array<real_t,6> ¶meter_ranges, const array<real_t,3> &cm, output_ndarray<mask_type> voxels) { - size_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - int64_t image_length = Nx*Ny*Nz; - printf("(Nx,Ny,Nz) = (%ld,%ld,%ld), image_length = %ld",Nx,Ny,Nz, image_length); + UNPACK_NUMPY(voxels) - for (int64_t block_start = 0; block_start < image_length; block_start += acc_block_size) { - mask_type *buffer = voxels.data + block_start; - ssize_t this_block_length = min(acc_block_size, image_length-block_start); + BLOCK_BEGIN(voxels, ) { - //parallel_loop((buffer[:this_block_length])) - for (int64_t k = 0; k < this_block_length; k++) { - int64_t flat_idx = block_start + k; - int64_t x = flat_idx / (Ny*Nz); - int64_t y = (flat_idx / Nz) % Ny; - int64_t z = flat_idx % Nz; - // Boilerplate until here. TODO: macroize or lambda out! 
+ real_t xs[3] = { + real_t(x) - cm[0], + real_t(y) - cm[1], + real_t(z) - cm[2]}; + real_t params[3] = { 0, 0, 0 }; - real_t xs[3] = {x-cm[0], y-cm[1], z-cm[2]}; + for (int uvw = 0; uvw < 3; uvw++) + for (int xyz = 0; xyz < 3; xyz++) + params[uvw] += xs[xyz] * principal_axes[uvw*3 + xyz]; // u = dot(xs,u_axis), v = dot(xs,v_axis), w = dot(xs,w_axis) - real_t params[3] = {0,0,0}; + bool p = false; - for (int uvw = 0; uvw < 3; uvw++) - for (int xyz = 0; xyz < 3; xyz++) - params[uvw] += xs[xyz] * principal_axes[uvw*3+xyz]; // u = dot(xs,u_axis), v = dot(xs,v_axis), w = dot(xs,w_axis) + for (int uvw = 0; uvw < 3; uvw++) { + real_t + param_min = parameter_ranges[uvw*2], + param_max = parameter_ranges[uvw*2 + 1]; + p |= (params[uvw] < param_min) | (params[uvw] > param_max); + } - bool p = false; + if (p) + voxels_buffer[flat_index] = 0; - for (int uvw = 0; uvw < 3; uvw++) { - real_t param_min = parameter_ranges[uvw*2], param_max = parameter_ranges[uvw*2+1]; - p |= (params[uvw] < param_min) | (params[uvw] > param_max); - } + BLOCK_END() } - if (p) buffer[k] = 0; +} - } - } } +/* inline vector4 hom_transform(const vector4 &x, const matrix4x4 &M) { vector4 c{{0,0,0,0}}; diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index 317f572..c70b867 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -48,46 +48,11 @@ void sample_plane(const input_ndarray<T> &voxels, return cpu_seq::sample_plane(voxels, voxel_size, cm, u_axis, v_axis, bbox, plane_samples); } -/* TODO Only called in test.py. Postponed for now. 
-void integrate_axes(const input_ndarray<mask_type> &voxels, - const array<real_t,3> &x0, - const array<real_t,3> &v_axis, - const array<real_t,3> &w_axis, - const real_t v_min, const real_t w_min, - output_ndarray<real_t> output) { - ssize_t Nx = voxels.shape[0], Ny = voxels.shape[1], Nz = voxels.shape[2]; - ssize_t Nv = output.shape[0], Nw = output.shape[1]; - int64_t image_length = Nx*Ny*Nz; - real_t *output_data = output.data; - - // TODO: Check v_axis & w_axis projections to certify bounds and get rid of runtime check - - for (ssize_t block_start = 0; block_start < image_length; block_start += acc_block_size) { - const mask_type *buffer = voxels.data + block_start; - int block_length = min(acc_block_size,image_length-block_start); - - //#pragma acc parallel loop copy(output_data[:Nv*Nw]) copyin(buffer[:block_length], x0, v_axis, w_axis) - //parallel_loop((output_data[:Nv*Nw])) - for (int64_t k = 0; k < block_length; k++) { - if (buffer[k] != 0) { - int64_t flat_idx = block_start + k; - real_t xs[3] = { - (flat_idx / (Ny*Nz)) - x0[0], // x - ((flat_idx / Nz) % Ny) - x0[1], // y - (flat_idx % Nz) - x0[2] }; // z - - mask_type voxel = buffer[k]; - real_t v = dot(xs,v_axis), w = dot(xs,w_axis); - int64_t i_v = round(v-v_min), j_w = round(w-w_min); - - if (i_v >= 0 && j_w >= 0 && i_v < Nv && j_w < Nw) { - //atomic_statement() - output_data[i_v*Nw + j_w] += voxel; - } - } - } - } +void zero_outside_bbox(const array<real_t,9> &principal_axes, + const array<real_t,6> ¶meter_ranges, + const array<real_t,3> &cm, + output_ndarray<mask_type> voxels) { + return cpu_seq::zero_outside_bbox(principal_axes, parameter_ranges, cm, voxels); } -*/ } \ No newline at end of file diff --git a/src/lib/cpp/include/boilerplate.hh b/src/lib/cpp/include/boilerplate.hh index 8eb5dab..00b88d5 100644 --- a/src/lib/cpp/include/boilerplate.hh +++ b/src/lib/cpp/include/boilerplate.hh @@ -32,9 +32,9 @@ /// @param ARR The array that will be accessed. 
#define FOR_BLOCK_BEGIN(ARR) \ for (int64_t ARR##_buffer_start = 0; ARR##_buffer_start < ARR##_length; ARR##_buffer_start += acc_block_size<ARR##_type>) { \ - const ARR##_type *ARR##_buffer = ARR.data + ARR##_buffer_start; \ + ARR##_type *ARR##_buffer = (ARR##_type *) ARR.data + ARR##_buffer_start; \ ssize_t ARR##_buffer_length = min(acc_block_size<ARR##_type>, ARR##_length-ARR##_buffer_start); \ - PRAGMA(acc data copyin(ARR##_buffer[:ARR##_buffer_length])) \ + PRAGMA(acc data copy(ARR##_buffer[:ARR##_buffer_length])) \ { #define FOR_BLOCK_END() } } @@ -76,7 +76,7 @@ #else #ifdef _OPENMP // Should also capture OpenACC, which is why it's second. #define BLOCK_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ - const ARR##_type *ARR##_buffer = ARR.data; \ + ARR##_type *ARR##_buffer = (ARR##_type *) ARR.data; \ FOR_3D_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ int64_t flat_index = z*ARR##_Ny*ARR##_Nx + y*ARR##_Nx + x; @@ -84,7 +84,7 @@ #else #define BLOCK_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ int64_t flat_index = 0; \ - const ARR##_type *ARR##_buffer = ARR.data; \ + ARR##_type *ARR##_buffer = (ARR##_type *) ARR.data; \ FOR_3D_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) #define BLOCK_END() \ diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh index 237844b..7635d80 100644 --- a/src/lib/cpp/include/geometry.hh +++ b/src/lib/cpp/include/geometry.hh @@ -36,6 +36,13 @@ Computes the inertia matrix of the given tomography based of the given center of */ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &voxels, const array<real_t,3> &cm); +void integrate_axes(const input_ndarray<mask_type> &mask, + const array<real_t,3> &x0, + const array<real_t,3> &v_axis, + const array<real_t,3> &w_axis, + const real_t v_min, const real_t w_min, + output_ndarray<real_t> output); + template <typename T> float resample2x2x2(const T *voxels, const array<ssize_t,3> &shape, @@ -50,6 +57,10 @@ void sample_plane(const input_ndarray<T> &voxels, const array<real_t, 4> bbox, // [umin,umax,vmin,vmax] 
in micrometers output_ndarray<real_t> plane_samples); +void zero_outside_bbox(const array<real_t,9> &principal_axes, + const array<real_t,6> ¶meter_ranges, + const array<real_t,3> &cm, + output_ndarray<mask_type> voxels); } #endif \ No newline at end of file diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index 469ff2d..d22a2a9 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -30,11 +30,6 @@ void sample_plane(const np_array<T> &np_voxels, {plane_samples_info.ptr, plane_samples_info.shape}); } -/*real_t resample2x2x2(const np_array<uint8_t> &np_voxels) { - auto voxels_info = np_voxels.request(); - return 0.0f; -}*/ - void integrate_axes(const np_maskarray &np_voxels, const array<real_t,3> &x0, const array<real_t,3> &v_axis, @@ -50,19 +45,19 @@ void integrate_axes(const np_maskarray &np_voxels, {output_info.ptr, output_info.shape}); } -/* void zero_outside_bbox(const array<real_t,9> &principal_axes, const array<real_t,6> ¶meter_ranges, const array<real_t,3> &cm, // TOOD: Med eller uden voxelsize? 
np_maskarray &np_voxels) { auto voxels_info = np_voxels.request(); - zero_outside_bbox(principal_axes, + NS::zero_outside_bbox(principal_axes, parameter_ranges, cm, {voxels_info.ptr, voxels_info.shape}); } +/* void fill_implant_mask(const np_maskarray implant_mask, float voxel_size, const array<float,6> &bbox, @@ -128,7 +123,7 @@ PYBIND11_MODULE(geometry, m) { m.def("center_of_mass", &python_api::center_of_mass); m.def("inertia_matrix", &python_api::inertia_matrix); m.def("integrate_axes", &python_api::integrate_axes); - //m.def("zero_outside_bbox", &python_api::zero_outside_bbox); + m.def("zero_outside_bbox", &python_api::zero_outside_bbox); //m.def("fill_implant_mask", &python_api::fill_implant_mask); //m.def("cylinder_projection", &python_api::cylinder_projection); m.def("sample_plane", &python_api::sample_plane<uint16_t>); From 98d8426a3bf8be985ce2207d403f408221677d9e Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 6 Mar 2023 10:29:08 +0100 Subject: [PATCH 107/136] #25 Added test for zero_outside_bbox --- src/test/test_geometry.py | 83 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 5 deletions(-) diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index 36152c9..dae5f56 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -39,12 +39,16 @@ def run_with_warmup(f, allocate_result=None): @param allocate_result Defines whether the memory for the result should be allocated before running. If it should, it should be a tuple of the shape and the dtype of the array. None otherwise. 
''' - alloc = lambda x: np.zeros(x[0], x[1]) - f() if allocate_result is None else f(alloc(allocate_result)) if allocate_result is None: + f() start = datetime.datetime.now() result = f() else: + if type(allocate_result) is tuple: + alloc = lambda x: np.zeros(x[0], x[1]) + else: + alloc = lambda x: np.copy(x) + f(alloc(allocate_result)) result = alloc(allocate_result) start = datetime.datetime.now() f(result) @@ -52,7 +56,7 @@ def run_with_warmup(f, allocate_result=None): return result, end - start def compare_fs(func, baseline_f, cpu_f, gpu_f, should_assert=True, tolerance=1e-7, - allocate_result: tuple[tuple[int],np.dtype]=None): + allocate_result: tuple[tuple[int],np.dtype] | np.ndarray=None): baseline, baseline_t = run_with_warmup(baseline_f, allocate_result) print (f'({func}) Sequential ran in {baseline_t}') @@ -132,7 +136,6 @@ def test_integrate_axes(): compare_fs('integrate_axes', cpu_seq, cpu, gpu, True, 1e-7, ((int(vmax-vmin+2),int(wmax-wmin+2)), float)) def axis_parameter_bounds(shape, center, axis): - d = len(axis) signs = np.sign(axis) # (0,0,..,0) corner and furthest corner of grid, relative to center @@ -145,8 +148,78 @@ def axis_parameter_bounds(shape, center, axis): return (np.dot(xmin,axis), np.dot(xmax,axis)), (xmin,xmax) +def integrate_axes(img, cm, v_axis, w_axis): + (vmin,vmax), (vxmin,vxmax) = axis_parameter_bounds(img.shape, cm, v_axis) + (wmin,wmax), (wxmin,wxmax) = axis_parameter_bounds(img.shape, cm, w_axis) + + integral = np.zeros((int(vmax-vmin+2),int(wmax-wmin+2)), dtype=float) + m_cpu.integrate_axes(img,cm,v_axis, w_axis,vmin, wmin, integral) + + return integral + +def bounding_volume(voxels,voxelsize=1.85): + cm = np.array(m_cpu.center_of_mass(voxels)) + M = np.array(m_cpu.inertia_matrix(voxels,cm)).reshape(3,3) + + lam,E = np.linalg.eigh(M) + ix = np.argsort(np.abs(lam)) + lam,E = np.array(lam)[ix], np.array(E)[:,ix] + + u_axis, v_axis, w_axis = E[:,0], E[:,1], E[:,2] + (vmin,vmax), _ = axis_parameter_bounds(voxels.shape, cm, v_axis) 
+ + int_vw = integrate_axes(voxels, cm, v_axis, w_axis) + int_uw = integrate_axes(voxels, cm, u_axis, w_axis) + int_uv = integrate_axes(voxels, cm, u_axis, v_axis) + int_u = np.sum(int_uv,axis=1) + int_v = np.sum(int_uv,axis=0) + int_w = np.sum(int_uw,axis=0) + + lengths = np.array([np.sum(int_u>0), np.sum(int_v>0), np.sum(int_w>0)]) + ix = np.argsort(lengths)[::-1] + print("lengths: ",lengths, ", ix: ",ix) + + (umin,umax), _ = axis_parameter_bounds(voxels.shape, cm, u_axis) + (vmin,vmax), _ = axis_parameter_bounds(voxels.shape, cm, v_axis) + (wmin,wmax), _ = axis_parameter_bounds(voxels.shape, cm, w_axis) + + u_prefix, u_postfix = np.sum(int_u[0:int(np.ceil(abs(umin)))]>0), np.sum(int_u[int(np.floor(abs(umin))):]>0) + v_prefix, v_postfix = np.sum(int_v[0:int(np.ceil(abs(vmin)))]>0), np.sum(int_v[int(np.floor(abs(vmin))):]>0) + w_prefix, w_postfix = np.sum(int_w[0:int(np.ceil(abs(wmin)))]>0), np.sum(int_w[int(np.floor(abs(wmin))):]>0) + + + return { + 'principal_axes':np.array([u_axis,v_axis,w_axis]), + 'principal_axes_ranges':np.array([[-u_prefix*voxelsize,u_postfix*voxelsize], + [-v_prefix*voxelsize,v_postfix*voxelsize], + [-w_prefix*voxelsize,w_postfix*voxelsize]]), + 'centre_of_mass':cm*voxelsize + } + +def test_zero_outside_bbox(): + n = 128 + dtype = np.uint8 + voxels = np.random.randint(0, np.iinfo(dtype).max, (n,n,n), dtype) + voxelsize = 1.85 + coarse_scale = 6 + fine_scale = 2 + mmtofi = 1 / (voxelsize * fine_scale) # Conversion factor from micrometers to index + + implant_bound = bounding_volume(voxels, voxelsize*coarse_scale) + uvw_axes = implant_bound["principal_axes"] + uvw_ranges = implant_bound["principal_axes_ranges"] * mmtofi + cm = implant_bound["centre_of_mass"] * mmtofi + + cpu_seq, cpu, gpu = [ + partial(impl.zero_outside_bbox, uvw_axes.flatten(), uvw_ranges.flatten(), cm) + for impl in [m_cpu_seq, m_cpu, m_gpu] + ] + + compare_fs('zero_outside_bbox', cpu_seq, cpu, gpu, True, 1e-7, voxels) + if __name__ == '__main__': test_center_of_mass() 
test_inertia_matrix() test_sample_plane(np.uint8) - test_integrate_axes() \ No newline at end of file + test_integrate_axes() + test_zero_outside_bbox() \ No newline at end of file From 2eda0f4d82c80039424c48c4143c7af72ee34187 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 6 Mar 2023 15:18:15 +0100 Subject: [PATCH 108/136] #25 Implemented geometry::fill_implant_mask --- src/lib/cpp/cpu/geometry.cc | 11 +- src/lib/cpp/cpu_seq/geometry.cc | 179 ++++++++++++++--------------- src/lib/cpp/gpu/geometry.cc | 102 +++++++++++++++- src/lib/cpp/include/boilerplate.hh | 2 + src/lib/cpp/include/geometry.hh | 41 ++++++- 5 files changed, 234 insertions(+), 101 deletions(-) diff --git a/src/lib/cpp/cpu/geometry.cc b/src/lib/cpp/cpu/geometry.cc index c3f7cac..879ca9f 100644 --- a/src/lib/cpp/cpu/geometry.cc +++ b/src/lib/cpp/cpu/geometry.cc @@ -14,8 +14,15 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> &mask) { return cpu_seq::center_of_mass(mask); } -bool in_bbox(float U, float V, float W, const std::array<float, 6> &bbox) { - return cpu_seq::in_bbox(U, V, W, bbox); +void fill_implant_mask(const input_ndarray<mask_type> mask, + float voxel_size, + const array<float,6> &bbox, + float r_fraction, + const matrix4x4 &Muvw, + output_ndarray<mask_type> solid_implant_mask, + output_ndarray<float> rsqr_maxs, + output_ndarray<float> profile) { + return cpu_seq::fill_implant_mask(mask, voxel_size, bbox, r_fraction, Muvw, solid_implant_mask, rsqr_maxs, profile); } array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array<real_t,3> &cm) { diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index f756136..1fd5546 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -39,21 +39,93 @@ array<real_t, 3> center_of_mass(const input_ndarray<mask_type> &mask) { return array<real_t, 3>{ rcmz, rcmy, rcmx }; } -bool in_bbox(float U, float V, float W, const std::array<float, 
6> &bbox) { - const auto& [U_min, U_max, V_min, V_max, W_min, W_max] = bbox; +void fill_implant_mask(const input_ndarray<mask_type> mask, + float voxel_size, + const array<float,6> &bbox, + float r_fraction, + const matrix4x4 &Muvw, + output_ndarray<mask_type> solid_implant_mask, + output_ndarray<float> rsqr_maxs, + output_ndarray<float> profile) { + UNPACK_NUMPY(mask) + + real_t theta_min = real_t(M_PI), theta_max = real_t(-M_PI); + ssize_t n_segments = rsqr_maxs.shape[0]; + const auto [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; + float *rsqr_maxs_d = rsqr_maxs.data; + float *profile_d = profile.data; + + + //BLOCK_BEGIN(mask, ) { + #pragma omp parallel for collapse(3) + for (int64_t z = 0; z < mask_Nz; z++) { for (int64_t y = 0; y < mask_Ny; y++) { for (int64_t x = 0; x < mask_Nx; x++) { + //mask_type *solid_mask_buffer = solid_implant_mask.data + mask_buffer_start; - bool inside = - U >= U_min && - U <= U_max && - V >= V_min && - V <= V_max && - W >= W_min && - W <= W_max; + mask_type mask_value = mask.data[z*mask_Ny*mask_Nx + y*mask_Nx + x]; + std::array<real_t, 4> Xs = { + real_t(x) * voxel_size, + real_t(y) * voxel_size, + real_t(z) * voxel_size, + 1 }; - // printf("in_bbox: (%.1f,%.1f,%.1f) \in ([%.1f,%.1f],[%.1f,%.1f],[%.1f,%.1f]) == %d\n", - // U,V,W,U_min,U_max,V_min,V_max,U_min,U_max,inside); + if (mask_value) { + auto [U,V,W,c] = hom_transform(Xs, Muvw); + + real_t r_sqr = V*V+W*W; + real_t theta = atan2(V,W); + + int U_i = int(floor((U-U_min)*real_t(n_segments-1)/(U_max-U_min))); + + // if (U_i >= 0 && U_i < n_segments) { + if ( in_bbox(U,V,W,bbox) ) { + rsqr_maxs_d[U_i] = max(rsqr_maxs_d[U_i], float(r_sqr)); + theta_min = min(theta_min, theta); + theta_max = max(theta_max, theta); + // W_min = min(W_min, W); + } else { + // Otherwise we've calculated it wrong! 
+ // fprintf(stderr,"U-coordinate out of bounds: U_i = %ld, U = %g, U_min = %g, U_max = %g\n",U_i,U,U_min,U_max); + } + } + + //FOR_3D_END() } + }}} + + double theta_center = (theta_max+theta_min)/2; + + //FOR_3D_BEGIN(mask, ) { + #pragma omp parallel for collapse(3) + for (int64_t z = 0; z < mask_Nz; z++) { for (int64_t y = 0; y < mask_Ny; y++) { for (int64_t x = 0; x < mask_Nx; x++) { + std::array<real_t, 4> Xs = { + real_t(x) * voxel_size, + real_t(y) * voxel_size, + real_t(z) * voxel_size, + 1 }; + int64_t flat_index = z*mask_Ny*mask_Nx + y*mask_Nx + x; + mask_type mask_value = mask.data[flat_index]; + + // Second pass does the actual work + auto [U,V,W,c] = hom_transform(Xs,Muvw); + float r_sqr = V*V+W*W; + float theta = atan2(V,W); + int U_i = int(floor((U-U_min)*real_t(n_segments-1)/(U_max-U_min))); + + bool solid_mask_value = false; + if (U_i >= 0 && U_i < n_segments && W >= W_min) { // TODO: Full bounding box check? + solid_mask_value = mask_value | (r_sqr <= r_fraction*rsqr_maxs_d[U_i]); + + if (theta >= theta_min && theta <= theta_center && r_sqr <= rsqr_maxs_d[U_i]) { + ATOMIC() + profile_d[U_i] += solid_mask_value; + } + } + + solid_implant_mask.data[flat_index] = solid_mask_value; + + //BLOCK_END() } + //FOR_3D_END() } + }}} - return inside; } array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array<real_t,3> &cm) { @@ -277,89 +349,6 @@ void zero_outside_bbox(const array<real_t,9> &principal_axes, } /* -inline vector4 hom_transform(const vector4 &x, const matrix4x4 &M) { - vector4 c{{0,0,0,0}}; - - for (int i = 0; i < 4; i++) { - real_t sum = 0; - #pragma simd parallel for reduction(+:sum) - for (int j=0;j<4;j++) - sum += M[i*4+j]*x[j]; - c[i] = sum; - } - return c; -} - -void fill_implant_mask(const input_ndarray<mask_type> implant_mask, - float voxel_size, - const array<float,6> &bbox, - float r_fraction, - const matrix4x4 &Muvw, - output_ndarray<mask_type> solid_implant_mask, - output_ndarray<float> rsqr_maxs, - 
output_ndarray<float> profile) { - real_t theta_min = M_PI, theta_max = -M_PI; - ssize_t n_segments = rsqr_maxs.shape[0]; - const auto [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; - - printf("implant_mask.shape = %ld,%ld,%ld\n",implant_mask.shape[0],implant_mask.shape[1],implant_mask.shape[2]); - printf("solid_implant_mask.shape = %ld,%ld,%ld\n",solid_implant_mask.shape[0],solid_implant_mask.shape[1],solid_implant_mask.shape[2]); - - fprintf(stderr,"voxel_size = %g, U_min = %g, U_max = %g, r_frac = %g, n_segments = %ld\n", - voxel_size, U_min, U_max, r_fraction, n_segments); - - float *rsqr_maxs_d = rsqr_maxs.data; - float *profile_d = profile.data; - - // First pass computes some bounds -- possibly separate out to avoid repeating - //loop_mask_start(implant_mask, solid_implant_mask, (maskin_buffer[:this_block_length], rsqr_maxs_d[:n_segments], Muvw[:16], bbox[:6]) ); - if (mask_value) { - auto [U,V,W,c] = hom_transform(Xs,Muvw); - - real_t r_sqr = V*V+W*W; - real_t theta = atan2(V,W); - - int U_i = floor((U-U_min)*(n_segments-1)/(U_max-U_min)); - - // if (U_i >= 0 && U_i < n_segments) { - if ( in_bbox(U,V,W,bbox) ) { - rsqr_maxs_d[U_i] = max(rsqr_maxs_d[U_i], float(r_sqr)); - theta_min = min(theta_min, theta); - theta_max = max(theta_max, theta); - // W_min = min(W_min, W); - } else { - // Otherwise we've calculated it wrong! 
- // fprintf(stderr,"U-coordinate out of bounds: U_i = %ld, U = %g, U_min = %g, U_max = %g\n",U_i,U,U_min,U_max); - } - } - //loop_mask_end(implant_mask); - - double theta_center = (theta_max+theta_min)/2; - - fprintf(stderr,"theta_min, theta_center, theta_max = %g,%g,%g\n", theta_min, theta_center, theta_max); - - // Second pass does the actual work - //loop_mask_start(implant_mask, solid_implant_mask, - (rsqr_maxs_d[:n_segments], profile_d[:n_segments]) ); - auto [U,V,W,c] = hom_transform(Xs,Muvw); - float r_sqr = V*V+W*W; - float theta = atan2(V,W); - int U_i = floor((U-U_min)*(n_segments-1)/(U_max-U_min)); - - bool solid_mask_value = false; - if (U_i >= 0 && U_i < n_segments && W >= W_min) { // TODO: Full bounding box check? - solid_mask_value = mask_value | (r_sqr <= r_fraction*rsqr_maxs_d[U_i]); - - if (theta >= theta_min && theta <= theta_center && r_sqr <= rsqr_maxs_d[U_i]) { - //atomic_statement() - profile_d[U_i] += solid_mask_value; - } - } - maskout_buffer[k] = solid_mask_value; - - //loop_mask_end(implant_mask); -} - void compute_front_mask(const input_ndarray<mask_type> solid_implant, const float voxel_size, const matrix4x4 &Muvw, diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index c70b867..5267619 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -13,8 +13,106 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> &mask) { return cpu_seq::center_of_mass(mask); } -bool in_bbox(float U, float V, float W, const std::array<float, 6> &bbox) { - return cpu_seq::in_bbox(U, V, W, bbox); +void fill_implant_mask(const input_ndarray<mask_type> mask, + float voxel_size, + const array<float,6> &bbox, + float r_fraction, + const matrix4x4 &Muvw, + output_ndarray<mask_type> solid_implant_mask, + output_ndarray<float> rsqr_maxs, + output_ndarray<float> profile) { + UNPACK_NUMPY(mask) + + real_t theta_min = real_t(M_PI), theta_max = real_t(-M_PI); + ssize_t n_segments = rsqr_maxs.shape[0]; + const auto 
[U_min, U_max, V_min, V_max, W_min, W_max] = bbox; + float *rsqr_maxs_d = rsqr_maxs.data; + float *profile_d = profile.data; + + #pragma acc data copyin(U_min) create(rsqr_maxs_d[:n_segments], profile_d[:n_segments]) copyout(rsqr_maxs_d[:n_segments], profile_d[:n_segments]) + { + for (int64_t mask_buffer_start = 0; mask_buffer_start < mask_length; mask_buffer_start += acc_block_size<mask_type>) { + ssize_t mask_buffer_length = min(acc_block_size<mask_type>, mask_length-mask_buffer_start); + mask_type *mask_buffer = (mask_type *) mask.data + mask_buffer_start; + #pragma acc data copy(mask_buffer[:mask_buffer_length]) + { + #pragma acc parallel loop + for (int64_t flat_index = 0; flat_index < mask_buffer_length; flat_index++) { + int64_t + global_index = mask_buffer_start + flat_index, + z = global_index / (mask_Ny * mask_Nx), + y = (global_index / mask_Nx) % mask_Ny, + x = global_index % mask_Nx; + mask_type mask_value = mask_buffer[flat_index]; + std::array<real_t, 4> Xs = { + real_t(x) * voxel_size, + real_t(y) * voxel_size, + real_t(z) * voxel_size, + 1 }; + + if (mask_value) { + auto [U,V,W,c] = hom_transform(Xs, Muvw); + + real_t r_sqr = V*V+W*W; + real_t theta = atan2(V,W); + + int U_i = int(floor((U-U_min)*real_t(n_segments-1)/(U_max-U_min))); + + if ( in_bbox(U,V,W,bbox) ) { + rsqr_maxs_d[U_i] = max(rsqr_maxs_d[U_i], float(r_sqr)); + theta_min = min(theta_min, theta); + theta_max = max(theta_max, theta); + } else { + // Otherwise we've calculated it wrong! 
+ } + } + } + } + } + + double theta_center = (theta_max+theta_min)/2; + + for (int64_t mask_buffer_start = 0; mask_buffer_start < mask_length; mask_buffer_start += acc_block_size<mask_type>) { + mask_type *mask_buffer = (mask_type *) mask.data + mask_buffer_start; + ssize_t mask_buffer_length = min(acc_block_size<mask_type>, mask_length-mask_buffer_start); + mask_type *solid_mask_buffer = solid_implant_mask.data + mask_buffer_start; + #pragma acc data copy(mask_buffer[:mask_buffer_length]) create(solid_mask_buffer[:mask_buffer_length]) copyout(solid_mask_buffer[:mask_buffer_length]) + { + #pragma acc parallel loop + for (int64_t flat_index = 0; flat_index < mask_buffer_length; flat_index++) { + int64_t + global_index = mask_buffer_start + flat_index, + z = global_index / (mask_Ny * mask_Nx), + y = (global_index / mask_Nx) % mask_Ny, + x = global_index % mask_Nx; + mask_type mask_value = mask_buffer[flat_index]; + std::array<real_t, 4> Xs = { + real_t(x) * voxel_size, + real_t(y) * voxel_size, + real_t(z) * voxel_size, + 1 }; + + // Second pass does the actual work + auto [U,V,W,c] = hom_transform(Xs,Muvw); + float r_sqr = V*V+W*W; + float theta = atan2(V,W); + int U_i = int(floor((U-U_min)*real_t(n_segments-1)/(U_max-U_min))); + + bool solid_mask_value = false; + if (U_i >= 0 && U_i < n_segments && W >= W_min) { // TODO: Full bounding box check? 
+ solid_mask_value = mask_value | (r_sqr <= r_fraction*rsqr_maxs_d[U_i]); + + if (theta >= theta_min && theta <= theta_center && r_sqr <= rsqr_maxs_d[U_i]) { + ATOMIC() + profile_d[U_i] += solid_mask_value; + } + } + + solid_mask_buffer[flat_index] = solid_mask_value; + } + } + } + } } array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array<real_t,3> &cm) { diff --git a/src/lib/cpp/include/boilerplate.hh b/src/lib/cpp/include/boilerplate.hh index 00b88d5..c0ba698 100644 --- a/src/lib/cpp/include/boilerplate.hh +++ b/src/lib/cpp/include/boilerplate.hh @@ -77,6 +77,7 @@ #ifdef _OPENMP // Should also capture OpenACC, which is why it's second. #define BLOCK_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ ARR##_type *ARR##_buffer = (ARR##_type *) ARR.data; \ + __attribute__((unused)) int64_t ARR##_buffer_start = 0; \ FOR_3D_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ int64_t flat_index = z*ARR##_Ny*ARR##_Nx + y*ARR##_Nx + x; @@ -85,6 +86,7 @@ #define BLOCK_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ int64_t flat_index = 0; \ ARR##_type *ARR##_buffer = (ARR##_type *) ARR.data; \ + __attribute__((unused)) int64_t ARR##_buffer_start = 0; \ FOR_3D_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) #define BLOCK_END() \ diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh index 7635d80..c168ed4 100644 --- a/src/lib/cpp/include/geometry.hh +++ b/src/lib/cpp/include/geometry.hh @@ -9,12 +9,42 @@ using namespace std; #define dot(a,b) (a[0]*b[0] + a[1]*b[1] + a[2]*b[2]) -void print_timestamp(string message) { +inline void print_timestamp(string message) { auto now = chrono::system_clock::to_time_t(chrono::system_clock::now()); tm local_tm = *localtime(&now); fprintf(stderr,"%s at %02d:%02d:%02d\n", message.c_str(), local_tm.tm_hour, local_tm.tm_min, local_tm.tm_sec); } +inline vector4 hom_transform(const vector4 &x, const matrix4x4 &M) { + vector4 c{{ 0, 0, 0, 0 }}; + + for (int i = 0; i < 4; i++) { + real_t sum = 0; + #pragma simd parallel for reduction(+:sum) + for (int j = 0; j 
< 4; j++) + sum += M[i*4 + j] * x[j]; + c[i] = sum; + } + return c; +} + +inline bool in_bbox(float U, float V, float W, const std::array<float, 6> &bbox) { + const auto& [U_min, U_max, V_min, V_max, W_min, W_max] = bbox; + + bool inside = + U >= U_min && + U <= U_max && + V >= V_min && + V <= V_max && + W >= W_min && + W <= W_max; + + // printf("in_bbox: (%.1f,%.1f,%.1f) \in ([%.1f,%.1f],[%.1f,%.1f],[%.1f,%.1f]) == %d\n", + // U,V,W,U_min,U_max,V_min,V_max,U_min,U_max,inside); + + return inside; +} + namespace NS { /* @@ -25,7 +55,14 @@ Computes the center of mass of the given tomography. */ array<real_t,3> center_of_mass(const input_ndarray<mask_type> &voxels); -bool in_bbox(float U, float V, float W, const std::array<float, 6> &bbox); +void fill_implant_mask(const input_ndarray<mask_type> implant_mask, + float voxel_size, + const array<float,6> &bbox, + float r_fraction, + const matrix4x4 &Muvw, + output_ndarray<mask_type> solid_implant_mask, + output_ndarray<float> rsqr_maxs, + output_ndarray<float> profile); /* Computes the inertia matrix of the given tomography based of the given center of mass. 
From caf7e60ee71462ececa380df9a6d032f82fab076 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 6 Mar 2023 15:18:43 +0100 Subject: [PATCH 109/136] #25 Added test for fill_implant_mask --- src/pybind/geometry-pybind.cc | 6 +++--- src/test/test_geometry.py | 37 ++++++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index d22a2a9..ac3116b 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -57,7 +57,6 @@ void zero_outside_bbox(const array<real_t,9> &principal_axes, {voxels_info.ptr, voxels_info.shape}); } -/* void fill_implant_mask(const np_maskarray implant_mask, float voxel_size, const array<float,6> &bbox, @@ -72,7 +71,7 @@ void fill_implant_mask(const np_maskarray implant_mask, rsqr_info = rsqr_maxs.request(), profile_info = profile.request(); - return fill_implant_mask({implant_info.ptr, implant_info.shape}, + return NS::fill_implant_mask({implant_info.ptr, implant_info.shape}, voxel_size, bbox, r_fraction, Muvw, {solid_implant_info.ptr, solid_implant_info.shape}, {rsqr_info.ptr, rsqr_info.shape}, @@ -80,6 +79,7 @@ void fill_implant_mask(const np_maskarray implant_mask, ); } +/* void compute_front_mask(const np_array<uint8_t> &np_solid_implant, const float voxel_size, const matrix4x4 &Muvw, @@ -124,7 +124,7 @@ PYBIND11_MODULE(geometry, m) { m.def("inertia_matrix", &python_api::inertia_matrix); m.def("integrate_axes", &python_api::integrate_axes); m.def("zero_outside_bbox", &python_api::zero_outside_bbox); - //m.def("fill_implant_mask", &python_api::fill_implant_mask); + m.def("fill_implant_mask", &python_api::fill_implant_mask); //m.def("cylinder_projection", &python_api::cylinder_projection); m.def("sample_plane", &python_api::sample_plane<uint16_t>); m.def("sample_plane", &python_api::sample_plane<uint8_t>); diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index dae5f56..6525151 
100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -217,9 +217,44 @@ def test_zero_outside_bbox(): compare_fs('zero_outside_bbox', cpu_seq, cpu, gpu, True, 1e-7, voxels) +def test_fill_implant_mask(): + n = 128 + dtype = np.uint8 + implant = np.random.randint(0, np.iinfo(dtype).max, (n,n,n), dtype) + # Values hardcoded from running 770c_pag on processing_steps/0800_implant_data.py + voxel_size = 3.75 + bbox_flat = (-3041.39336716053, 2955.146870664342, -1743.0321403974565, 1744.4435665884819, 367.6267143127782, 1764.022543822563) + rsqr_fraction = 0.7 + Muvwp_flat = (-0.9969205263686536, -0.07827989472162836, 0.004660706729396567, 3351.6367031993477, -0.004165804965960026, -0.006484313676985426, -0.9999702066630287, 3287.1018168847136, -0.07830654571765466, 0.996894476384658, -0.006138149566672908, -1739.8123507003322, 0.0, 0.0, 0.0, 1.0) + n_bins = 1024 + + solid_implant_mask = np.zeros(implant.shape, np.uint8) + rsqr_maxs = np.zeros((n_bins, ), np.uint8) + profile = np.zeros((n_bins, ), np.uint8) + + impls = [m_cpu_seq, m_cpu, m_gpu] + result_solid_implant_mask = [solid_implant_mask.copy() for _ in impls] + result_rsqr_maxs = [rsqr_maxs.copy() for _ in impls] + result_profile = [profile.copy() for _ in impls] + cpu_seq, cpu, gpu = [ + partial(impl.fill_implant_mask, implant, voxel_size, bbox_flat, rsqr_fraction, Muvwp_flat, solid_implant_mask, rsqr_maxs, profile) + for i, impl in enumerate(impls) + ] + + compare_fs('test_fill_implant_mask', cpu_seq, cpu, gpu, False) + + assert_with_print(result_solid_implant_mask[0], result_solid_implant_mask[1], 1e-7, "cpu_seq vs cpu") + assert_with_print(result_solid_implant_mask[0], result_solid_implant_mask[2], 1e-7, "cpu_seq vs gpu") + assert_with_print(result_rsqr_maxs[0], result_rsqr_maxs[1], 1e-7, "cpu_seq vs cpu") + assert_with_print(result_rsqr_maxs[0], result_rsqr_maxs[2], 1e-7, "cpu_seq vs gpu") + assert_with_print(result_profile[0], result_profile[1], 1e-7, "cpu_seq vs cpu") + 
assert_with_print(result_profile[0], result_profile[2], 1e-7, "cpu_seq vs gpu") + + if __name__ == '__main__': test_center_of_mass() test_inertia_matrix() test_sample_plane(np.uint8) test_integrate_axes() - test_zero_outside_bbox() \ No newline at end of file + test_zero_outside_bbox() + test_fill_implant_mask() \ No newline at end of file From 3a35e5f45ee5a68aa94d95a1de03511a80223428 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Tue, 7 Mar 2023 13:33:50 +0100 Subject: [PATCH 110/136] #25 Fixed integrate axes not returning anything --- src/lib/cpp/cpu/geometry.cc | 2 +- src/lib/cpp/cpu_seq/geometry.cc | 14 +++++++++----- src/lib/cpp/gpu/geometry.cc | 2 +- src/lib/cpp/include/datatypes.hh | 4 ++-- src/lib/cpp/include/geometry.hh | 2 +- src/pybind/geometry-pybind.cc | 6 +++--- src/test/test_geometry.py | 14 ++++++++++++-- 7 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/lib/cpp/cpu/geometry.cc b/src/lib/cpp/cpu/geometry.cc index 879ca9f..68af2ce 100644 --- a/src/lib/cpp/cpu/geometry.cc +++ b/src/lib/cpp/cpu/geometry.cc @@ -34,7 +34,7 @@ void integrate_axes(const input_ndarray<mask_type> &mask, const array<real_t,3> &v_axis, const array<real_t,3> &w_axis, const real_t v_min, const real_t w_min, - output_ndarray<real_t> output) { + output_ndarray<uint64_t> output) { return cpu_seq::integrate_axes(mask, x0, v_axis, w_axis, v_min, w_min, output); } diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 1fd5546..6a79125 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -279,10 +279,10 @@ void integrate_axes(const input_ndarray<mask_type> &mask, const array<real_t,3> &v_axis, const array<real_t,3> &w_axis, const real_t v_min, const real_t w_min, - output_ndarray<real_t> output) { + output_ndarray<uint64_t> output) { UNPACK_NUMPY(mask); ssize_t Nv = output.shape[0], Nw = output.shape[1]; - real_t *output_data = output.data; + uint64_t *output_data = output.data; 
// TODO: Check v_axis & w_axis projections to certify bounds and get rid of runtime check #pragma acc data create(output_data[:Nv*Nw]) copyout(output_data[:Nv*Nw]) @@ -297,10 +297,14 @@ void integrate_axes(const input_ndarray<mask_type> &mask, real_t(z) - x0[2] }; - real_t v = dot(xs,v_axis), w = dot(xs,w_axis); - int64_t i_v = int64_t(round(v-v_min)), j_w = int64_t(round(w-w_min)); + real_t + v = dot(xs, v_axis), + w = dot(xs, w_axis); + int64_t + i_v = int64_t(round(v - v_min)), + j_w = int64_t(round(w - w_min)); - if(i_v >= 0 && j_w >= 0 && i_v < Nv && j_w < Nw){ + if (i_v >= 0 && j_w >= 0 && i_v < Nv && j_w < Nw) { ATOMIC() output_data[i_v*Nw + j_w] += voxel; } diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index 5267619..a22cb41 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -124,7 +124,7 @@ void integrate_axes(const input_ndarray<mask_type> &mask, const array<real_t,3> &v_axis, const array<real_t,3> &w_axis, const real_t v_min, const real_t w_min, - output_ndarray<real_t> output) { + output_ndarray<uint64_t> output) { return cpu_seq::integrate_axes(mask, x0, v_axis, w_axis, v_min, w_min, output); } diff --git a/src/lib/cpp/include/datatypes.hh b/src/lib/cpp/include/datatypes.hh index b91fc78..cf37cef 100644 --- a/src/lib/cpp/include/datatypes.hh +++ b/src/lib/cpp/include/datatypes.hh @@ -29,8 +29,8 @@ typedef float gauss_type; typedef float real_t; namespace py = pybind11; -template <typename voxel_type> -using np_array = py::array_t<voxel_type, py::array::c_style | py::array::forcecast>; +template <typename T> +using np_array = py::array_t<T, py::array::c_style | py::array::forcecast>; typedef py::array_t<mask_type, py::array::c_style | py::array::forcecast> np_maskarray; typedef py::array_t<real_t, py::array::c_style | py::array::forcecast> np_realarray; diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh index c168ed4..8bd5cd1 100644 --- a/src/lib/cpp/include/geometry.hh +++ 
b/src/lib/cpp/include/geometry.hh @@ -78,7 +78,7 @@ void integrate_axes(const input_ndarray<mask_type> &mask, const array<real_t,3> &v_axis, const array<real_t,3> &w_axis, const real_t v_min, const real_t w_min, - output_ndarray<real_t> output); + output_ndarray<uint64_t> output); template <typename T> float resample2x2x2(const T *voxels, diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index ac3116b..5c50f69 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -21,7 +21,7 @@ void sample_plane(const np_array<T> &np_voxels, const array<real_t,3> u_axis, const array<real_t,3> v_axis, const array<real_t,4> bbox, // [umin,umax,vmin,vmax] in micrometers - np_array<real_t> np_plane_samples) { + np_array<real_t> &np_plane_samples) { auto voxels_info = np_voxels.request(); auto plane_samples_info = np_plane_samples.request(); @@ -35,9 +35,9 @@ void integrate_axes(const np_maskarray &np_voxels, const array<real_t,3> &v_axis, const array<real_t,3> &w_axis, const real_t v_min, const real_t w_min, - np_realarray &output) { + np_array<uint64_t> &output) { auto voxels_info = np_voxels.request(); - auto output_info = output.request(); + auto output_info = output.request(); NS::integrate_axes({voxels_info.ptr, voxels_info.shape}, x0,v_axis,w_axis, diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index 6525151..ae43815 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -125,6 +125,9 @@ def test_integrate_axes(): v_axis, w_axis = E[:,1], E[:,2] + # TODO de her kan også bruges til test: + v_axis, w_axis = np.array([1,0,0], np.float32), np.array([0,1,0], np.float32) + (vmin,vmax), _ = axis_parameter_bounds(voxels.shape, cm, v_axis) (wmin,wmax), _ = axis_parameter_bounds(voxels.shape, cm, w_axis) @@ -132,8 +135,15 @@ def test_integrate_axes(): partial(impl.integrate_axes, voxels, cm, v_axis, w_axis, vmin, wmin) for impl in [m_cpu_seq, m_cpu, m_gpu] ] - - compare_fs('integrate_axes', cpu_seq, 
cpu, gpu, True, 1e-7, ((int(vmax-vmin+2),int(wmax-wmin+2)), float)) + #$void integrate_axes(const np_maskarray &np_voxels, + #$ const array<real_t,3> &x0, + #$ const array<real_t,3> &v_axis, + #$ const array<real_t,3> &w_axis, + #$ const real_t v_min, + # const real_t w_min, + #$ np_realarray output) { + + compare_fs('integrate_axes', cpu_seq, cpu, gpu, True, 1e-7, ((int(vmax-vmin+2),int(wmax-wmin+2)), np.uint64)) def axis_parameter_bounds(shape, center, axis): signs = np.sign(axis) From 7103f1d9b807f4d47d0f577a0c39236737011a01 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Tue, 7 Mar 2023 13:34:08 +0100 Subject: [PATCH 111/136] #25 Fixed incorrect GPU results for integrate axes --- src/lib/cpp/cpu_seq/geometry.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 6a79125..ff735fe 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -285,7 +285,7 @@ void integrate_axes(const input_ndarray<mask_type> &mask, uint64_t *output_data = output.data; // TODO: Check v_axis & w_axis projections to certify bounds and get rid of runtime check - #pragma acc data create(output_data[:Nv*Nw]) copyout(output_data[:Nv*Nw]) + #pragma acc data copy(output_data[:Nv*Nw]) copyin(x0, v_axis, w_axis, v_min, w_min) { BLOCK_BEGIN(mask, ) { From d48d75d55cda58598fffbb2493cea16aef8545e3 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Tue, 7 Mar 2023 14:09:04 +0100 Subject: [PATCH 112/136] #25 Fixed zero_outside_bbox --- src/lib/cpp/cpu_seq/geometry.cc | 4 +++- src/test/test_geometry.py | 7 +++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index ff735fe..23cfc54 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -322,6 +322,8 @@ void zero_outside_bbox(const array<real_t,9> &principal_axes, 
UNPACK_NUMPY(voxels) + #pragma acc data copyin(principal_axes, parameter_ranges, cm) + { BLOCK_BEGIN(voxels, ) { real_t xs[3] = { @@ -347,7 +349,7 @@ void zero_outside_bbox(const array<real_t,9> &principal_axes, voxels_buffer[flat_index] = 0; BLOCK_END() } - + } } } diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index ae43815..a67e6af 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -215,10 +215,9 @@ def test_zero_outside_bbox(): fine_scale = 2 mmtofi = 1 / (voxelsize * fine_scale) # Conversion factor from micrometers to index - implant_bound = bounding_volume(voxels, voxelsize*coarse_scale) - uvw_axes = implant_bound["principal_axes"] - uvw_ranges = implant_bound["principal_axes_ranges"] * mmtofi - cm = implant_bound["centre_of_mass"] * mmtofi + uvw_axes = np.array([[1,0,0],[0,1,0],[0,0,1]], np.float32) + uvw_ranges = np.array([-16,16]*3, np.float32) + cm = np.array(m_cpu.center_of_mass(voxels)) cpu_seq, cpu, gpu = [ partial(impl.zero_outside_bbox, uvw_axes.flatten(), uvw_ranges.flatten(), cm) From 066778c4630ed381e45dc4cd9f4d6ae922e63707 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Tue, 7 Mar 2023 14:14:46 +0100 Subject: [PATCH 113/136] #25 Added additional checking of the results in test_geometry --- src/test/test_geometry.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index a67e6af..0caba8b 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -16,8 +16,12 @@ #n = 2344 # ~12 GB, used for testing whether blocked works. n = 128 +def assert_interesting_result(result): + checksum = result.sum() if type(result) is np.ndarray else sum(result) + assert (checksum < 0 or checksum > 0) # Sanity check that there's an actual result to compare to. 
+ def assert_with_print(a, b, tolerance=1e-7, names=None): - na, nb = np.array(a), np.array(b) + na, nb = np.array(a, dtype=np.float64), np.array(b, dtype=np.float64) nabs = np.abs(na - nb) all_close = np.alltrue(nabs < tolerance) if not all_close: @@ -25,6 +29,8 @@ def assert_with_print(a, b, tolerance=1e-7, names=None): print ('b', nb) print ('absolute error (AE) (abs(a-b))', nabs) print ('AE sum', np.sum(nabs)) + suma, sumb = na.sum(), nb.sum() + print ('checksums', suma, sumb, np.abs(suma - sumb), suma / sumb) diffs = np.argwhere(nabs > tolerance) print (f'differing on {diffs.shape} elements') for i in diffs[:5]: # Only print 5 first @@ -59,6 +65,7 @@ def compare_fs(func, baseline_f, cpu_f, gpu_f, should_assert=True, tolerance=1e- allocate_result: tuple[tuple[int],np.dtype] | np.ndarray=None): baseline, baseline_t = run_with_warmup(baseline_f, allocate_result) print (f'({func}) Sequential ran in {baseline_t}') + if should_assert: assert_interesting_result(baseline) cpu, cpu_t = run_with_warmup(cpu_f, allocate_result) print (f'({func}) Parallel CPU ran in {cpu_t}, which is {baseline_t / cpu_t:.02f} times faster than sequential') @@ -90,6 +97,8 @@ def test_inertia_matrix(): # TODO assert disabled due to floating point associativity error accumulation compare_fs('inertia_matrix', baseline, cpu, gpu, should_assert=False) + assert_interesting_result(baseline()) + @pytest.mark.parametrize("dtype", [np.uint8, np.uint16]) def test_sample_plane(dtype): # TODO something that isn't just random data? 
From ce816048f5b5defeee52a7b16eae844611bca676 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Wed, 8 Mar 2023 13:39:01 +0100 Subject: [PATCH 114/136] #25 Fixed fill_implant_mask not giving any results --- src/lib/cpp/cpu_seq/geometry.cc | 51 ++++++++++++++++----------------- src/lib/cpp/gpu/geometry.cc | 32 +++++++++++++-------- src/lib/cpp/include/geometry.hh | 1 - src/test/test_geometry.py | 35 ++++++++++++++-------- 4 files changed, 67 insertions(+), 52 deletions(-) diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 23cfc54..826be41 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -56,11 +56,10 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, float *profile_d = profile.data; - //BLOCK_BEGIN(mask, ) { - #pragma omp parallel for collapse(3) - for (int64_t z = 0; z < mask_Nz; z++) { for (int64_t y = 0; y < mask_Ny; y++) { for (int64_t x = 0; x < mask_Nx; x++) { - //mask_type *solid_mask_buffer = solid_implant_mask.data + mask_buffer_start; - + #pragma omp parallel for collapse(3) reduction(max:rsqr_maxs_d[:n_segments], theta_max) reduction(min:theta_min) + for (int64_t z = 0; z < mask_Nz; z++) { + for (int64_t y = 0; y < mask_Ny; y++) { + for (int64_t x = 0; x < mask_Nx; x++) { mask_type mask_value = mask.data[z*mask_Ny*mask_Nx + y*mask_Nx + x]; std::array<real_t, 4> Xs = { real_t(x) * voxel_size, @@ -71,13 +70,13 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, if (mask_value) { auto [U,V,W,c] = hom_transform(Xs, Muvw); - real_t r_sqr = V*V+W*W; - real_t theta = atan2(V,W); + real_t r_sqr = V*V + W*W; + real_t theta = atan2(V, W); - int U_i = int(floor((U-U_min)*real_t(n_segments-1)/(U_max-U_min))); + int U_i = int(floor((U - U_min) * real_t(n_segments-1) / (U_max - U_min))); // if (U_i >= 0 && U_i < n_segments) { - if ( in_bbox(U,V,W,bbox) ) { + if ( in_bbox(U, V, W, bbox) ) { rsqr_maxs_d[U_i] = max(rsqr_maxs_d[U_i], float(r_sqr)); 
theta_min = min(theta_min, theta); theta_max = max(theta_max, theta); @@ -87,15 +86,16 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, // fprintf(stderr,"U-coordinate out of bounds: U_i = %ld, U = %g, U_min = %g, U_max = %g\n",U_i,U,U_min,U_max); } } + } + } + } - //FOR_3D_END() } - }}} - - double theta_center = (theta_max+theta_min)/2; + real_t theta_center = (theta_max + theta_min) / 2; - //FOR_3D_BEGIN(mask, ) { - #pragma omp parallel for collapse(3) - for (int64_t z = 0; z < mask_Nz; z++) { for (int64_t y = 0; y < mask_Ny; y++) { for (int64_t x = 0; x < mask_Nx; x++) { + #pragma omp parallel for collapse(3) reduction(+:profile_d[:n_segments]) + for (int64_t z = 0; z < mask_Nz; z++) { + for (int64_t y = 0; y < mask_Ny; y++) { + for (int64_t x = 0; x < mask_Nx; x++) { std::array<real_t, 4> Xs = { real_t(x) * voxel_size, real_t(y) * voxel_size, @@ -105,27 +105,24 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, mask_type mask_value = mask.data[flat_index]; // Second pass does the actual work - auto [U,V,W,c] = hom_transform(Xs,Muvw); - float r_sqr = V*V+W*W; - float theta = atan2(V,W); - int U_i = int(floor((U-U_min)*real_t(n_segments-1)/(U_max-U_min))); + auto [U,V,W,c] = hom_transform(Xs, Muvw); + float r_sqr = V*V + W*W; + float theta = atan2(V, W); + int U_i = int(floor((U - U_min) * real_t(n_segments-1) / (U_max - U_min))); bool solid_mask_value = false; if (U_i >= 0 && U_i < n_segments && W >= W_min) { // TODO: Full bounding box check? 
- solid_mask_value = mask_value | (r_sqr <= r_fraction*rsqr_maxs_d[U_i]); + solid_mask_value = mask_value | (r_sqr <= r_fraction * rsqr_maxs_d[U_i]); if (theta >= theta_min && theta <= theta_center && r_sqr <= rsqr_maxs_d[U_i]) { - ATOMIC() profile_d[U_i] += solid_mask_value; } } solid_implant_mask.data[flat_index] = solid_mask_value; - - //BLOCK_END() } - //FOR_3D_END() } - }}} - + } + } + } } array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array<real_t,3> &cm) { diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index a22cb41..a78c3eb 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -29,14 +29,17 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, float *rsqr_maxs_d = rsqr_maxs.data; float *profile_d = profile.data; - #pragma acc data copyin(U_min) create(rsqr_maxs_d[:n_segments], profile_d[:n_segments]) copyout(rsqr_maxs_d[:n_segments], profile_d[:n_segments]) + #pragma acc data copyin(U_min, U_max, W_min, Muvw, mask_Nz, mask_Ny, mask_Nx, voxel_size, n_segments, bbox) copy(rsqr_maxs_d[:n_segments], profile_d[:n_segments]) + { + #pragma acc data copy(theta_min, theta_max) { for (int64_t mask_buffer_start = 0; mask_buffer_start < mask_length; mask_buffer_start += acc_block_size<mask_type>) { ssize_t mask_buffer_length = min(acc_block_size<mask_type>, mask_length-mask_buffer_start); mask_type *mask_buffer = (mask_type *) mask.data + mask_buffer_start; - #pragma acc data copy(mask_buffer[:mask_buffer_length]) + #pragma acc data copyin(mask_buffer_start, mask_buffer[:mask_buffer_length]) { - #pragma acc parallel loop + // TODO the reduction on rsqr_maxs_d kills performance, and allocates more memory than what's available on the GPU! The real solution would be using atomic, but OpenACC doesn't like it on that particular statement. 
+ #pragma acc parallel loop reduction(max:theta_max) reduction(min:theta_min) reduction(max:rsqr_maxs_d[:n_segments]) for (int64_t flat_index = 0; flat_index < mask_buffer_length; flat_index++) { int64_t global_index = mask_buffer_start + flat_index, @@ -53,32 +56,36 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, if (mask_value) { auto [U,V,W,c] = hom_transform(Xs, Muvw); - real_t r_sqr = V*V+W*W; + real_t r_sqr = V*V + W*W; real_t theta = atan2(V,W); - int U_i = int(floor((U-U_min)*real_t(n_segments-1)/(U_max-U_min))); + int U_i = int(floor((U - U_min) * real_t(n_segments-1) / (U_max - U_min))); if ( in_bbox(U,V,W,bbox) ) { + //#pragma acc atomic update rsqr_maxs_d[U_i] = max(rsqr_maxs_d[U_i], float(r_sqr)); theta_min = min(theta_min, theta); theta_max = max(theta_max, theta); } else { // Otherwise we've calculated it wrong! + } } } } } } - double theta_center = (theta_max+theta_min)/2; + real_t theta_center = (theta_max + theta_min) / 2; + #pragma acc data copyin(theta_center) + { for (int64_t mask_buffer_start = 0; mask_buffer_start < mask_length; mask_buffer_start += acc_block_size<mask_type>) { mask_type *mask_buffer = (mask_type *) mask.data + mask_buffer_start; ssize_t mask_buffer_length = min(acc_block_size<mask_type>, mask_length-mask_buffer_start); mask_type *solid_mask_buffer = solid_implant_mask.data + mask_buffer_start; #pragma acc data copy(mask_buffer[:mask_buffer_length]) create(solid_mask_buffer[:mask_buffer_length]) copyout(solid_mask_buffer[:mask_buffer_length]) { - #pragma acc parallel loop + #pragma acc parallel loop // reduction(+:profile_d[:n_segments]) for (int64_t flat_index = 0; flat_index < mask_buffer_length; flat_index++) { int64_t global_index = mask_buffer_start + flat_index, @@ -93,14 +100,14 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, 1 }; // Second pass does the actual work - auto [U,V,W,c] = hom_transform(Xs,Muvw); - float r_sqr = V*V+W*W; - float theta = atan2(V,W); - int U_i = 
int(floor((U-U_min)*real_t(n_segments-1)/(U_max-U_min))); + auto [U,V,W,c] = hom_transform(Xs, Muvw); + float r_sqr = V*V + W*W; + float theta = atan2(V, W); + int U_i = int(floor((U - U_min) * real_t(n_segments-1) / (U_max - U_min))); bool solid_mask_value = false; if (U_i >= 0 && U_i < n_segments && W >= W_min) { // TODO: Full bounding box check? - solid_mask_value = mask_value | (r_sqr <= r_fraction*rsqr_maxs_d[U_i]); + solid_mask_value = mask_value | (r_sqr <= r_fraction * rsqr_maxs_d[U_i]); if (theta >= theta_min && theta <= theta_center && r_sqr <= rsqr_maxs_d[U_i]) { ATOMIC() @@ -109,6 +116,7 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, } solid_mask_buffer[flat_index] = solid_mask_value; + } } } } diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh index 8bd5cd1..c7ec8dc 100644 --- a/src/lib/cpp/include/geometry.hh +++ b/src/lib/cpp/include/geometry.hh @@ -20,7 +20,6 @@ inline vector4 hom_transform(const vector4 &x, const matrix4x4 &M) { for (int i = 0; i < 4; i++) { real_t sum = 0; - #pragma simd parallel for reduction(+:sum) for (int j = 0; j < 4; j++) sum += M[i*4 + j] * x[j]; c[i] = sum; diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index 0caba8b..ce7446b 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -6,9 +6,12 @@ import cpu_seq.geometry as m_cpu_seq import cpu.geometry as m_cpu import gpu.geometry as m_gpu +sys.path.append(sys.path[0]+'/../') +from config.paths import hdf5_root import datetime from functools import partial +import h5py import numpy as np import pytest @@ -25,9 +28,9 @@ def assert_with_print(a, b, tolerance=1e-7, names=None): nabs = np.abs(na - nb) all_close = np.alltrue(nabs < tolerance) if not all_close: - print ('a', na) - print ('b', nb) - print ('absolute error (AE) (abs(a-b))', nabs) + #print ('a', na) + #print ('b', nb) + #print ('absolute error (AE) (abs(a-b))', nabs) print ('AE sum', np.sum(nabs)) suma, sumb = na.sum(), nb.sum() print 
('checksums', suma, sumb, np.abs(suma - sumb), suma / sumb) @@ -238,29 +241,36 @@ def test_zero_outside_bbox(): def test_fill_implant_mask(): n = 128 dtype = np.uint8 - implant = np.random.randint(0, np.iinfo(dtype).max, (n,n,n), dtype) - # Values hardcoded from running 770c_pag on processing_steps/0800_implant_data.py - voxel_size = 3.75 - bbox_flat = (-3041.39336716053, 2955.146870664342, -1743.0321403974565, 1744.4435665884819, 367.6267143127782, 1764.022543822563) - rsqr_fraction = 0.7 - Muvwp_flat = (-0.9969205263686536, -0.07827989472162836, 0.004660706729396567, 3351.6367031993477, -0.004165804965960026, -0.006484313676985426, -0.9999702066630287, 3287.1018168847136, -0.07830654571765466, 0.996894476384658, -0.006138149566672908, -1739.8123507003322, 0.0, 0.0, 0.0, 1.0) + implant = np.random.randint(0, 2, (n,n,n), dtype) + voxel_size = 1 + bbox_flat = np.array([-16,16] * 3, np.float32) + rsqr_fraction = 1#0.7 + Muvwp_flat = np.array([ + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1 + ], np.float32) n_bins = 1024 solid_implant_mask = np.zeros(implant.shape, np.uint8) - rsqr_maxs = np.zeros((n_bins, ), np.uint8) - profile = np.zeros((n_bins, ), np.uint8) + rsqr_maxs = np.zeros((n_bins, ), np.float32) + profile = np.zeros((n_bins, ), np.float32) impls = [m_cpu_seq, m_cpu, m_gpu] result_solid_implant_mask = [solid_implant_mask.copy() for _ in impls] result_rsqr_maxs = [rsqr_maxs.copy() for _ in impls] result_profile = [profile.copy() for _ in impls] cpu_seq, cpu, gpu = [ - partial(impl.fill_implant_mask, implant, voxel_size, bbox_flat, rsqr_fraction, Muvwp_flat, solid_implant_mask, rsqr_maxs, profile) + partial(impl.fill_implant_mask, implant, voxel_size, bbox_flat, rsqr_fraction, Muvwp_flat, result_solid_implant_mask[i], result_rsqr_maxs[i], result_profile[i]) for i, impl in enumerate(impls) ] compare_fs('test_fill_implant_mask', cpu_seq, cpu, gpu, False) + assert_interesting_result(result_solid_implant_mask[0]) + 
assert_interesting_result(result_rsqr_maxs[0]) + assert_interesting_result(result_profile[0]) assert_with_print(result_solid_implant_mask[0], result_solid_implant_mask[1], 1e-7, "cpu_seq vs cpu") assert_with_print(result_solid_implant_mask[0], result_solid_implant_mask[2], 1e-7, "cpu_seq vs gpu") assert_with_print(result_rsqr_maxs[0], result_rsqr_maxs[1], 1e-7, "cpu_seq vs cpu") @@ -270,6 +280,7 @@ def test_fill_implant_mask(): if __name__ == '__main__': + np.random.seed(42) test_center_of_mass() test_inertia_matrix() test_sample_plane(np.uint8) From 56ad65f0256fbc9e7110f8f766db0959d6af2a0e Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Wed, 8 Mar 2023 13:51:44 +0100 Subject: [PATCH 115/136] #25 Added implementation and test for geometry::compute_front_mask --- src/lib/cpp/cpu/geometry.cc | 8 ++ src/lib/cpp/cpu_seq/geometry.cc | 157 +++++++++++++++-------------- src/lib/cpp/gpu/geometry.cc | 116 +++++++++++---------- src/lib/cpp/include/boilerplate.hh | 40 ++++++++ src/lib/cpp/include/datatypes.hh | 2 + src/lib/cpp/include/geometry.hh | 6 ++ src/pybind/geometry-pybind.cc | 6 +- src/test/test_geometry.py | 32 +++++- 8 files changed, 235 insertions(+), 132 deletions(-) diff --git a/src/lib/cpp/cpu/geometry.cc b/src/lib/cpp/cpu/geometry.cc index 68af2ce..b173519 100644 --- a/src/lib/cpp/cpu/geometry.cc +++ b/src/lib/cpp/cpu/geometry.cc @@ -14,6 +14,14 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> &mask) { return cpu_seq::center_of_mass(mask); } +void compute_front_mask(const input_ndarray<mask_type> solid_implant, + const float voxel_size, + const matrix4x4 &Muvw, + std::array<float,6> bbox, + output_ndarray<mask_type> front_mask) { + return cpu_seq::compute_front_mask(solid_implant, voxel_size, Muvw, bbox, front_mask); +} + void fill_implant_mask(const input_ndarray<mask_type> mask, float voxel_size, const array<float,6> &bbox, diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 
826be41..8ad180c 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -39,6 +39,33 @@ array<real_t, 3> center_of_mass(const input_ndarray<mask_type> &mask) { return array<real_t, 3>{ rcmz, rcmy, rcmx }; } +void compute_front_mask(const input_ndarray<mask_type> solid_implant, + const float voxel_size, + const matrix4x4 &Muvw, + std::array<float,6> bbox, + output_ndarray<mask_type> front_mask) { + const auto [U_min, U_max, V_min, V_max, W_min, W_max] = bbox; + UNPACK_NUMPY(solid_implant) + + BLOCK_BEGIN_WITH_OUTPUT(solid_implant, front_mask, ) { + + std::array<real_t, 4> Xs = { + real_t(x) * voxel_size, + real_t(y) * voxel_size, + real_t(z) * voxel_size, + 1 }; + mask_type mask_value = solid_implant_buffer[flat_index]; + + if (mask_value) { + front_mask_buffer[flat_index] = 0; + } else { + auto [U,V,W,c] = hom_transform(Xs, Muvw); + front_mask_buffer[flat_index] = W > W_min; + } + + BLOCK_END_WITH_OUTPUT() } +} + void fill_implant_mask(const input_ndarray<mask_type> mask, float voxel_size, const array<float,6> &bbox, @@ -60,32 +87,32 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, for (int64_t z = 0; z < mask_Nz; z++) { for (int64_t y = 0; y < mask_Ny; y++) { for (int64_t x = 0; x < mask_Nx; x++) { - mask_type mask_value = mask.data[z*mask_Ny*mask_Nx + y*mask_Nx + x]; - std::array<real_t, 4> Xs = { - real_t(x) * voxel_size, - real_t(y) * voxel_size, - real_t(z) * voxel_size, - 1 }; + mask_type mask_value = mask.data[z*mask_Ny*mask_Nx + y*mask_Nx + x]; + std::array<real_t, 4> Xs = { + real_t(x) * voxel_size, + real_t(y) * voxel_size, + real_t(z) * voxel_size, + 1 }; - if (mask_value) { - auto [U,V,W,c] = hom_transform(Xs, Muvw); + if (mask_value) { + auto [U,V,W,c] = hom_transform(Xs, Muvw); real_t r_sqr = V*V + W*W; real_t theta = atan2(V, W); int U_i = int(floor((U - U_min) * real_t(n_segments-1) / (U_max - U_min))); - // if (U_i >= 0 && U_i < n_segments) { + // if (U_i >= 0 && U_i < n_segments) { if ( in_bbox(U, V, W, 
bbox) ) { - rsqr_maxs_d[U_i] = max(rsqr_maxs_d[U_i], float(r_sqr)); - theta_min = min(theta_min, theta); - theta_max = max(theta_max, theta); - // W_min = min(W_min, W); - } else { - // Otherwise we've calculated it wrong! - // fprintf(stderr,"U-coordinate out of bounds: U_i = %ld, U = %g, U_min = %g, U_max = %g\n",U_i,U,U_min,U_max); - } - } + rsqr_maxs_d[U_i] = max(rsqr_maxs_d[U_i], float(r_sqr)); + theta_min = min(theta_min, theta); + theta_max = max(theta_max, theta); + // W_min = min(W_min, W); + } else { + // Otherwise we've calculated it wrong! + // fprintf(stderr,"U-coordinate out of bounds: U_i = %ld, U = %g, U_min = %g, U_max = %g\n",U_i,U,U_min,U_max); + } + } } } } @@ -96,30 +123,30 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, for (int64_t z = 0; z < mask_Nz; z++) { for (int64_t y = 0; y < mask_Ny; y++) { for (int64_t x = 0; x < mask_Nx; x++) { - std::array<real_t, 4> Xs = { - real_t(x) * voxel_size, - real_t(y) * voxel_size, - real_t(z) * voxel_size, - 1 }; - int64_t flat_index = z*mask_Ny*mask_Nx + y*mask_Nx + x; - mask_type mask_value = mask.data[flat_index]; - - // Second pass does the actual work + std::array<real_t, 4> Xs = { + real_t(x) * voxel_size, + real_t(y) * voxel_size, + real_t(z) * voxel_size, + 1 }; + int64_t flat_index = z*mask_Ny*mask_Nx + y*mask_Nx + x; + mask_type mask_value = mask.data[flat_index]; + + // Second pass does the actual work auto [U,V,W,c] = hom_transform(Xs, Muvw); float r_sqr = V*V + W*W; float theta = atan2(V, W); int U_i = int(floor((U - U_min) * real_t(n_segments-1) / (U_max - U_min))); - bool solid_mask_value = false; - if (U_i >= 0 && U_i < n_segments && W >= W_min) { // TODO: Full bounding box check? + bool solid_mask_value = false; + if (U_i >= 0 && U_i < n_segments && W >= W_min) { // TODO: Full bounding box check? 
solid_mask_value = mask_value | (r_sqr <= r_fraction * rsqr_maxs_d[U_i]); - if (theta >= theta_min && theta <= theta_center && r_sqr <= rsqr_maxs_d[U_i]) { - profile_d[U_i] += solid_mask_value; - } - } + if (theta >= theta_min && theta <= theta_center && r_sqr <= rsqr_maxs_d[U_i]) { + profile_d[U_i] += solid_mask_value; + } + } - solid_implant_mask.data[flat_index] = solid_mask_value; + solid_implant_mask.data[flat_index] = solid_mask_value; } } } @@ -321,55 +348,37 @@ void zero_outside_bbox(const array<real_t,9> &principal_axes, #pragma acc data copyin(principal_axes, parameter_ranges, cm) { - BLOCK_BEGIN(voxels, ) { - - real_t xs[3] = { - real_t(x) - cm[0], - real_t(y) - cm[1], - real_t(z) - cm[2]}; - real_t params[3] = { 0, 0, 0 }; - - for (int uvw = 0; uvw < 3; uvw++) - for (int xyz = 0; xyz < 3; xyz++) - params[uvw] += xs[xyz] * principal_axes[uvw*3 + xyz]; // u = dot(xs,u_axis), v = dot(xs,v_axis), w = dot(xs,w_axis) + BLOCK_BEGIN(voxels, ) { - bool p = false; - - for (int uvw = 0; uvw < 3; uvw++) { - real_t - param_min = parameter_ranges[uvw*2], - param_max = parameter_ranges[uvw*2 + 1]; - p |= (params[uvw] < param_min) | (params[uvw] > param_max); - } + real_t xs[3] = { + real_t(x) - cm[0], + real_t(y) - cm[1], + real_t(z) - cm[2]}; + real_t params[3] = { 0, 0, 0 }; + + for (int uvw = 0; uvw < 3; uvw++) + for (int xyz = 0; xyz < 3; xyz++) + params[uvw] += xs[xyz] * principal_axes[uvw*3 + xyz]; // u = dot(xs,u_axis), v = dot(xs,v_axis), w = dot(xs,w_axis) + + bool p = false; + + for (int uvw = 0; uvw < 3; uvw++) { + real_t + param_min = parameter_ranges[uvw*2], + param_max = parameter_ranges[uvw*2 + 1]; + p |= (params[uvw] < param_min) | (params[uvw] > param_max); + } - if (p) - voxels_buffer[flat_index] = 0; + if (p) + voxels_buffer[flat_index] = 0; - BLOCK_END() } + BLOCK_END() } } } } /* -void compute_front_mask(const input_ndarray<mask_type> solid_implant, - const float voxel_size, - const matrix4x4 &Muvw, - std::array<float,6> bbox, - 
output_ndarray<mask_type> front_mask) { - const auto [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; - - loop_mask_start(solid_implant, front_mask, () ); - - if (!mask_value) { - auto [U,V,W,c] = hom_transform(Xs,Muvw); - maskout_buffer[k] = W>W_min; - } else - maskout_buffer[k] = 0; - - loop_mask_end(solid_implant) -} - void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance Transform in um, should be low-resolution (will be interpolated) const input_ndarray<uint8_t> C, // Material classification images (probability per voxel, 0..1 -> 0..255) float voxel_size, // Voxel size for Cs diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index a78c3eb..57126dc 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -13,6 +13,14 @@ array<real_t,3> center_of_mass(const input_ndarray<mask_type> &mask) { return cpu_seq::center_of_mass(mask); } +void compute_front_mask(const input_ndarray<mask_type> solid_implant, + const float voxel_size, + const matrix4x4 &Muvw, + std::array<float,6> bbox, + output_ndarray<mask_type> front_mask) { + return cpu_seq::compute_front_mask(solid_implant, voxel_size, Muvw, bbox, front_mask); +} + void fill_implant_mask(const input_ndarray<mask_type> mask, float voxel_size, const array<float,6> &bbox, @@ -32,42 +40,42 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, #pragma acc data copyin(U_min, U_max, W_min, Muvw, mask_Nz, mask_Ny, mask_Nx, voxel_size, n_segments, bbox) copy(rsqr_maxs_d[:n_segments], profile_d[:n_segments]) { #pragma acc data copy(theta_min, theta_max) - { - for (int64_t mask_buffer_start = 0; mask_buffer_start < mask_length; mask_buffer_start += acc_block_size<mask_type>) { - ssize_t mask_buffer_length = min(acc_block_size<mask_type>, mask_length-mask_buffer_start); - mask_type *mask_buffer = (mask_type *) mask.data + mask_buffer_start; + { + for (int64_t mask_buffer_start = 0; mask_buffer_start < mask_length; mask_buffer_start += 
acc_block_size<mask_type>) { + ssize_t mask_buffer_length = min(acc_block_size<mask_type>, mask_length-mask_buffer_start); + mask_type *mask_buffer = (mask_type *) mask.data + mask_buffer_start; #pragma acc data copyin(mask_buffer_start, mask_buffer[:mask_buffer_length]) - { + { // TODO the reduction on rsqr_maxs_d kills performance, and allocates more memory than what's available on the GPU! The real solution would be using atomic, but OpenACC doesn't like it on that particular statement. #pragma acc parallel loop reduction(max:theta_max) reduction(min:theta_min) reduction(max:rsqr_maxs_d[:n_segments]) - for (int64_t flat_index = 0; flat_index < mask_buffer_length; flat_index++) { - int64_t - global_index = mask_buffer_start + flat_index, - z = global_index / (mask_Ny * mask_Nx), - y = (global_index / mask_Nx) % mask_Ny, - x = global_index % mask_Nx; - mask_type mask_value = mask_buffer[flat_index]; - std::array<real_t, 4> Xs = { - real_t(x) * voxel_size, - real_t(y) * voxel_size, - real_t(z) * voxel_size, - 1 }; - - if (mask_value) { - auto [U,V,W,c] = hom_transform(Xs, Muvw); + for (int64_t flat_index = 0; flat_index < mask_buffer_length; flat_index++) { + int64_t + global_index = mask_buffer_start + flat_index, + z = global_index / (mask_Ny * mask_Nx), + y = (global_index / mask_Nx) % mask_Ny, + x = global_index % mask_Nx; + mask_type mask_value = mask_buffer[flat_index]; + std::array<real_t, 4> Xs = { + real_t(x) * voxel_size, + real_t(y) * voxel_size, + real_t(z) * voxel_size, + 1 }; + + if (mask_value) { + auto [U,V,W,c] = hom_transform(Xs, Muvw); real_t r_sqr = V*V + W*W; - real_t theta = atan2(V,W); + real_t theta = atan2(V,W); int U_i = int(floor((U - U_min) * real_t(n_segments-1) / (U_max - U_min))); - if ( in_bbox(U,V,W,bbox) ) { + if ( in_bbox(U,V,W,bbox) ) { //#pragma acc atomic update - rsqr_maxs_d[U_i] = max(rsqr_maxs_d[U_i], float(r_sqr)); - theta_min = min(theta_min, theta); - theta_max = max(theta_max, theta); - } else { - // Otherwise we've 
calculated it wrong! + rsqr_maxs_d[U_i] = max(rsqr_maxs_d[U_i], float(r_sqr)); + theta_min = min(theta_min, theta); + theta_max = max(theta_max, theta); + } else { + // Otherwise we've calculated it wrong! } } } @@ -79,43 +87,43 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, #pragma acc data copyin(theta_center) { - for (int64_t mask_buffer_start = 0; mask_buffer_start < mask_length; mask_buffer_start += acc_block_size<mask_type>) { - mask_type *mask_buffer = (mask_type *) mask.data + mask_buffer_start; - ssize_t mask_buffer_length = min(acc_block_size<mask_type>, mask_length-mask_buffer_start); - mask_type *solid_mask_buffer = solid_implant_mask.data + mask_buffer_start; - #pragma acc data copy(mask_buffer[:mask_buffer_length]) create(solid_mask_buffer[:mask_buffer_length]) copyout(solid_mask_buffer[:mask_buffer_length]) - { + for (int64_t mask_buffer_start = 0; mask_buffer_start < mask_length; mask_buffer_start += acc_block_size<mask_type>) { + mask_type *mask_buffer = (mask_type *) mask.data + mask_buffer_start; + ssize_t mask_buffer_length = min(acc_block_size<mask_type>, mask_length-mask_buffer_start); + mask_type *solid_mask_buffer = solid_implant_mask.data + mask_buffer_start; + #pragma acc data copy(mask_buffer[:mask_buffer_length]) create(solid_mask_buffer[:mask_buffer_length]) copyout(solid_mask_buffer[:mask_buffer_length]) + { #pragma acc parallel loop // reduction(+:profile_d[:n_segments]) - for (int64_t flat_index = 0; flat_index < mask_buffer_length; flat_index++) { - int64_t - global_index = mask_buffer_start + flat_index, - z = global_index / (mask_Ny * mask_Nx), - y = (global_index / mask_Nx) % mask_Ny, - x = global_index % mask_Nx; - mask_type mask_value = mask_buffer[flat_index]; - std::array<real_t, 4> Xs = { - real_t(x) * voxel_size, - real_t(y) * voxel_size, - real_t(z) * voxel_size, - 1 }; - - // Second pass does the actual work + for (int64_t flat_index = 0; flat_index < mask_buffer_length; flat_index++) { + int64_t + 
global_index = mask_buffer_start + flat_index, + z = global_index / (mask_Ny * mask_Nx), + y = (global_index / mask_Nx) % mask_Ny, + x = global_index % mask_Nx; + mask_type mask_value = mask_buffer[flat_index]; + std::array<real_t, 4> Xs = { + real_t(x) * voxel_size, + real_t(y) * voxel_size, + real_t(z) * voxel_size, + 1 }; + + // Second pass does the actual work auto [U,V,W,c] = hom_transform(Xs, Muvw); float r_sqr = V*V + W*W; float theta = atan2(V, W); int U_i = int(floor((U - U_min) * real_t(n_segments-1) / (U_max - U_min))); - bool solid_mask_value = false; - if (U_i >= 0 && U_i < n_segments && W >= W_min) { // TODO: Full bounding box check? + bool solid_mask_value = false; + if (U_i >= 0 && U_i < n_segments && W >= W_min) { // TODO: Full bounding box check? solid_mask_value = mask_value | (r_sqr <= r_fraction * rsqr_maxs_d[U_i]); - if (theta >= theta_min && theta <= theta_center && r_sqr <= rsqr_maxs_d[U_i]) { - ATOMIC() - profile_d[U_i] += solid_mask_value; + if (theta >= theta_min && theta <= theta_center && r_sqr <= rsqr_maxs_d[U_i]) { + ATOMIC() + profile_d[U_i] += solid_mask_value; + } } - } - solid_mask_buffer[flat_index] = solid_mask_value; + solid_mask_buffer[flat_index] = solid_mask_value; } } } diff --git a/src/lib/cpp/include/boilerplate.hh b/src/lib/cpp/include/boilerplate.hh index c0ba698..c3c1dad 100644 --- a/src/lib/cpp/include/boilerplate.hh +++ b/src/lib/cpp/include/boilerplate.hh @@ -39,6 +39,16 @@ #define FOR_BLOCK_END() } } +#define FOR_BLOCK_BEGIN_WITH_OUTPUT(ARR_IN, ARR_OUT) \ + for (int64_t ARR_IN##_buffer_start = 0; ARR_IN##_buffer_start < ARR_IN##_length; ARR_IN##_buffer_start += acc_block_size<ARR_IN##_type> / 2) { \ + ARR_IN##_type *ARR_IN##_buffer = (ARR_IN##_type *) ARR_IN.data + ARR_IN##_buffer_start; \ + ARR_OUT##_type *ARR_OUT##_buffer = (ARR_OUT##_type *) ARR_OUT.data + ARR_IN##_buffer_start; \ + ssize_t ARR_IN##_buffer_length = min(acc_block_size<ARR_IN##_type>, ARR_IN##_length - ARR_IN##_buffer_start); \ + PRAGMA(acc data 
copyin(ARR_IN##_buffer[:ARR_IN##_buffer_length]) copy(ARR_OUT##_buffer[:ARR_IN##_buffer_length])) \ + { + +#define FOR_BLOCK_END_WITH_OUTPUT() } } + #define FOR_3D_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ PRAGMA(PARALLEL_TERM collapse(3) EXTRA_PRAGMA_CLAUSE) \ for (int64_t z = 0; z < ARR##_Nz; z++) { \ @@ -73,6 +83,15 @@ #define BLOCK_END() \ FOR_FLAT_END() \ FOR_BLOCK_END() + +#define BLOCK_BEGIN_WITH_OUTPUT(ARR_IN, ARR_OUT, EXTRA_PRAGMA_CLAUSE) \ + FOR_BLOCK_BEGIN_WITH_OUTPUT(ARR_IN, ARR_OUT) \ + PUSH_N_DOWN_TO_BUFFER(ARR_IN) \ + FOR_FLAT_BEGIN(ARR_IN##_buffer, global, EXTRA_PRAGMA_CLAUSE) + +#define BLOCK_END_WITH_OUTPUT() \ + FOR_FLAT_END() \ + FOR_BLOCK_END_WITH_OUTPUT() #else #ifdef _OPENMP // Should also capture OpenACC, which is why it's second. #define BLOCK_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ @@ -82,6 +101,16 @@ int64_t flat_index = z*ARR##_Ny*ARR##_Nx + y*ARR##_Nx + x; #define BLOCK_END() FOR_3D_END() + +#define BLOCK_BEGIN_WITH_OUTPUT(ARR_IN, ARR_OUT, EXTRA_PRAGMA_CLAUSE) \ + ARR_IN##_type *ARR_IN##_buffer = (ARR_IN##_type *) ARR_IN.data; \ + ARR_OUT##_type *ARR_OUT##_buffer = (ARR_OUT##_type *) ARR_OUT.data; \ + __attribute__((unused)) int64_t ARR_IN##_buffer_start = 0; \ + FOR_3D_BEGIN(ARR_IN, EXTRA_PRAGMA_CLAUSE) \ + int64_t flat_index = z*ARR_IN##_Ny*ARR_IN##_Nx + y*ARR_IN##_Nx + x; + +#define BLOCK_END_WITH_OUTPUT() FOR_3D_END() + #else #define BLOCK_BEGIN(ARR, EXTRA_PRAGMA_CLAUSE) \ int64_t flat_index = 0; \ @@ -93,6 +122,17 @@ flat_index++; \ FOR_3D_END() +#define BLOCK_BEGIN_WITH_OUTPUT(ARR_IN, ARR_OUT, EXTRA_PRAGMA_CLAUSE) \ + int64_t flat_index = 0; \ + ARR_IN##_type *ARR_IN##_buffer = (ARR_IN##_type *) ARR_IN.data; \ + ARR_OUT##_type *ARR_OUT##_buffer = (ARR_OUT##_type *) ARR_OUT.data; \ + __attribute__((unused)) int64_t ARR_IN##_buffer_start = 0; \ + FOR_3D_BEGIN(ARR_IN, EXTRA_PRAGMA_CLAUSE) + +#define BLOCK_END_WITH_OUTPUT() \ + flat_index++; \ + FOR_3D_END() + #endif #endif diff --git a/src/lib/cpp/include/datatypes.hh 
b/src/lib/cpp/include/datatypes.hh index cf37cef..72d898f 100644 --- a/src/lib/cpp/include/datatypes.hh +++ b/src/lib/cpp/include/datatypes.hh @@ -27,6 +27,8 @@ typedef mask_type voxels_type; typedef uint16_t field_type; typedef float gauss_type; typedef float real_t; +typedef mask_type solid_implant_type; +typedef mask_type front_mask_type; namespace py = pybind11; template <typename T> diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh index c7ec8dc..39641ab 100644 --- a/src/lib/cpp/include/geometry.hh +++ b/src/lib/cpp/include/geometry.hh @@ -54,6 +54,12 @@ Computes the center of mass of the given tomography. */ array<real_t,3> center_of_mass(const input_ndarray<mask_type> &voxels); +void compute_front_mask(const input_ndarray<mask_type> solid_implant, + const float voxel_size, + const matrix4x4 &Muvw, + std::array<float,6> bbox, + output_ndarray<mask_type> front_mask); + void fill_implant_mask(const input_ndarray<mask_type> implant_mask, float voxel_size, const array<float,6> &bbox, diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index 5c50f69..b947b1b 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -79,7 +79,6 @@ void fill_implant_mask(const np_maskarray implant_mask, ); } -/* void compute_front_mask(const np_array<uint8_t> &np_solid_implant, const float voxel_size, const matrix4x4 &Muvw, @@ -88,11 +87,12 @@ void compute_front_mask(const np_array<uint8_t> &np_solid_implant, auto solid_implant_info = np_solid_implant.request(); auto front_mask_info = np_front_mask.request(); - ::compute_front_mask({solid_implant_info.ptr, solid_implant_info.shape}, + return NS::compute_front_mask({solid_implant_info.ptr, solid_implant_info.shape}, voxel_size, Muvw, bbox, {front_mask_info.ptr, front_mask_info.shape}); } +/* void cylinder_projection(const np_array<float> &np_edt, // Euclidean Distance Transform in um, should be low-resolution (will be interpolated) const np_bytearray &np_Cs, 
// Material classification images (probability per voxel, 0..1 -> 0..255) float Cs_voxel_size, // Voxel size for Cs @@ -128,5 +128,5 @@ PYBIND11_MODULE(geometry, m) { //m.def("cylinder_projection", &python_api::cylinder_projection); m.def("sample_plane", &python_api::sample_plane<uint16_t>); m.def("sample_plane", &python_api::sample_plane<uint8_t>); - //m.def("compute_front_mask", &python_api::compute_front_mask); + m.def("compute_front_mask", &python_api::compute_front_mask); } diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index ce7446b..33f4a75 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -278,6 +278,35 @@ def test_fill_implant_mask(): assert_with_print(result_profile[0], result_profile[1], 1e-7, "cpu_seq vs cpu") assert_with_print(result_profile[0], result_profile[2], 1e-7, "cpu_seq vs gpu") +def test_compute_front_mask(): + n = 128 + dtype = np.uint8 + implant = np.random.randint(0, 2, (n,n,n), dtype) + voxel_size = 1 + bbox_flat = np.array([-16,16] * 3, np.float32) + rsqr_fraction = 1#0.7 + Muvwp_flat = np.array([ + 1, 0, 0, 64, + 0, 1, 0, 64, + 0, 0, 1, 64, + 0, 0, 0, 1 + ], np.float32) + n_bins = 1024 + + solid_implant_mask = np.zeros(implant.shape, np.uint8) + rsqr_maxs = np.zeros((n_bins, ), np.float32) + profile = np.zeros((n_bins, ), np.float32) + + m_cpu.fill_implant_mask(implant, voxel_size, bbox_flat, rsqr_fraction, Muvwp_flat, solid_implant_mask, rsqr_maxs, profile) + + impls = [m_cpu_seq, m_cpu, m_gpu] + + cpu_seq, cpu, gpu = [ + partial(impl.compute_front_mask, solid_implant_mask, voxel_size, Muvwp_flat, bbox_flat) + for i, impl in enumerate(impls) + ] + + compare_fs('test_compute_front_mask', cpu_seq, cpu, gpu, True, 1e-7, (solid_implant_mask.shape, solid_implant_mask.dtype)) if __name__ == '__main__': np.random.seed(42) @@ -286,4 +315,5 @@ def test_fill_implant_mask(): test_sample_plane(np.uint8) test_integrate_axes() test_zero_outside_bbox() - test_fill_implant_mask() \ No newline at end of file 
+ test_fill_implant_mask() + test_compute_front_mask() \ No newline at end of file From 3b6d642377afbbe218691c01c6069cb1696865a0 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Mar 2023 13:11:46 +0100 Subject: [PATCH 116/136] #25 Added additional debug launch configurations --- .vscode/launch.json | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.vscode/launch.json b/.vscode/launch.json index b48c6cc..cb878e1 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -4,6 +4,15 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { + "name": "Python: pre-cleanup/cylinder_surface2", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/pre-cleanup-src/analysis/cylinder_surface2.py", + "console": "integratedTerminal", + "args": ["770c_pag"], + "justMyCode": false + }, { "name": "Python: Test geometry", "type": "python", @@ -31,5 +40,14 @@ "args": ["770c_pag"], "justMyCode": false }, + { + "name": "Python: 0800_implant_data", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/src/processing_steps/0800_implant_data.py", + "console": "integratedTerminal", + "args": ["770c_pag"], + "justMyCode": false + }, ] } \ No newline at end of file From 9a17224e96294502aab530a42305ec1d346215ea Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Mar 2023 13:12:22 +0100 Subject: [PATCH 117/136] #25 Fixed imports of the old cylinder_surface script --- pre-cleanup-src/analysis/cylinder_surface2.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pre-cleanup-src/analysis/cylinder_surface2.py b/pre-cleanup-src/analysis/cylinder_surface2.py index f9002fd..c31b484 100644 --- a/pre-cleanup-src/analysis/cylinder_surface2.py +++ b/pre-cleanup-src/analysis/cylinder_surface2.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 import os, sys, h5py, numpy as np, 
pathlib, tqdm, vedo, matplotlib.pyplot as plt, edt, vedo.pointcloud as pc, scipy.ndimage as ndi -sys.path.append(sys.path[0]+"/../") +sys.path.append(sys.path[0]+"/../../src") from config.paths import * -from helper_functions import * -from pybind_kernels.geometry import cylinder_projection +from lib.py.helpers import commandline_args +from lib.cpp.cpu_seq.geometry import cylinder_projection NA = np.newaxis @@ -25,10 +25,10 @@ def homogeneous_transform(xs, M): def np_save(path,data): output_dir = os.path.dirname(path) - pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) + pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) np.save(path,data) - + # Requires: implant-FoR # soft-tissue/bone segmentation + blood analysis # EDT-field @@ -55,18 +55,18 @@ def np_save(path,data): print(f"Cant't read implant frame-of-reference: {e}") print(f"Make sure you have run segment-implant-cc.py and implant-FoR.py for {sample} at scale {mask_scale}x") sys.exit(-1) - + try: blood_mask = h5mask["blood/mask"][:] solid_implant = h5mask["implant_solid/mask"][:] - h5mask.close() + h5mask.close() except Exception as e: print(f"Cant't read masks: {e}") print("Make sure you have run compute_histograms.py, generate_xx_probabilities.py, segment_from_distributions,\n"+ "and segment-blood-cc.py") sys.exit(-1) - + P0_binfile = f"{binary_root}/segmented/P0/{segment_scale}x/{sample}.uint16" P1_binfile = f"{binary_root}/segmented/P1/{segment_scale}x/{sample}.uint16" edt_binfile = f"{binary_root}/fields/implant-edt/{mask_scale}x/{sample}.uint16" @@ -92,4 +92,4 @@ def np_save(path,data): d_min, d_max, theta_min, theta_max, tuple(bbox.flatten()), tuple(Muvwp.flatten()), images, counts) - + From 3b1d79404083ca421d27c007315d080a90d3b760 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Thu, 9 Mar 2023 13:13:34 +0100 Subject: [PATCH 118/136] #25 Added geometry::cylinder_projection --- src/lib/cpp/cpu/geometry.cc | 20 ++- src/lib/cpp/cpu_seq/geometry.cc | 283 
+++++++++++++------------------- src/lib/cpp/gpu/geometry.cc | 20 ++- src/lib/cpp/include/geometry.hh | 69 +++++++- src/pybind/geometry-pybind.cc | 7 +- src/test/test_geometry.py | 13 ++ 6 files changed, 222 insertions(+), 190 deletions(-) diff --git a/src/lib/cpp/cpu/geometry.cc b/src/lib/cpp/cpu/geometry.cc index b173519..491c171 100644 --- a/src/lib/cpp/cpu/geometry.cc +++ b/src/lib/cpp/cpu/geometry.cc @@ -22,6 +22,19 @@ void compute_front_mask(const input_ndarray<mask_type> solid_implant, return cpu_seq::compute_front_mask(solid_implant, voxel_size, Muvw, bbox, front_mask); } +void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance Transform in um, should be low-resolution (will be interpolated) + const input_ndarray<uint8_t> C, // Material classification images (probability per voxel, 0..1 -> 0..255) + float voxel_size, // Voxel size for Cs + float d_min, float d_max, // Distance shell to map to cylinder + float theta_min, float theta_max, // Angle range (wrt cylinder center) + std::array<float,6> bbox, + const matrix4x4 &Muvw, // Transform from zyx (in um) to U'V'W' cylinder FoR (in um) + output_ndarray<float> image, // Probability-weighted volume of (class,theta,U)-voxels + output_ndarray<int64_t> count // Number of (class,theta,U)-voxels + ){ + return cpu_seq::cylinder_projection(edt, C, voxel_size, d_min, d_max, theta_min, theta_max, bbox, Muvw, image, count); +} + void fill_implant_mask(const input_ndarray<mask_type> mask, float voxel_size, const array<float,6> &bbox, @@ -46,13 +59,6 @@ void integrate_axes(const input_ndarray<mask_type> &mask, return cpu_seq::integrate_axes(mask, x0, v_axis, w_axis, v_min, w_min, output); } -template <typename T> -float resample2x2x2(const T *voxels, - const array<ssize_t, 3> &shape, - const array<float, 3> &X) { - return cpu_seq::resample2x2x2(voxels, shape, X); -} - template <typename T> void sample_plane(const input_ndarray<T> &voxels, const real_t voxel_size, // In micrometers diff --git 
a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 8ad180c..d7793b6 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -47,6 +47,7 @@ void compute_front_mask(const input_ndarray<mask_type> solid_implant, const auto [U_min, U_max, V_min, V_max, W_min, W_max] = bbox; UNPACK_NUMPY(solid_implant) + // TODO move the typedefs here, rather than having them globally in datatypes.hh BLOCK_BEGIN_WITH_OUTPUT(solid_implant, front_mask, ) { std::array<real_t, 4> Xs = { @@ -66,6 +67,121 @@ void compute_front_mask(const input_ndarray<mask_type> solid_implant, BLOCK_END_WITH_OUTPUT() } } +void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance Transform in um, should be low-resolution (will be interpolated) + const input_ndarray<uint8_t> C, // Material classification images (probability per voxel, 0..1 -> 0..255) + float voxel_size, // Voxel size for Cs + float d_min, float d_max, // Distance shell to map to cylinder + float theta_min, float theta_max, // Angle range (wrt cylinder center) + std::array<float,6> bbox, + const matrix4x4 &Muvw, // Transform from zyx (in um) to U'V'W' cylinder FoR (in um) + output_ndarray<float> image, // Probability-weighted volume of (class,theta,U)-voxels + output_ndarray<int64_t> count // Number of (class,theta,U)-voxels + ){ + UNPACK_NUMPY(C); + UNPACK_NUMPY(edt); + + ssize_t n_theta = image.shape[0], n_U = image.shape[1]; + + const auto& [U_min, U_max, V_min, V_max, W_min, W_max] = bbox; + + real_t + edz = edt_Nz / real_t(C_Nz), + edy = edt_Ny / real_t(C_Ny), + edx = edt_Nx / real_t(C_Nx); + + //printf("Segmenting from %g to %g micrometers distance of implant.\n",d_min,d_max); + //printf("Bounding box is [U_min,U_max,V_min,V_max,W_min,W_max] = [[%g,%g],[%g,%g],[%g,%g]]\n", + // U_min,U_max,V_min,V_max,W_min,W_max); + //printf("EDT field is (%ld,%ld,%ld)\n",ex,ey,ez); + + real_t th_min = 1234, th_max = -1234; + ssize_t n_shell = 0; + ssize_t n_shell_bbox = 0; + + 
ssize_t block_height = 64; + + //TODO: new acc/openmp macro in parallel.hh + // TODO postponed, to get a working edition first + //typedef uint8_t C_type; + //BLOCK_BEGIN(C, "reduction(+:n_shell,n_shell_bbox)") { + //BLOCK_END() + + { + float *image_d = image.data; + int64_t *count_d = count.data; + + for (ssize_t block_start = 0, edt_block_start = 0; block_start < C_length; block_start += block_height*C_Ny*C_Nz, edt_block_start += block_height*edt_Ny*edt_Nz) { + const uint8_t *C_buffer = C.data + block_start; + const float *edt_block = edt.data + max(block_start - edt_Ny*edt_Nz, 0L); + + ssize_t this_block_length = min(block_height*C_Ny*C_Nz,C_length-block_start); + ssize_t this_edt_length = min((block_height+2)*edt_Ny*edt_Nz,edt_length-block_start); + + //#pragma acc parallel loop copy(C_buffer[:this_block_length], image_d[:n_theta*n_U], count_d[:n_theta*n_U], bbox[:6], Muvw[:16], edt_block[:this_edt_length]) reduction(+:n_shell,n_shell_bbox) + //#pragma omp parallel for reduction(+:n_shell,n_shell_bbox) + for (int64_t k = 0; k < this_block_length; k++) { + const int64_t flat_idx = block_start + k; + const int64_t X = (flat_idx / (C_Ny*C_Nz)), Y = (flat_idx / C_Nz) % C_Ny, Z = flat_idx % C_Nz; // Integer indices: Cs[c,X,Y,Z] + // Index into local block + const int64_t Xl = (k / (C_Ny*C_Nz)), Yl = (k / C_Nz) % C_Ny, Zl = k % C_Nz; + // Index into local edt block. 
Note EDT has 1-slice padding top+bottom + const float x = (Xl+1)*edx, y = Yl*edy, z = Zl*edy; + + if (x > block_height) { + printf("Block number k=%ld.\nX,Y,Z=%ld,%ld,%ld\nXl,Yl,Zl=%ld,%ld,%ld\nx,y,z=%.2f, %.2f, %.2f\n",k,X,Y,Z,Xl,Yl,Zl,x,y,z); + abort(); + } + + // ****** MEAT OF THE IMPLEMENTATION IS HERE ****** + real_t distance = resample2x2x2<float>(edt_block, {this_edt_length/(edt_Ny*edt_Nz),edt_Ny,edt_Nz}, {x,y,z}); + + if (distance > d_min && distance <= d_max) { // TODO: and W>w_min + array<real_t,4> Xs = {X*voxel_size, Y*voxel_size, Z*voxel_size, 1}; + auto [U,V,W,c] = hom_transform(Xs,Muvw); + n_shell ++; + + // printf("distance = %.1f, U,V,W = %.2f,%.2f,%.2f\n",distance,U,V,W); + if (in_bbox(U,V,W,bbox)) { + real_t theta = atan2(V,W); + + if (theta >= theta_min && theta <= theta_max) { + n_shell_bbox++; + + ssize_t theta_i = floor( (theta-theta_min) * (n_theta-1)/(theta_max-theta_min) ); + ssize_t U_i = floor( (U - U_min) * (n_U -1)/( U_max- U_min) ); + + real_t p = C_buffer[k]/255.; + + assert(theta >= theta_min); + assert(theta <= theta_max); + assert(U >= U_min); + assert(U <= U_max); + assert(theta_i >= 0); + assert(theta_i < n_theta); + assert(U_i >= 0); + assert(U_i < n_U); + + if (p > 0) { + th_min = min(theta,th_min); + th_max = max(theta,th_max); + + //atomic_statement() + image_d[theta_i*n_U + U_i] += p; + + //atomic_statement() + count_d[theta_i*n_U + U_i] += 1; + } + } + } + } + } + } + } + printf("n_shell = %ld, n_shell_bbox = %ld\n",n_shell,n_shell_bbox); + printf("theta_min, theta_max = %.2f,%.2f\n",theta_min,theta_max); + printf("th_min, th_max = %.2f,%.2f\n",th_min,th_max); +} + void fill_implant_mask(const input_ndarray<mask_type> mask, float voxel_size, const array<float,6> &bbox, @@ -191,59 +307,6 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array }; } -template <typename T> -float resample2x2x2(const T *voxels, - const array<ssize_t, 3> &shape, - const array<float, 3> &X) { - auto [Nz,Ny,Nx] = shape; 
- - if (!in_bbox(X[0], X[1], X[2], {0.5f, float(Nx)-0.5f, 0.5f, float(Ny)-0.5f, 0.5f, float(Nz)-0.5f})) { - uint64_t voxel_index = uint64_t(floor(X[0]))*Ny*Nz + uint64_t(floor(X[1]))*Ny + uint64_t(floor(X[2])); - return voxels[voxel_index]; - } - - float Xfrac[2][3]; // {Xminus[3], Xplus[3]} - int64_t Xint[2][3]; // {Iminus[3], Iplus[3]} - float value = 0; - - for (int i = 0; i < 3; i++) { - float Iminus, Iplus; - Xfrac[0][i] = 1-modf(X[i]-0.5f, &Iminus); // 1-{X[i]-1/2}, floor(X[i]-1/2) - Xfrac[1][i] = modf(X[i]+0.5f, &Iplus); // {X[i]+1/2}, floor(X[i]+1/2) - - Xint[0][i] = (int64_t) Iminus; - Xint[1][i] = (int64_t) Iplus; - } - - for (int ijk = 0; ijk <= 7; ijk++) { - float weight = 1; - int64_t IJK[3] = {0,0,0}; - - for (int axis = 0; axis < 3; axis++) { // x-1/2 or x+1/2 - int pm = (ijk >> axis) & 1; - IJK[axis] = Xint[pm][axis]; - weight *= Xfrac[pm][axis]; - } - - auto [I,J,K] = IJK; - // if (I<0 || J<0 || K<0) { - // printf("(I,J,K) = (%ld,%ld,%ld)\n",I,J,K); - // abort(); - // } - // if (I>=int(Nx) || J>=int(Ny) || K>=int(Nz)) { - // printf("(I,J,K) = (%ld,%ld,%ld), (Nx,Ny,Nz) = (%ld,%ld,%ld)\n",I,J,K,Nx,Ny,Nz); - // abort(); - // } - uint64_t voxel_index = I*Ny*Nz+J*Ny+K; - //assert(I>=0 && J>=0 && K>=0); - //assert(I<Nx && J<Ny && K<Nz); - float voxel = (float) voxels[voxel_index]; - value += voxel*weight; - } - - return value; -} - template <typename T> void sample_plane(const input_ndarray<T> &voxels, const real_t voxel_size, // In micrometers @@ -377,117 +440,3 @@ void zero_outside_bbox(const array<real_t,9> &principal_axes, } } - -/* -void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance Transform in um, should be low-resolution (will be interpolated) - const input_ndarray<uint8_t> C, // Material classification images (probability per voxel, 0..1 -> 0..255) - float voxel_size, // Voxel size for Cs - float d_min, float d_max, // Distance shell to map to cylinder - float theta_min, float theta_max, // Angle range (wrt cylinder 
center) - std::array<float,6> bbox, - const matrix4x4 &Muvw, // Transform from zyx (in um) to U'V'W' cylinder FoR (in um) - output_ndarray<float> image, // Probability-weighted volume of (class,theta,U)-voxels - output_ndarray<int64_t> count // Number of (class,theta,U)-voxels - ){ - ssize_t n_theta = image.shape[0], n_U = image.shape[1]; - - const auto& [U_min,U_max,V_min,V_max,W_min,W_max] = bbox; - - ssize_t ex = edt.shape[0], ey = edt.shape[1], ez = edt.shape[2]; - ssize_t Cx = C.shape[0], Cy = C.shape[1], Cz = C.shape[2]; - - real_t edx = ex/real_t(Cx), edy = ey/real_t(Cy), edz = ex/real_t(Cz); - - ssize_t edt_length = ex*ey*ez; - ssize_t C_length = Cx*Cy*Cz; - - printf("Segmenting from %g to %g micrometers distance of implant.\n",d_min,d_max); - - printf("Bounding box is [U_min,U_max,V_min,V_max,W_min,W_max] = [[%g,%g],[%g,%g],[%g,%g]]\n", - U_min,U_max,V_min,V_max,W_min,W_max); - printf("EDT field is (%ld,%ld,%ld)\n",ex,ey,ez); - - real_t th_min = 1234, th_max = -1234; - ssize_t n_shell = 0; - ssize_t n_shell_bbox = 0; - - ssize_t block_height = 64; - - //TODO: new acc/openmp macro in parallel.hh - { - float *image_d = image.data; - int64_t *count_d = count.data; - - for (ssize_t block_start = 0, edt_block_start = 0; block_start < C_length; block_start += block_height*Cy*Cz, edt_block_start += block_height*ey*ez) { - const uint8_t *C_buffer = C.data + block_start; - const float *edt_block = edt.data + max(block_start-ey*ez,0L); - - ssize_t this_block_length = min(block_height*Cy*Cz,C_length-block_start); - ssize_t this_edt_length = min((block_height+2)*ey*ez,edt_length-block_start); - - //#pragma acc parallel loop copy(C_buffer[:this_block_length], image_d[:n_theta*n_U], count_d[:n_theta*n_U], bbox[:6], Muvw[:16], edt_block[:this_edt_length]) reduction(+:n_shell,n_shell_bbox) - #pragma omp parallel for reduction(+:n_shell,n_shell_bbox) - for (int64_t k = 0; k < this_block_length; k++) { - const int64_t flat_idx = block_start + k; - const int64_t X = 
(flat_idx / (Cy*Cz)), Y = (flat_idx / Cz) % Cy, Z = flat_idx % Cz; // Integer indices: Cs[c,X,Y,Z] - // Index into local block - const int64_t Xl = (k / (Cy*Cz)), Yl = (k / Cz) % Cy, Zl = k % Cz; - // Index into local edt block. Note EDT has 1-slice padding top+bottom - const float x = (Xl+1)*edx, y = Yl*edy, z = Zl*edy; - - if (x > block_height) { - printf("Block number k=%ld.\nX,Y,Z=%ld,%ld,%ld\nXl,Yl,Zl=%ld,%ld,%ld\nx,y,z=%.2f, %.2f, %.2f\n",k,X,Y,Z,Xl,Yl,Zl,x,y,z); - abort(); - } - - // ****** MEAT OF THE IMPLEMENTATION IS HERE ****** - real_t distance = resample2x2x2<float>(edt_block, {this_edt_length/(ey*ez),ey,ez}, {x,y,z}); - - if (distance > d_min && distance <= d_max) { // TODO: and W>w_min - array<real_t,4> Xs = {X*voxel_size, Y*voxel_size, Z*voxel_size, 1}; - auto [U,V,W,c] = hom_transform(Xs,Muvw); - n_shell ++; - - // printf("distance = %.1f, U,V,W = %.2f,%.2f,%.2f\n",distance,U,V,W); - if (in_bbox(U,V,W,bbox)) { - real_t theta = atan2(V,W); - - if (theta >= theta_min && theta <= theta_max) { - n_shell_bbox++; - - ssize_t theta_i = floor( (theta-theta_min) * (n_theta-1)/(theta_max-theta_min) ); - ssize_t U_i = floor( (U - U_min) * (n_U -1)/( U_max- U_min) ); - - real_t p = C_buffer[k]/255.; - - assert(theta >= theta_min); - assert(theta <= theta_max); - assert(U >= U_min); - assert(U <= U_max); - assert(theta_i >= 0); - assert(theta_i < n_theta); - assert(U_i >= 0); - assert(U_i < n_U); - - if (p > 0) { - th_min = min(theta,th_min); - th_max = max(theta,th_max); - - //atomic_statement() - image_d[theta_i*n_U + U_i] += p; - - //atomic_statement() - count_d[theta_i*n_U + U_i] += 1; - } - } - } - } - } - } - } - printf("n_shell = %ld, n_shell_bbox = %ld\n",n_shell,n_shell_bbox); - printf("theta_min, theta_max = %.2f,%.2f\n",theta_min,theta_max); - printf("th_min, th_max = %.2f,%.2f\n",th_min,th_max); -} - -*/ \ No newline at end of file diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index 57126dc..b76db65 100644 --- 
a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -21,6 +21,19 @@ void compute_front_mask(const input_ndarray<mask_type> solid_implant, return cpu_seq::compute_front_mask(solid_implant, voxel_size, Muvw, bbox, front_mask); } +void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance Transform in um, should be low-resolution (will be interpolated) + const input_ndarray<uint8_t> C, // Material classification images (probability per voxel, 0..1 -> 0..255) + float voxel_size, // Voxel size for Cs + float d_min, float d_max, // Distance shell to map to cylinder + float theta_min, float theta_max, // Angle range (wrt cylinder center) + std::array<float,6> bbox, + const matrix4x4 &Muvw, // Transform from zyx (in um) to U'V'W' cylinder FoR (in um) + output_ndarray<float> image, // Probability-weighted volume of (class,theta,U)-voxels + output_ndarray<int64_t> count // Number of (class,theta,U)-voxels + ){ + return cpu_seq::cylinder_projection(edt, C, voxel_size, d_min, d_max, theta_min, theta_max, bbox, Muvw, image, count); +} + void fill_implant_mask(const input_ndarray<mask_type> mask, float voxel_size, const array<float,6> &bbox, @@ -144,13 +157,6 @@ void integrate_axes(const input_ndarray<mask_type> &mask, return cpu_seq::integrate_axes(mask, x0, v_axis, w_axis, v_min, w_min, output); } -template <typename T> -float resample2x2x2(const T *voxels, - const array<ssize_t, 3> &shape, - const array<float, 3> &X) { - return cpu_seq::resample2x2x2(voxels, shape, X); -} - template <typename T> void sample_plane(const input_ndarray<T> &voxels, const real_t voxel_size, // In micrometers diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh index 39641ab..629aa53 100644 --- a/src/lib/cpp/include/geometry.hh +++ b/src/lib/cpp/include/geometry.hh @@ -44,6 +44,59 @@ inline bool in_bbox(float U, float V, float W, const std::array<float, 6> &bbox) return inside; } +template <typename T> +float resample2x2x2(const T 
*voxels, + const array<ssize_t, 3> &shape, + const array<float, 3> &X) { + auto [Nz,Ny,Nx] = shape; + + if (!in_bbox(X[0], X[1], X[2], {0.5f, float(Nx)-0.5f, 0.5f, float(Ny)-0.5f, 0.5f, float(Nz)-0.5f})) { + uint64_t voxel_index = uint64_t(floor(X[0]))*Ny*Nz + uint64_t(floor(X[1]))*Ny + uint64_t(floor(X[2])); + return voxels[voxel_index]; + } + + float Xfrac[2][3]; // {Xminus[3], Xplus[3]} + int64_t Xint[2][3]; // {Iminus[3], Iplus[3]} + float value = 0; + + for (int i = 0; i < 3; i++) { + float Iminus, Iplus; + Xfrac[0][i] = 1-modf(X[i]-0.5f, &Iminus); // 1-{X[i]-1/2}, floor(X[i]-1/2) + Xfrac[1][i] = modf(X[i]+0.5f, &Iplus); // {X[i]+1/2}, floor(X[i]+1/2) + + Xint[0][i] = (int64_t) Iminus; + Xint[1][i] = (int64_t) Iplus; + } + + for (int ijk = 0; ijk <= 7; ijk++) { + float weight = 1; + int64_t IJK[3] = {0,0,0}; + + for (int axis = 0; axis < 3; axis++) { // x-1/2 or x+1/2 + int pm = (ijk >> axis) & 1; + IJK[axis] = Xint[pm][axis]; + weight *= Xfrac[pm][axis]; + } + + auto [I,J,K] = IJK; + // if (I<0 || J<0 || K<0) { + // printf("(I,J,K) = (%ld,%ld,%ld)\n",I,J,K); + // abort(); + // } + // if (I>=int(Nx) || J>=int(Ny) || K>=int(Nz)) { + // printf("(I,J,K) = (%ld,%ld,%ld), (Nx,Ny,Nz) = (%ld,%ld,%ld)\n",I,J,K,Nx,Ny,Nz); + // abort(); + // } + uint64_t voxel_index = I*Ny*Nz+J*Ny+K; + //assert(I>=0 && J>=0 && K>=0); + //assert(I<Nx && J<Ny && K<Nz); + float voxel = (float) voxels[voxel_index]; + value += voxel*weight; + } + + return value; +} + namespace NS { /* @@ -60,6 +113,17 @@ void compute_front_mask(const input_ndarray<mask_type> solid_implant, std::array<float,6> bbox, output_ndarray<mask_type> front_mask); +void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance Transform in um, should be low-resolution (will be interpolated) + const input_ndarray<uint8_t> C, // Material classification images (probability per voxel, 0..1 -> 0..255) + float voxel_size, // Voxel size for Cs + float d_min, float d_max, // Distance shell to map to cylinder + 
float theta_min, float theta_max, // Angle range (wrt cylinder center) + std::array<float,6> bbox, + const matrix4x4 &Muvw, // Transform from zyx (in um) to U'V'W' cylinder FoR (in um) + output_ndarray<float> image, // Probability-weighted volume of (class,theta,U)-voxels + output_ndarray<int64_t> count // Number of (class,theta,U)-voxels + ); + void fill_implant_mask(const input_ndarray<mask_type> implant_mask, float voxel_size, const array<float,6> &bbox, @@ -85,11 +149,6 @@ void integrate_axes(const input_ndarray<mask_type> &mask, const real_t v_min, const real_t w_min, output_ndarray<uint64_t> output); -template <typename T> -float resample2x2x2(const T *voxels, - const array<ssize_t,3> &shape, - const array<float,3> &X); - template <typename T> void sample_plane(const input_ndarray<T> &voxels, const real_t voxel_size, // In micrometers diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index b947b1b..ebd09b7 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -92,7 +92,6 @@ void compute_front_mask(const np_array<uint8_t> &np_solid_implant, {front_mask_info.ptr, front_mask_info.shape}); } -/* void cylinder_projection(const np_array<float> &np_edt, // Euclidean Distance Transform in um, should be low-resolution (will be interpolated) const np_bytearray &np_Cs, // Material classification images (probability per voxel, 0..1 -> 0..255) float Cs_voxel_size, // Voxel size for Cs @@ -108,12 +107,12 @@ void cylinder_projection(const np_array<float> &np_edt, // Euclidean Distance auto images_info = np_images.request(); auto counts_info = np_counts.request(); - ::cylinder_projection({edt_info.ptr,edt_info.shape}, + NS::cylinder_projection({edt_info.ptr,edt_info.shape}, {Cs_info.ptr, Cs_info.shape}, Cs_voxel_size,d_min,d_max,theta_min,theta_max,bbox,Muvw, {images_info.ptr, images_info.shape}, {counts_info.ptr, counts_info.shape}); -}*/ +} } @@ -125,7 +124,7 @@ PYBIND11_MODULE(geometry, m) { m.def("integrate_axes", 
&python_api::integrate_axes); m.def("zero_outside_bbox", &python_api::zero_outside_bbox); m.def("fill_implant_mask", &python_api::fill_implant_mask); - //m.def("cylinder_projection", &python_api::cylinder_projection); + m.def("cylinder_projection", &python_api::cylinder_projection); m.def("sample_plane", &python_api::sample_plane<uint16_t>); m.def("sample_plane", &python_api::sample_plane<uint8_t>); m.def("compute_front_mask", &python_api::compute_front_mask); diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index 33f4a75..6070c93 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -10,6 +10,7 @@ from config.paths import hdf5_root import datetime +import edt from functools import partial import h5py import numpy as np @@ -308,6 +309,18 @@ def test_compute_front_mask(): compare_fs('test_compute_front_mask', cpu_seq, cpu, gpu, True, 1e-7, (solid_implant_mask.shape, solid_implant_mask.dtype)) +# TODO postponed because it's not used until after segment_from_distributions, i.e. in the last analysis phase. 
+#def test_cylinder_projection(): +# n = 128 +# implant_mask = np.zeros((n,n,n), np.uint8) +# implant_mask[:,n//2-4:n//2+4,n//2-4:n//2+4] = 1 +# edt_field = edt.edt(~implant_mask, parallel=16) +# +# m_cpu_seq.cylinder_projection(edt_field, Cs, Cs_voxel_size, +# d_min, d_max, theta_min, theta_max, +# tuple(bbox.flatten()), tuple(Muvwp.flatten()), +# images, counts) + if __name__ == '__main__': np.random.seed(42) test_center_of_mass() From 50864ecdcae372ca38c134c36ecb0c814194470e Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Mar 2023 16:10:34 +0100 Subject: [PATCH 119/136] #25 Added launch configuration for step 0800 --- .vscode/launch.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.vscode/launch.json b/.vscode/launch.json index cb878e1..2012182 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -40,6 +40,15 @@ "args": ["770c_pag"], "justMyCode": false }, + { + "name": "Python: 0700_implant_FoR", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/src/processing_steps/0700_implant_FoR.py", + "console": "integratedTerminal", + "args": ["770c_pag"], + "justMyCode": false + }, { "name": "Python: 0800_implant_data", "type": "python", From 8fa4c8575e4d37878ad6912f52d83b5af2488a0e Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Mar 2023 16:11:13 +0100 Subject: [PATCH 120/136] #25 Added rc file for running vedo over ssh --- src/vedo_setup.sh | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100755 src/vedo_setup.sh diff --git a/src/vedo_setup.sh b/src/vedo_setup.sh new file mode 100755 index 0000000..4dcb108 --- /dev/null +++ b/src/vedo_setup.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# +set -x +export DISPLAY=:99.0 +Xvfb :99 -screen 0 1024x1024x24 > /dev/null 2&>1 & +sleep 3 +set +x +exec "$@" From 36485901330415b7122f5a4c2b88f0080c44fe60 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Mar 2023 16:16:56 +0100 Subject: 
[PATCH 121/136] #25 Somethings is not right with the zyx vs xyz ordering of values --- src/lib/cpp/cpu_seq/geometry.cc | 2 +- src/lib/cpp/include/geometry.hh | 2 +- src/test/test_geometry.py | 89 +++++++++++++++++++++++++-------- 3 files changed, 69 insertions(+), 24 deletions(-) diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index d7793b6..5c0acfa 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -36,7 +36,7 @@ array<real_t, 3> center_of_mass(const input_ndarray<mask_type> &mask) { print_timestamp("center_of_mass end"); - return array<real_t, 3>{ rcmz, rcmy, rcmx }; + return array<real_t, 3>{ rcmx, rcmy, rcmz }; } void compute_front_mask(const input_ndarray<mask_type> solid_implant, diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh index 629aa53..937c5d2 100644 --- a/src/lib/cpp/include/geometry.hh +++ b/src/lib/cpp/include/geometry.hh @@ -48,7 +48,7 @@ template <typename T> float resample2x2x2(const T *voxels, const array<ssize_t, 3> &shape, const array<float, 3> &X) { - auto [Nz,Ny,Nx] = shape; + auto [Nx,Ny,Nz] = shape; if (!in_bbox(X[0], X[1], X[2], {0.5f, float(Nx)-0.5f, 0.5f, float(Ny)-0.5f, 0.5f, float(Nz)-0.5f})) { uint64_t voxel_index = uint64_t(floor(X[0]))*Ny*Nz + uint64_t(floor(X[1]))*Ny + uint64_t(floor(X[2])); diff --git a/src/test/test_geometry.py b/src/test/test_geometry.py index 6070c93..8ee1f87 100644 --- a/src/test/test_geometry.py +++ b/src/test/test_geometry.py @@ -9,10 +9,12 @@ sys.path.append(sys.path[0]+'/../') from config.paths import hdf5_root +import argparse import datetime import edt from functools import partial import h5py +import matplotlib.pyplot as plt import numpy as np import pytest @@ -43,6 +45,27 @@ def assert_with_print(a, b, tolerance=1e-7, names=None): print (names) assert all_close +def compare_fs(func, baseline_f, cpu_f, gpu_f, should_assert=True, tolerance=1e-7, + allocate_result: tuple[tuple[int],np.dtype] | 
np.ndarray=None): + baseline, baseline_t = run_with_warmup(baseline_f, allocate_result) + print (f'({func}) Sequential ran in {baseline_t}') + if should_assert: assert_interesting_result(baseline) + + cpu, cpu_t = run_with_warmup(cpu_f, allocate_result) + print (f'({func}) Parallel CPU ran in {cpu_t}, which is {baseline_t / cpu_t:.02f} times faster than sequential') + if should_assert: assert_with_print(baseline, cpu, tolerance, 'cpu_seq vs cpu') + + gpu, gpu_t = run_with_warmup(gpu_f, allocate_result) + print (f'({func}) GPU ran in {gpu_t}, which is {baseline_t / gpu_t:.02f} times faster than sequential') + if should_assert: assert_with_print(baseline, gpu, tolerance, 'cpu_seq vs gpu') + +def parse_args(): + parser = argparse.ArgumentParser(description="Unit tests for the geometry C++ functions.") + + parser.add_argument('tests', nargs='*', help='Which test(s) to run. It can be either the name, or the index of the test.') + + return parser.parse_args() + def run_with_warmup(f, allocate_result=None): ''' Runs the given function and returns the result and how long time it took to run. 
@@ -65,20 +88,6 @@ def run_with_warmup(f, allocate_result=None): end = datetime.datetime.now() return result, end - start -def compare_fs(func, baseline_f, cpu_f, gpu_f, should_assert=True, tolerance=1e-7, - allocate_result: tuple[tuple[int],np.dtype] | np.ndarray=None): - baseline, baseline_t = run_with_warmup(baseline_f, allocate_result) - print (f'({func}) Sequential ran in {baseline_t}') - if should_assert: assert_interesting_result(baseline) - - cpu, cpu_t = run_with_warmup(cpu_f, allocate_result) - print (f'({func}) Parallel CPU ran in {cpu_t}, which is {baseline_t / cpu_t:.02f} times faster than sequential') - if should_assert: assert_with_print(baseline, cpu, tolerance, 'cpu_seq vs cpu') - - gpu, gpu_t = run_with_warmup(gpu_f, allocate_result) - print (f'({func}) GPU ran in {gpu_t}, which is {baseline_t / gpu_t:.02f} times faster than sequential') - if should_assert: assert_with_print(baseline, gpu, tolerance, 'cpu_seq vs gpu') - def test_center_of_mass(): voxels = np.random.randint(0, 256, (n,n,n), np.uint8) @@ -104,7 +113,7 @@ def test_inertia_matrix(): assert_interesting_result(baseline()) @pytest.mark.parametrize("dtype", [np.uint8, np.uint16]) -def test_sample_plane(dtype): +def test_sample_plane(dtype, debug=False): # TODO something that isn't just random data? n = 128 voxels = np.random.randint(0, np.iinfo(dtype).max, (n,n,n), dtype) @@ -125,6 +134,21 @@ def test_sample_plane(dtype): # TODO the function is unstable, even when they're all calling the sequential implementation, t least when comparing gcc against nvcc, but it differs at most with 1. Hence the higher tolerance for this test. 
Can be tested with something like for i in range(10000): compare_fs('sample_plane', cpu_seq, cpu, gpu, True, 1.1, ((64,64), np.float32)) + if debug: + voxels = np.zeros((n,n,n), dtype) + voxels[:, n//2-5:n//2+5, n//2-5:n//2-2] = 1 + voxels[:, n//2-5:n//2+5, n//2+2:n//2+5] = 1 + voxel_size = 1 + cm = m_cpu.center_of_mass(voxels) + # TODO plan vektorne er z y x, ikke x y z!! Trace hvorfor! + v_vec = np.array([0,0,1], np.float32) + w_vec = np.array([0,1,0], np.float32) + bbox = [-n//2, n//2, -n//2, n//2] + result = np.zeros((n, n), np.float32) + m_cpu_seq.sample_plane(voxels, voxel_size, cm, v_vec, w_vec, bbox, result) + plt.imshow(result) + plt.savefig('pis.png') + def test_integrate_axes(): n = 128 dtype = np.uint8 @@ -323,10 +347,31 @@ def test_compute_front_mask(): if __name__ == '__main__': np.random.seed(42) - test_center_of_mass() - test_inertia_matrix() - test_sample_plane(np.uint8) - test_integrate_axes() - test_zero_outside_bbox() - test_fill_implant_mask() - test_compute_front_mask() \ No newline at end of file + args = parse_args() + + if len(args.tests) == 0: + test_center_of_mass() + test_inertia_matrix() + test_sample_plane(np.uint8) + test_integrate_axes() + test_zero_outside_bbox() + test_fill_implant_mask() + test_compute_front_mask() + else: + for test in args.tests: + if test == '1' or test == 'center_of_mass': + test_center_of_mass() + elif test == '2' or test == 'inertia_matrix': + test_inertia_matrix() + elif test == '3' or test == 'sample_plane': + test_sample_plane(np.uint8, debug=True) + elif test == '4' or test == 'integrate_axes': + test_integrate_axes() + elif test == '5' or test == 'zero_outside_bbox': + test_zero_outside_bbox() + elif test == '6' or test == 'fill_implant_mask': + test_fill_implant_mask() + elif test == '7' or test == 'compute_front_mask': + test_compute_front_mask() + else: + print (f'WARNING: skipping unknown test: "{test}"') From 5052bc895144bbb448691273d3fd4d539176bd59 Mon Sep 17 00:00:00 2001 From: Carl Johnsen 
<carl-johannes@di.ku.dk> Date: Fri, 10 Mar 2023 16:17:28 +0100 Subject: [PATCH 122/136] #25 Removed Bohrium dependency from esrf_read. Might not be the right thing, as it is very slow right now. --- src/lib/py/esrf_read.py | 42 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/lib/py/esrf_read.py b/src/lib/py/esrf_read.py index 1d62c4a..1e3475b 100644 --- a/src/lib/py/esrf_read.py +++ b/src/lib/py/esrf_read.py @@ -2,8 +2,8 @@ # Read metadata and data from raw tomograms from ESRF. # (C) James Avery for the MAXIBONE project, 2018 import numpy as np; -import bohrium as bh; -#import numpy as bh; +#import bohrium as bh; +import numpy as bh; #import jax.numpy as jp import numpy.ma as ma; import sys,re,os,tqdm; @@ -13,7 +13,7 @@ def esrf_edf_metadata(filename): meta = {}; - header_length = 1024; + header_length = 1024; with open(filename,"r",encoding="latin-1") as f: header = f.read(header_length); @@ -22,19 +22,19 @@ def esrf_edf_metadata(filename): kv = re.split("[=;]",l); if(len(kv)>=2): meta[kv[0].strip()] = kv[1].strip(); - + assert meta["ByteOrder"] == "LowByteFirst"; if(meta["DataType"] == "UnsignedShort"): meta["NumpyType"] = np.uint16; if(meta["DataType"] == "Float"): - meta["NumpyType"] = np.float32; - + meta["NumpyType"] = np.float32; + return meta; - + def esrf_edf_to_npy(filename): meta = esrf_edf_metadata(filename); - header_length = 1024; + header_length = 1024; with open(filename,"rb") as f: f.seek(header_length,os.SEEK_SET); @@ -71,8 +71,8 @@ def esrf_full_tomogram(info): def esrf_edf_to_bh(filename): meta = esrf_edf_metadata(filename); - (nx,ny) = (int(meta["Dim_2"]), int(meta["Dim_1"])); - header_length = 1024; + (nx,ny) = (int(meta["Dim_2"]), int(meta["Dim_1"])); + header_length = 1024; with open(filename,"rb") as f: f.seek(header_length,os.SEEK_SET); @@ -107,8 +107,8 @@ def esrf_edfrange_to_bh(info,region): def esrf_edf_to_jp(filename): meta = esrf_edf_metadata(filename); - (nx,ny) = 
(int(meta["Dim_2"]), int(meta["Dim_1"])); - header_length = 1024; + (nx,ny) = (int(meta["Dim_2"]), int(meta["Dim_1"])); + header_length = 1024; with open(filename,"rb") as f: f.seek(header_length,os.SEEK_SET); @@ -145,7 +145,7 @@ def esrf_read_xml(filename): fields = ["subvolume_name","sizex","sizey","sizez","originx","originy","originz","voxelsize","valmin","valmax","byte_order","s1","s2","S1","S2"]; fieldstrings = ["\<{}\>(.*)\<\/{}\>".format(f,f) for f in fields]; res = [re.compile(s,re.IGNORECASE) for s in fieldstrings]; - xmlmeta = {}; + xmlmeta = {}; with open(filename,"r") as file: for l in file.readlines(): for i in range(len(fields)): @@ -156,7 +156,7 @@ def esrf_read_xml(filename): xmlmeta["subvolume_name"]=xmlmeta["subvolume_name"].replace("%04d","{:04d}"); xmlmeta["filename"]=filename; xmlmeta["dirname"]=os.path.dirname(filename); - + # Change printf template to python3 format template return xmlmeta; @@ -165,27 +165,27 @@ def readfile(filename): with open(filename,'r') as f: return f.readlines() - + # def frame_histogram(frame,i,bin_edges): -# # print("Calculating histogram for frame",i) +# # print("Calculating histogram for frame",i) # count = np.histogram(frame.compressed(),bins=bin_edges)[0]; # # print("Completed histogram for frame",i) # return count # #To get a total histogram, simply do np.sum(count,axis=0) # def progressive_histogram(xml,nbins=2048,bin_edges=np.array([]),num_cores=4): - + # if(len(bin_edges)==0): # bin_edges = np.linspace(float(xml["valmin"]), float(xml["valmax"]), nbins + 1); # nbins = len(bin_edges)-1; # nz = int(xml["sizez"]); -# print("sizez = ",nz) +# print("sizez = ",nz) # meta,frame = esrf_edf_n_to_npy(xml,0); # frames = np.ma.empty((4*num_cores, frame.shape[0], frame.shape[1])); # counts = np.empty((nz,nbins),dtype=int); - + # for i in range(0,nz,4*num_cores): # chunk_length = min(4*num_cores,nz-i); # for j in range(chunk_length): @@ -193,6 +193,6 @@ def readfile(filename): # _, frames[j] = esrf_edf_n_to_npy(xml,i+j); # 
counts[i:i+chunk_length] = np.array(Parallel(n_jobs=num_cores)(delayed(frame_histogram)(frames[j],i+j,bin_edges) # for j in range(chunk_length))); - + # return counts, bin_edges; - + From df03abe34453f424c8d89becb5e493ae9e71f658 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Mar 2023 16:18:44 +0100 Subject: [PATCH 123/136] #25 Ensured that 0200 runs. --- proc-steps-checklist.txt | 2 ++ .../0200_generate_byte_hdf5.py | 26 +++++++++---------- 2 files changed, 15 insertions(+), 13 deletions(-) create mode 100644 proc-steps-checklist.txt diff --git a/proc-steps-checklist.txt b/proc-steps-checklist.txt new file mode 100644 index 0000000..34a94f6 --- /dev/null +++ b/proc-steps-checklist.txt @@ -0,0 +1,2 @@ +0100 - Haven't checked. Requires ERDA to be set up properly. +0200 - Runs! diff --git a/src/processing_steps/0200_generate_byte_hdf5.py b/src/processing_steps/0200_generate_byte_hdf5.py index 559bc85..5731316 100755 --- a/src/processing_steps/0200_generate_byte_hdf5.py +++ b/src/processing_steps/0200_generate_byte_hdf5.py @@ -6,10 +6,10 @@ # /voxels: uint8(Nz,Ny,Nx). 
Nz = sum(scan_dimensions[:,0]), ny = minimum(subvolume_dimensions[:,1]), nx = minimum(subvolume_dimensions[:,2]) import h5py, sys, os.path, pathlib, tqdm sys.path.append(sys.path[0]+"/../") -import bohrium as bh # TODO: Get rid of Bohrium dependence without losing too much performance +#import bohrium as bh # TODO: Get rid of Bohrium dependence without losing too much performance from lib.py.esrf_read import * import numpy as np, matplotlib.pyplot as plt -from config.paths import * +from config.paths import hdf5_root_fast as hdf5_root, esrf_implants_root from lib.py.helpers import commandline_args from PIL import Image @@ -45,7 +45,7 @@ def normalize(A,value_range,nbits=16,dtype=np.uint16): for i in range(len(subvolume_metadata)): if verbose >= 1: print(f"{i} {sample}/{subvolume_metadata[i]['experiment']}: {subvolume_range[i]}") -if verbose >= 1: print((global_vmin, global_vmax), (Nz,Ny,Nx)) +if verbose >= 1: print((global_vmin, global_vmax), (Nz,Ny,Nx)) if verbose >= 1: print(subvolume_dimensions) if verbose >= 1: print(subvolume_range) @@ -103,7 +103,7 @@ def normalize(A,value_range,nbits=16,dtype=np.uint16): def cylinder_mask(Ny,Nx): ys = np.linspace(-1,1,Ny) xs = np.linspace(-1,1,Nx) - return (xs[NA,:]**2 + ys[:,NA]**2) < 1 + return (xs[NA,:]**2 + ys[:,NA]**2) < 1 mask = np.array(cylinder_mask(Ny,Nx)) @@ -113,10 +113,10 @@ def cylinder_mask(Ny,Nx): (sy,sx) = ((ny-Ny)//2+((ny-Ny)%2), (nx-Nx)//2+((nx-Nx)%2)) (ey,ex) = (ny-(ny-Ny)//2, nx-(nx-Nx)//2) if verbose >= 1: print((sy,ey),(sx,ex)) - + # if verbose >= 1: print(f"Loading {subvolume_info['experiment']}") # tomo = normalize(esrf_full_tomogram_bh(subvolume_info), (global_vmin,global_vmax)); - # if verbose >= 1: print(f"Writing {subvolume_info['experiment']}") + # if verbose >= 1: print(f"Writing {subvolume_info['experiment']}") # h5tomo[z_offset:z_offset+nz] = tomo[:,sy:ey,sx:ex]; # del tomo chunk = np.zeros((chunk_length,Ny,Nx),dtype=np.uint16); @@ -129,7 +129,7 @@ def cylinder_mask(Ny,Nx): if verbose >= 1: 
print(f"Chunk shape: {slab_data.shape}") if verbose >= 1: print("Max value before masking:", slab_data.max()) slab_data *= mask[NA,:,:] - if verbose >= 1: print("Max value after masking:", slab_data.max()) + if verbose >= 1: print("Max value after masking:", slab_data.max()) chunk[:chunk_end-z] = normalize(slab_data,(global_vmin,global_vmax)) if verbose >= 1: print("Max value after normalizing:", chunk.max()) @@ -138,22 +138,22 @@ def cylinder_mask(Ny,Nx): # slice_data = jp.array(slice_data[sy:ey,sx:ex].copy()) # chunk[j] = normalize(slice_data[sy:ey,sx:ex],(global_vmin,global_vmax)) * mask - + if verbose >= 1: print(f"Writing {sample} MSB slice {z+z_offset}:{chunk_end+z_offset} ({i}-{z})"); chunk_msb = ((chunk[:chunk_end-z]>>8)&0xff).astype(np.uint8) if verbose >= 1: print("chunk_msb.max: ", chunk_msb.max()) - chunk_msb = chunk_msb.copy2numpy() + #chunk_msb = chunk_msb.copy2numpy() if verbose >= 1: print("chunk_msb.copy2numpy().max: ", chunk_msb.max()) h5tomo_msb[z_offset+z:z_offset+chunk_end] = chunk_msb[:] - + if verbose >= 1: print(f"Writing {sample} LSB slice {z+z_offset}:{chunk_end+z_offset} ({i}-{z})"); chunk_lsb = (chunk[:chunk_end-z]&0xff).astype(np.uint8) if verbose >= 1: print("chunk_lsb.max: ", chunk_lsb.max()) - chunk_lsb = chunk_lsb.copy2numpy() + #chunk_lsb = chunk_lsb.copy2numpy() if verbose >= 1: print("chunk_lsb.copy2numpy().max: ", chunk_lsb.max()) h5tomo_lsb[z_offset+z:z_offset+chunk_end] = chunk_lsb[:] - np.flush() - + #np.flush() + z_offset += nz; h5file_msb.close() From 75f11a2db3e1bcdf27dd063f7842a2d0f5796abb Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Mar 2023 16:19:13 +0100 Subject: [PATCH 124/136] #25 Verified that 0300 - 0600 (inclusive) runs --- proc-steps-checklist.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/proc-steps-checklist.txt b/proc-steps-checklist.txt index 34a94f6..30b6929 100644 --- a/proc-steps-checklist.txt +++ b/proc-steps-checklist.txt @@ -1,2 +1,6 @@ 0100 - Haven't 
checked. Requires ERDA to be set up properly. 0200 - Runs! +0300 - Verified! +0400 - Runs! +0500 - Runs! +0600 - Runs! \ No newline at end of file From f28163d1063eaa957b32bca5cd150da49d130e39 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Fri, 10 Mar 2023 16:20:11 +0100 Subject: [PATCH 125/136] #25 Started working on step 0700, which doesn't look quite right. --- src/processing_steps/0700_implant_FoR.py | 174 ++++++++++------------- 1 file changed, 77 insertions(+), 97 deletions(-) diff --git a/src/processing_steps/0700_implant_FoR.py b/src/processing_steps/0700_implant_FoR.py index dd65424..b311755 100644 --- a/src/processing_steps/0700_implant_FoR.py +++ b/src/processing_steps/0700_implant_FoR.py @@ -2,7 +2,7 @@ sys.path.append(sys.path[0]+"/../") from config.constants import * from config.paths import hdf5_root, binary_root -from lib.cpp.cpu_seq.geometry import center_of_mass, inertia_matrix, sample_plane +from lib.cpp.cpu.geometry import center_of_mass, inertia_matrix, sample_plane from lib.cpp.gpu.morphology import erode_3d_sphere as erode_3d, dilate_3d_sphere as dilate_3d import matplotlib.pyplot as plt from matplotlib.colors import colorConverter @@ -15,16 +15,16 @@ # Hvor skal disse hen? def circle_center(p0,p1,p2): - m1, m2 = (p0+p1)/2, (p0+p2)/2 # Midpoints + m1, m2 = (p0+p1)/2, (p0+p2)/2 # Midpoints (dx1,dy1), (dx2,dy2) = (p1-p0), (p2-p0) # Slopes of connecting lines n1, n2 = np.array([dy1,-dx1]).T, np.array([dy2,-dx2]).T # Normals perpendicular to connecting lines - + A = np.array([n1,-n2]).T # Solve m1 + t1*n1 == m2 + t2*n2 <=> t1*n1 - t2*n2 = m2-m1 - + (t1,t2) = la.solve(A, m2-m1) c1, c2 = m1+t1*n1, m2+t2*n2 # Center of circle! 
- + assert(np.allclose(c1,c2)) return c1 @@ -55,7 +55,6 @@ def open_3d(image, r): return I1[r:-r,r:-r,r:-r].astype(image.dtype) - def coordinate_image(shape): Nz,Ny,Nx = shape if verbose >= 1: print(f"Broadcasting coordinates for {shape} image") @@ -66,8 +65,6 @@ def coordinate_image(shape): if verbose >= 1: print(f"Done") return zyxs - - def proj(u,v): # Project u onto v return (np.dot(u,v)/np.dot(v,v))*v @@ -77,7 +74,6 @@ def gramschmidt(u,v,w): return np.array([u/la.norm(u), vp/la.norm(v), wp/la.norm(w)]) - def highest_peaks(data,n,height=0.7): peaks, info = signal.find_peaks(data,height=height*data.max()) return peaks[np.argsort(info['peak_heights'])][:n] @@ -85,7 +81,7 @@ def highest_peaks(data,n,height=0.7): def largest_cc_of(mask): label, n_features = ndi.label(mask) bincnts = np.bincount(label[label>0],minlength=n_features+1) - + largest_cc_ix = np.argmax(bincnts) return (label==largest_cc_ix) @@ -110,24 +106,23 @@ def homogeneous_transform(xs, M): if verbose >= 1: print(hxs.shape, M.shape) return hxs @ M.T - def zyx_to_UVWp_transform(): Tcm = hom_translate(-cm*voxel_size) Muvw = hom_linear(UVW) TW0 = hom_translate((0,0,-w0*voxel_size)) Tcp = hom_translate(-cp) - Muvwp = hom_linear(UVWp) + Muvwp = hom_linear(UVWp) return Muvwp @ Tcp @ TW0 @ Muvw @ Tcm vaxis = {'z':np.array((0,0,1.)), 'y':np.array((0,-1.,0)), 'z2':np.array((0,0,1.))} daxis = {'z':np.array([-1,1,0]), 'y':np.array([0,0,1]), 'z2':np.array([-1.5,0,0])} - + def figure_FoR_UVW(debug=True): - vol = vedo.Volume(implant,alpha=[0,0,0.05,0.2]) + vol = vedo.Volume(implant, alpha=[0,0,0.05,0.2]) u_arrow = vedo.Arrow(cm[::-1],cm[::-1]+1/np.sqrt(ls[0]/ls[2])*100*u_vec[::-1],c='r',s=0.7) v_arrow = vedo.Arrow(cm[::-1],cm[::-1]+1/np.sqrt(ls[1]/ls[2])*100*v_vec[::-1],c='g',s=0.7) - w_arrow = vedo.Arrow(cm[::-1],cm[::-1]+100*w_vec[::-1],c='b',s=0.7) + w_arrow = vedo.Arrow(cm[::-1],cm[::-1]+100*w_vec[::-1],c='b',s=0.7) for axis in vaxis.keys(): pl = vedo.Plotter(offscreen=True, interactive=False,sharecam=False) 
@@ -147,35 +142,34 @@ def figure_FoR_UVW(debug=True): 'viewup':-vaxis[axis] }) - -# TODO: Fix lengths (voxel_size times...) +# TODO: Fix lengths (voxel_size times...) def figure_FoR_UVWp(debug=True): - implant_uvwps = homogeneous_transform(implant_zyxs*voxel_size,Muvwp) - pts = pc.Points(implant_uvwps) - + implant_uvwps = homogeneous_transform(implant_zyxs * voxel_size, Muvwp) + pts = pc.Points(implant_uvwps[:,:3]) + u_arrow = vedo.Arrow([0,0,0],1/np.sqrt(ls[0]/ls[2])*100*np.array([0,0,1]),c='r',s=0.7) v_arrow = vedo.Arrow([0,0,0],1/np.sqrt(ls[1]/ls[2])*100*v_vec[::-1],c='g',s=0.7) - w_arrow = vedo.Arrow([0,0,0],100*w_vec[::-1],c='b',s=0.7) + w_arrow = vedo.Arrow([0,0,0],100*w_vec[::-1],c='b',s=0.7) - pl = vedo.Plotter(offscreen=True, interactive=False, sharecam=False) + pl = vedo.Plotter(offscreen=True, interactive=False, sharecam=False) for axis in vaxis.keys(): pl.show([pts,u_arrow,v_arrow,w_arrow],camera={ 'pos': np.array((nz/2,ny/2,nx/2)) + 2.5*ny*daxis[axis], 'focalPoint': (nz/2,ny/2,nx/2), 'viewup':-vaxis[axis] }) - + pl.screenshot(f"{image_output_dir}/implant-FoR_UVWp-{axis}.png") if debug: - vedo.show([pts,u_arrow,v_arrow,w_arrow],interactive=True) - + vedo.show([pts,u_arrow,v_arrow,w_arrow],interactive=True) + def figure_FoR_circle(name,center,v_vec,w_vec,radius,implant_bbox,debug=True): from matplotlib.patches import Circle from matplotlib.lines import Line2D - + [U_min,U_max,V_min,V_max,W_min,W_max] = implant_bbox - + sample = np.zeros((800,800),dtype=np.float32) sample_bbox = (-2905.,2905,-1000,4810.) 
sample_plane(voxels,voxel_size, @@ -194,7 +188,7 @@ def figure_FoR_circle(name,center,v_vec,w_vec,radius,implant_bbox,debug=True): p2 = np.array((V_max,W_min)) m1, m2 = (p0+p1)/2, (p0+p2)/2 - + ax.add_patch(Circle((0,0), radius*1.01, ec='black',fc=circle_color)) ax.add_patch(Circle(p1, radius/40, fc='purple')) ax.add_patch(Circle(p2, radius/40, fc='purple')) @@ -206,8 +200,8 @@ def figure_FoR_circle(name,center,v_vec,w_vec,radius,implant_bbox,debug=True): ax.add_line(Line2D([p0[0],p2[0]],[p0[1],p2[1]],c='red')) ax.add_line(Line2D([m1[0]*1.05,0],[m1[1]*1.05,0],c='green')) - ax.add_line(Line2D([m2[0]*1.05,0],[m2[1]*1.05,0],c='green')) - + ax.add_line(Line2D([m2[0]*1.05,0],[m2[1]*1.05,0],c='green')) + fig.savefig(f"{image_output_dir}/implant-FoR_{name}.png",dpi=300) if debug: @@ -218,92 +212,88 @@ def figure_FoR_profiles(debug): ax1 = fig1.add_subplot(111) ax1.plot((Up_bins[1:]+Up_bins[:-1])/2, Up_integrals); fig1.savefig(f"{image_output_dir}/implant-FoR_Up-profile.png") - + fig2 = plt.figure() ax2 = fig2.add_subplot(111) ax2.plot((theta_bins[1:]+theta_bins[:-1])/2, theta_integrals) - fig2.savefig(f"{image_output_dir}/implant-FoR_theta-profile.png") - + fig2.savefig(f"{image_output_dir}/implant-FoR_theta-profile.png") + if debug: plt.show() - - - -def figure_FoR_cylinder(debug=True): +def figure_FoR_cylinder(debug=True): # center_line = vedo.Arrow(C1,C2) center_line = vedo.Cylinder((C1+C2)/2,r=implant_radius_voxels/20,height=implant_length_voxels, axis=(C2-C1),alpha=1,c='r') cylinder = vedo.Cylinder((C1+C2)/2,r=implant_radius_voxels,height=implant_length_voxels, axis=(C2-C1),alpha=0.3) - + Up_arrow = vedo.Arrow(Cp, UVW2xyz(cp+implant_length*u_prime), c='r') Vp_arrow = vedo.Arrow(Cp, UVW2xyz(cp+implant_radius*2*v_prime), c='g') Wp_arrow = vedo.Arrow(Cp, UVW2xyz(cp+implant_radius*2*w_prime), c='b') vol = vedo.Volume(implant,alpha=[0,0,0.05,0.1]) - - pl = vedo.Plotter(offscreen=True, interactive=False,sharecam=False) + pl = vedo.Plotter(offscreen=True, 
interactive=False,sharecam=False) for axis in vaxis.keys(): pl.show([vol,center_line,Vp_arrow,Wp_arrow,cylinder],camera={ 'pos': np.array((nz/2,ny/2,nx/2)) + 2.5*ny*daxis[axis], 'focalPoint': (nz/2,ny/2,nx/2), 'viewup':-vaxis[axis] }) - - pl.screenshot(f"{image_output_dir}/implant-FoR_cylinder-{axis}.png") - + + pl.screenshot(f"{image_output_dir}/implant-FoR_cylinder-{axis}.png") + if debug: vedo.show([vol,cylinder,Up_arrow,Vp_arrow,Wp_arrow],interactive=True) def figure_FoR_voxels(name,voxels,debug=True): vol = vedo.Volume(voxels,alpha=[0,0,0.05,0.1]) - pl = vedo.Plotter(offscreen=True, interactive=False,sharecam=False) + pl = vedo.Plotter(offscreen=True, interactive=False,sharecam=False) for axis in vaxis.keys(): pl.show([vol],camera={ 'pos': np.array((nz/2,ny/2,nx/2)) + 2.5*ny*daxis[axis], 'focalPoint': (nz/2,ny/2,nx/2), 'viewup':-vaxis[axis] - }) + }) pl.screenshot(f"{image_output_dir}/implant-FoR_voxels_{name}-{axis}.png") if debug: vedo.show([vol],interactive=True) - - - - if __name__ == "__main__": sample, scale, verbose = commandline_args({"sample" : "<required>", "scale" : 8, "verbose" : 1}) - + if(scale<8): if verbose >= 1: print(f"Selected scale is {scale}x: This should not be run at high resolution, use scale>=8.") #sys.exit(-1) ## STEP 0: LOAD MASKS, VOXELS, AND METADATA image_output_dir = f"{hdf5_root}/processed/implant-FoR/{sample}/" - if verbose >= 1: print(f"Storing all debug-images to {image_output_dir}") + if verbose >= 1: print(f"Storing all debug-images to {image_output_dir}") pathlib.Path(image_output_dir).mkdir(parents=True, exist_ok=True) - + if verbose >= 1: print(f"Loading {scale}x implant mask from {hdf5_root}/masks/{scale}x/{sample}.h5") implant_file = h5py.File(f"{hdf5_root}/masks/{scale}x/{sample}.h5",'r') - implant = implant_file["implant/mask"][:] + implant = implant_file["implant/mask"][:].astype(np.uint8) voxel_size = implant_file["implant"].attrs["voxel_size"] implant_file.close() - + if verbose >= 1: print(f"Loading {scale}x 
voxels from {binary_root}/voxels/{scale}x/{sample}.uint16") voxels = np.fromfile(f"{binary_root}/voxels/{scale}x/{sample}.uint16",dtype=np.uint16).reshape(implant.shape) + plt.imshow(voxels[voxels.shape[0]//2,:,:]); plt.savefig(f'{image_output_dir}/voxels-sanity-xy.png') + plt.imshow(voxels[:,voxels.shape[0]//2,:]); plt.savefig(f'{image_output_dir}/voxels-sanity-xz.png') + plt.imshow(voxels[:,:,voxels.shape[0]//2]); plt.savefig(f'{image_output_dir}/voxels-sanity-yz.png') + nz,ny,nx = implant.shape ### STEP 1: COMPUTE IMPLANT PRINCIPAL AXES FRAME OF REFERENCE ## STEP1A: DIAGONALIZE MOMENT OF INTERTIA MATRIX TO GET PRINCIPAL AXES cm = np.array(center_of_mass(implant)) # in downsampled-voxel index coordinates if verbose >= 1: print(f"Center of mass is: {cm}") - IM = np.array(inertia_matrix(implant,cm)).reshape(3,3) + IM = np.array(inertia_matrix(implant,cm)).reshape(3,3) ls,E = la.eigh(IM) ## STEP 1B: PRINCIPAL AXES ARE ONLY DEFINED UP TO A SIGN. @@ -318,7 +308,7 @@ def figure_FoR_voxels(name,voxels,debug=True): E[:,0] *= -1 if sample == "770_pag": E[:,2] *= -1 - + ix = np.argsort(np.abs(ls)); ls, E = ls[ix], E[:,ix] UVW = E.T @@ -335,7 +325,7 @@ def figure_FoR_voxels(name,voxels,debug=True): w0 = implant_uvws[:,2].min(); # In {scale}x voxel units w0v = np.array([0,0,w0]) # w-shift to get to center of implant back-plane - + ## 2B: Transform to backplane-centered coordinates in physical units implant_UVWs = (implant_uvws - w0v)*voxel_size # Physical U,V,W-coordinates, relative to implant back-plane center, in micrometers implant_Us,implant_Vs,implant_Ws = implant_UVWs.T # Implant point coordinates @@ -349,7 +339,7 @@ def figure_FoR_voxels(name,voxels,debug=True): for i in tqdm.tqdm(range(len(U_bins)-1),"Cylinder centres as fn of U"): # Everything is in micrometers U0,U1 = U_bins[i], U_bins[i+1] - + slab = implant_UVWs[(implant_Us>=U0) & (implant_Us<=U1)] slab_Us, slab_Vs, slab_Ws = slab.T @@ -360,17 +350,16 @@ def figure_FoR_voxels(name,voxels,debug=True): p1 = 
np.array([V0,0]) p2 = np.array([V1,0]) - # Will be way faster to + # Will be way faster to c = circle_center(p0,p1,p2) # circle center in VW-coordinates Cs[i] = np.array([(U0+U1)/2, c[0], c[1]]) - Rs[i] = la.norm(p0-c) - + Rs[i] = la.norm(p0-c) - ## 2D: Best circle centers along U forms a helix, due to the winding screw threads. To get the best cylinder, + ## 2D: Best circle centers along U forms a helix, due to the winding screw threads. To get the best cylinder, ## we solve for direction vector u_prime so C(U) = C0 + U*u_prime + e(U) with minimal least square residual error e(U) ## where C0 is the mean of the segment circle centers. - # - # U*u_prime = C(U) - C0 + # + # U*u_prime = C(U) - C0 # # Cs: (N,3) # U: N -> (N,3) @@ -378,10 +367,10 @@ def figure_FoR_voxels(name,voxels,debug=True): C0 = np.mean(Cs,axis=0) u_prime, _,_,_ = la.lstsq(Ub, Cs-C0) u_prime = u_prime[0] - + UVWp = gramschmidt(u_prime,np.array([0,1,0]),np.array([0,0,1])) u_prime, v_prime, w_prime = UVWp # U',V',W' in U,V,W coordinates - + c1 = C0 + implant_Us.min()*u_prime c2 = C0 + implant_Us.max()*u_prime cp = (c1+c2)/2 @@ -393,30 +382,28 @@ def UVW2xyz(p): C1, C2, Cp = UVW2xyz(c1), UVW2xyz(c2), UVW2xyz(cp) - - implant_length = (implant_Us.max()-implant_Us.min()) implant_radius = Rs.max() implant_length_voxels = implant_length/voxel_size implant_radius_voxels = implant_radius/voxel_size - + figure_FoR_cylinder(verbose >= 2) ### 3: In the cylinder coordinates, find radii and angle ranges to fill in the "holes" in the implant and make it solid ### (More robust than closing operations, as we don't want to effect the screw threads). 
## 3A: Transform to implant cylinder coordinates - implant_UVWps = (implant_UVWs - cp) @ UVWp # We now transform to fully screw aligned coordinates with screw center origin + implant_UVWps = (implant_UVWs - cp) @ UVWp # We now transform to fully screw aligned coordinates with screw center origin implant_Ups, implant_Vps, implant_Wps = implant_UVWps.T - Up_min, Up_max = implant_Ups.min(), implant_Ups.max() + Up_min, Up_max = implant_Ups.min(), implant_Ups.max() Vp_min, Vp_max = implant_Vps.min(), implant_Vps.max() Wp_min, Wp_max = implant_Wps.min(), implant_Wps.max() #TODO: Local circle figure (instead of showing global fit on local slice, which isn't snug) bbox_uvwp = [Up_min,Up_max,Vp_min,Vp_max,Wp_min,Wp_max] - figure_FoR_circle("prime-circle",Cp*voxel_size,v_vec,w_vec,implant_radius,bbox_uvwp,verbose >= 2) + figure_FoR_circle("prime-circle",Cp*voxel_size,v_vec,w_vec,implant_radius,bbox_uvwp,verbose >= 2) ## 3B: Profile of radii and angles implant_thetas = np.arctan2(implant_Vps,implant_Wps) @@ -441,8 +428,8 @@ def UVW2xyz(p): zyxs = coordinate_image(implant.shape) uvws = (zyxs - cm) @ E # raw voxel-scale relative to center of mass UVWs = (uvws - w0v) * voxel_size # Micrometer scale relative to backplane-center - Us,Vs,Ws = UVWs[...,0], UVWs[...,1], UVWs[...,2] # UVW physical image coordinates - + Us,Vs,Ws = UVWs[...,0], UVWs[...,1], UVWs[...,2] # UVW physical image coordinates + UVWps = (UVWs - cp) @ UVWp # relative to center-of-implant-before-sawing-in-half Ups,Vps,Wps = UVWps[...,0], UVWps[...,1], UVWps[...,2] # U',V',W' physical image coordinates thetas, rs = np.arctan2(Vps,Wps), np.sqrt(Vps**2+Wps**2) # This is the good reference frame for cylindrical coords @@ -457,18 +444,17 @@ def UVW2xyz(p): solid_implant_UVWps = ((((np.array(np.nonzero(solid_quarter)).T - cm) @ E) - w0v)*voxel_size - cp) @ UVWp Up_integrals, Up_bins = np.histogram(solid_implant_UVWps[:,0],200) - figure_FoR_profiles(verbose >= 2) + figure_FoR_profiles(verbose >= 2) 
figure_FoR_voxels("solid_implant",solid_implant,verbose >= 2) back_mask = (Ws<0) front_mask = largest_cc_of((Ws>50)*(~solid_implant))#*(thetas>=theta_from)*(thetas<=theta_to) # back_part = voxels*back_mask - - front_part = voxels*front_mask - figure_FoR_voxels("back_part", voxels*back_mask, verbose >= 2) - figure_FoR_voxels("front_part",voxels*front_mask, verbose >= 2) + front_part = voxels*front_mask + figure_FoR_voxels("back_part", voxels*back_mask, verbose >= 2) + figure_FoR_voxels("front_part",voxels*front_mask, verbose >= 2) Cp_zyx = Cp[::-1]*voxel_size @@ -489,10 +475,10 @@ def UVW2xyz(p): update_hdf5(f"{output_dir}/{sample}.h5", group_name="implant-FoR", datasets={"UVW":UVW, - "UVWp": UVWp, - "center_of_mass":cm*voxel_size, + "UVWp": UVWp, + "center_of_mass":cm*voxel_size, "center_of_cylinder_UVW": cp, - "UVWp_transform": Muvwp, + "UVWp_transform": Muvwp, "center_of_cylinder_zyx": Cp_zyx, # Cp is in scaled voxel xyz "bounding_box_UVWp": np.array([[implant_Ups.min(),implant_Ups.max()], [implant_Vps.min(),implant_Vps.max()], @@ -502,7 +488,7 @@ def UVW2xyz(p): "theta_range": np.array([theta_from, theta_to]) }, attributes={"backplane_W_shift":w0*voxel_size, - "implant_radius": implant_radius + "implant_radius": implant_radius }, dimensions={ "center_of_mass":"zyx micrometers", @@ -515,7 +501,6 @@ def UVW2xyz(p): chunk_shape=None ) - output_dir = f"{hdf5_root}/masks/{scale}x/" pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) if verbose >= 1: print(f"Saving implant_solid mask to {output_dir}/{sample}.h5") @@ -542,40 +527,35 @@ def UVW2xyz(p): datasets={"mask":front_mask}, attributes={"sample":sample, "scale":scale, "voxel_size":voxel_size}) - if verbose >= 1: print(f"Computing bone region") hist, bins = np.histogram(front_part, 256) hist[0] = 0 peaks, info = signal.find_peaks(hist,height=0.5*hist.max()) - + try: p1, p2 = peaks[np.argsort(info['peak_heights'])[:2]] midpoint = int(round((bins[p1]+bins[p2+1])/2)) # p1 is left-edge of p1-bin, p2+1 is right 
edge of p2-bin if verbose >= 1: print(f"p1, p2 = ({p1,bins[p1]}), ({p2,bins[p2]}); midpoint = {midpoint}") - - bone_mask1 = front_part > midpoint - closing_diameter, opening_diameter = 400, 300 # micrometers + + bone_mask1 = front_part > midpoint + closing_diameter, opening_diameter = 400, 300 # micrometers closing_voxels = 2*int(round(closing_diameter/(2*voxel_size))) + 1 # Scale & ensure odd length opening_voxels = 2*int(round(opening_diameter/(2*voxel_size))) + 1 # Scale & ensure odd length - + for i in tqdm.tqdm(range(1),f"Closing with sphere of diameter {closing_diameter} micrometers, {closing_voxels} voxels.\n"): bone_region_mask = close_3d(bone_mask1, closing_voxels//2) - + for i in tqdm.tqdm(range(1),f"Opening with sphere of diameter {opening_diameter} micrometers, {opening_voxels} voxels.\n"): bone_region_mask &= ~solid_implant #~open_3d(implant_shell_mask, opening_voxels) bone_region_mask = open_3d(bone_region_mask,opening_voxels//2) - - + bone_region_mask = largest_cc_of(bone_region_mask) except: if verbose >= 1: print(f"Wasnt able to separate into resin and bone region. 
Assuming all is bone region.") bone_region_mask = front_mask - + if verbose >= 1: print(f"Saving bone_region mask to {output_dir}/{sample}.h5") update_hdf5_mask(f"{output_dir}/{sample}.h5", group_name="bone_region", datasets={"mask":bone_region_mask}, attributes={"sample":sample, "scale":scale, "voxel_size":voxel_size}) - - - From 3c39c269d356b7412e2064ab9a7019f4ce886de2 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Mar 2023 12:05:46 +0100 Subject: [PATCH 126/136] #25 Handled pybind conversion for io --- src/pybind/io-pybind.cc | 37 ++++++++++++++++++++++++------------- src/test/test_io.py | 3 +++ 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/pybind/io-pybind.cc b/src/pybind/io-pybind.cc index 060d9d9..0e5d680 100644 --- a/src/pybind/io-pybind.cc +++ b/src/pybind/io-pybind.cc @@ -17,7 +17,18 @@ void load_slice(py::array_t<T> &np_data, const string filename, auto [Nz, Ny, Nx] = shape; auto [oz, oy, ox] = offset; uint64_t flat_offset = oz*Ny*Nx + oy*Nx + ox; + + cout << + Nz << " " << Ny << " " << Nx << " " << + oz << " " << oy << " " << ox << " " << + flat_offset << endl; + NS::load_contiguous_slice<T>(data, filename, flat_offset, data_info.size); + + T checksum = (T) 0; + for (int64_t i = 0; i < data_info.size; i++) + checksum += data[i]; + cout << checksum << " " << sizeof(T) << endl; } template <typename T> @@ -37,17 +48,17 @@ void write_slice(const py::array_t<T> &np_data, PYBIND11_MODULE(io, m) { m.doc() = "I/O functions for handling flat binary format files."; // optional module docstring - m.def("load_slice", &python_api::load_slice<uint8_t>); - m.def("load_slice", &python_api::load_slice<uint16_t>); - m.def("load_slice", &python_api::load_slice<uint32_t>); - m.def("load_slice", &python_api::load_slice<uint64_t>); - m.def("load_slice", &python_api::load_slice<float>); - m.def("load_slice", &python_api::load_slice<double>); - - m.def("write_slice", &python_api::write_slice<uint8_t>); - 
m.def("write_slice", &python_api::write_slice<uint16_t>); - m.def("write_slice", &python_api::write_slice<uint32_t>); - m.def("write_slice", &python_api::write_slice<uint64_t>); - m.def("write_slice", &python_api::write_slice<float>); - m.def("write_slice", &python_api::write_slice<double>); + m.def("load_slice", &python_api::load_slice<uint8_t>, py::arg("np_data").noconvert(), py::arg("filename"), py::arg("offset"), py::arg("shape")); + m.def("load_slice", &python_api::load_slice<uint16_t>, py::arg("np_data").noconvert(), py::arg("filename"), py::arg("offset"), py::arg("shape")); + m.def("load_slice", &python_api::load_slice<uint32_t>, py::arg("np_data").noconvert(), py::arg("filename"), py::arg("offset"), py::arg("shape")); + m.def("load_slice", &python_api::load_slice<uint64_t>, py::arg("np_data").noconvert(), py::arg("filename"), py::arg("offset"), py::arg("shape")); + m.def("load_slice", &python_api::load_slice<float>, py::arg("np_data").noconvert(), py::arg("filename"), py::arg("offset"), py::arg("shape")); + m.def("load_slice", &python_api::load_slice<double>, py::arg("np_data").noconvert(), py::arg("filename"), py::arg("offset"), py::arg("shape")); + + m.def("write_slice", &python_api::write_slice<uint8_t>, py::arg("np_data").noconvert(), py::arg("filename"), py::arg("offset"), py::arg("shape")); + m.def("write_slice", &python_api::write_slice<uint16_t>, py::arg("np_data").noconvert(), py::arg("filename"), py::arg("offset"), py::arg("shape")); + m.def("write_slice", &python_api::write_slice<uint32_t>, py::arg("np_data").noconvert(), py::arg("filename"), py::arg("offset"), py::arg("shape")); + m.def("write_slice", &python_api::write_slice<uint64_t>, py::arg("np_data").noconvert(), py::arg("filename"), py::arg("offset"), py::arg("shape")); + m.def("write_slice", &python_api::write_slice<float>, py::arg("np_data").noconvert(), py::arg("filename"), py::arg("offset"), py::arg("shape")); + m.def("write_slice", &python_api::write_slice<double>, 
py::arg("np_data").noconvert(), py::arg("filename"), py::arg("offset"), py::arg("shape")); } \ No newline at end of file diff --git a/src/test/test_io.py b/src/test/test_io.py index 09ad43a..ee16cd8 100644 --- a/src/test/test_io.py +++ b/src/test/test_io.py @@ -32,6 +32,9 @@ def test_dtype(impl, dtype): if os.path.exists(individual_tmp_file): os.remove(individual_tmp_file) data = random(dim_shape, dtype) + + assert (len(np.unique(data)) > 1) # Assert interesting data + partial = dim_size // partial_factor # Write out a new file From 9f06542bde4f6628781ef4e0a7fc0bf8d899459c Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Mar 2023 12:18:54 +0100 Subject: [PATCH 127/136] #25 Fixed pybind implicit conversion for geometry --- src/pybind/geometry-pybind.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/pybind/geometry-pybind.cc b/src/pybind/geometry-pybind.cc index ebd09b7..7499275 100644 --- a/src/pybind/geometry-pybind.cc +++ b/src/pybind/geometry-pybind.cc @@ -119,13 +119,13 @@ void cylinder_projection(const np_array<float> &np_edt, // Euclidean Distance PYBIND11_MODULE(geometry, m) { m.doc() = "Voxel Geometry Module"; // optional module docstring - m.def("center_of_mass", &python_api::center_of_mass); - m.def("inertia_matrix", &python_api::inertia_matrix); - m.def("integrate_axes", &python_api::integrate_axes); - m.def("zero_outside_bbox", &python_api::zero_outside_bbox); - m.def("fill_implant_mask", &python_api::fill_implant_mask); - m.def("cylinder_projection", &python_api::cylinder_projection); - m.def("sample_plane", &python_api::sample_plane<uint16_t>); - m.def("sample_plane", &python_api::sample_plane<uint8_t>); - m.def("compute_front_mask", &python_api::compute_front_mask); + m.def("center_of_mass", &python_api::center_of_mass, py::arg("np_voxels")); + m.def("inertia_matrix", &python_api::inertia_matrix, py::arg("np_voxels"), py::arg("cm")); + m.def("integrate_axes", 
&python_api::integrate_axes, py::arg("np_voxels"), py::arg("x0"), py::arg("v_axis"), py::arg("w_axis"), py::arg("v_min"), py::arg("w_min"), py::arg("output").noconvert()); + m.def("zero_outside_bbox", &python_api::zero_outside_bbox, py::arg("principal_axes"), py::arg("parameter_ranges"), py::arg("cm"), py::arg("np_voxels").noconvert()); + m.def("fill_implant_mask", &python_api::fill_implant_mask, py::arg("implant_mask"), py::arg("voxel_size"), py::arg("bbox"), py::arg("r_fraction"), py::arg("Muvw"), py::arg("solid_implant_mask").noconvert(), py::arg("rsqr_maxs").noconvert(), py::arg("profile").noconvert()); + m.def("cylinder_projection", &python_api::cylinder_projection, py::arg("np_edt"), py::arg("np_Cs"), py::arg("Cs_voxel_size"), py::arg("d_min"), py::arg("d_max"), py::arg("theta_min"), py::arg("theta_max"), py::arg("bbox"), py::arg("Muvw"), py::arg("np_images").noconvert(), py::arg("np_counts").noconvert()); + m.def("sample_plane", &python_api::sample_plane<uint16_t>, py::arg("np_voxels"), py::arg("voxel_size"), py::arg("cm"), py::arg("u_axis"), py::arg("v_axis"), py::arg("bbox"), py::arg("np_plano_samples").noconvert()); + m.def("sample_plane", &python_api::sample_plane<uint8_t>, py::arg("np_voxels"), py::arg("voxel_size"), py::arg("cm"), py::arg("u_axis"), py::arg("v_axis"), py::arg("bbox"), py::arg("np_plano_samples").noconvert()); + m.def("compute_front_mask", &python_api::compute_front_mask, py::arg("np_solid_implant"), py::arg("voxel_size"), py::arg("Muvw"), py::arg("bbox"), py::arg("np_front_mask").noconvert()); } From b0a46b4f4e42378fa3f566b32078c6edc9612733 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Mar 2023 12:20:07 +0100 Subject: [PATCH 128/136] #25 Fixed implicit pybind conversion for morphology --- src/pybind/morphology-pybind.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pybind/morphology-pybind.cc b/src/pybind/morphology-pybind.cc index b8547e7..b91daa7 100644 --- 
a/src/pybind/morphology-pybind.cc +++ b/src/pybind/morphology-pybind.cc @@ -28,6 +28,6 @@ void morphology_3d_sphere_wrapper( PYBIND11_MODULE(morphology, m) { m.doc() = "Morphology operations."; // optional module docstring - m.def("dilate_3d_sphere", &morphology_3d_sphere_wrapper<std::bit_or<mask_type>, false>); - m.def("erode_3d_sphere", &morphology_3d_sphere_wrapper<std::bit_and<mask_type>, true>); + m.def("dilate_3d_sphere", &morphology_3d_sphere_wrapper<std::bit_or<mask_type>, false>, py::arg("np_voxels"), py::arg("radius"), py::arg("np_result").noconvert()); + m.def("erode_3d_sphere", &morphology_3d_sphere_wrapper<std::bit_and<mask_type>, true>, py::arg("np_voxels"), py::arg("radius"), py::arg("np_result").noconvert()); } \ No newline at end of file From 610e1d5547165ce9ed4e55bc7fbe34b8269de090 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Mon, 13 Mar 2023 12:36:39 +0100 Subject: [PATCH 129/136] #25 Verified steps are correct up until 600 --- proc-steps-checklist.txt | 8 +++--- src/processing_steps/0500_rescale_cupy_bin.py | 26 +++++++++---------- src/pybind/io-pybind.cc | 11 +------- 3 files changed, 18 insertions(+), 27 deletions(-) diff --git a/proc-steps-checklist.txt b/proc-steps-checklist.txt index 30b6929..b8ba576 100644 --- a/proc-steps-checklist.txt +++ b/proc-steps-checklist.txt @@ -1,6 +1,6 @@ 0100 - Haven't checked. Requires ERDA to be set up properly. -0200 - Runs! +0200 - Verified! 0300 - Verified! -0400 - Runs! -0500 - Runs! -0600 - Runs! \ No newline at end of file +0400 - Verified! +0500 - Verified! +0600 - Verified! 
\ No newline at end of file diff --git a/src/processing_steps/0500_rescale_cupy_bin.py b/src/processing_steps/0500_rescale_cupy_bin.py index f7d5f47..271d17c 100644 --- a/src/processing_steps/0500_rescale_cupy_bin.py +++ b/src/processing_steps/0500_rescale_cupy_bin.py @@ -13,9 +13,9 @@ pinned_mempool.free_all_blocks() if __name__ == "__main__": - sample, image, chunk_size, dtype, verbose = commandline_args({"sample" : "<required>", + sample, image, chunk_size, dtype, verbose = commandline_args({"sample" : "<required>", "image" : "voxels", - "chunk_size" : 32*2, + "chunk_size" : 32*2, "dtype" : "uint16", "verbose" : 1}) @@ -29,13 +29,13 @@ if verbose >= 1: print(f"Input metadata from {input_meta}") if verbose >= 1: print(f"Input flat binary {dtype} data from {input_bin}") if verbose >= 1: print(f"Output flat binary {dtype} data to {output_root}/[1,2,4,8,16,32]x/{sample}.{dtype}") - + meta_h5 = h5py.File(input_meta, 'r') full_Nz, Ny, Nx = meta_h5['voxels'].shape shifts = meta_h5['volume_matching_shifts'][:] # TODO: Do this in a neater way Nz = full_Nz - np.sum(shifts) - meta_h5.close() - + meta_h5.close() + if verbose >= 1: print(f"Downscaling from 1x {(Nz,Ny,Nx)} to 2x {(Nz//2,Ny//2,Nx//2)}") if(chunk_size % 32): if verbose >= 1: print(f"Chunk size {chunk_size} is invalid: must be divisible by 32.") @@ -49,12 +49,12 @@ voxels4x = np.empty((Nz//4,Ny//4,Nx//4),dtype=T) voxels8x = np.empty((Nz//8,Ny//8,Nx//8),dtype=T) voxels16x = np.empty((Nz//16,Ny//16,Nx//16),dtype=T) - voxels32x = np.empty((Nz//32,Ny//32,Nx//32),dtype=T) - voxels = [voxels2x,voxels4x,voxels8x,voxels16x,voxels32x]; - + voxels32x = np.empty((Nz//32,Ny//32,Nx//32),dtype=T) + voxels = [voxels2x,voxels4x,voxels8x,voxels16x,voxels32x]; + for z in tqdm.tqdm(range(0,Nz,chunk_size),f"{sample}: Reading and scaling {chunk_size}-layer chunks"): zend = min(z+chunk_size, Nz) - chunk_items = (zend-z) * Ny * Nx + chunk_items = (zend-z) * Ny * Nx # # CHECK: Is a simple fread faster than numpy fromfile? 
# voxels1x_np = np.empty((zend-z,Ny,Nx),dtype=T); # load_slice(voxels1x_np,input_bin,(z,0,0),voxels1x_np.shape) @@ -65,7 +65,7 @@ except: if verbose >= 1: print(f"Read failed. chunk_items = {chunk_items} = {(zend-z)*Ny*Nx}, z = {z}, zend-z = {zend-z}") sys.exit(-1) - + # if verbose >= 1: print(f"Used GPU memory: {mempool.used_bytes()//1000000}MB out of {mempool.total_bytes()/1000000}MB. {pinned_mempool.n_free_blocks()} free pinned blocks.") voxels2x_chunk = downsample2x(voxels1x_chunk) del voxels1x_chunk @@ -91,11 +91,11 @@ del voxels8x_chunk del voxels16x_chunk del voxels32x_chunk - + if verbose >= 1: print(f"Allocating {(Nz//2,Ny//2,Nx//2)}={Nz//2*Ny//2*Nx//2} {dtype} for voxels2x on GPU") - + for i in tqdm.tqdm(range(len(scales)),f"{sample}: Downscaling to all smaller scales: {scales[2:]}"): output_dir = f"{output_root}/{scales[i]}x/" - pathlib.Path(f"{output_dir}").mkdir(parents=True, exist_ok=True) + pathlib.Path(f"{output_dir}").mkdir(parents=True, exist_ok=True) if verbose >= 1: print(f"Writing out scale {scales[i]}x {(voxels[i].shape)} to {output_dir}/{sample}.uint16") voxels[i].tofile(f"{output_dir}/{sample}.uint16") diff --git a/src/pybind/io-pybind.cc b/src/pybind/io-pybind.cc index 0e5d680..8da8e5d 100644 --- a/src/pybind/io-pybind.cc +++ b/src/pybind/io-pybind.cc @@ -18,17 +18,7 @@ void load_slice(py::array_t<T> &np_data, const string filename, auto [oz, oy, ox] = offset; uint64_t flat_offset = oz*Ny*Nx + oy*Nx + ox; - cout << - Nz << " " << Ny << " " << Nx << " " << - oz << " " << oy << " " << ox << " " << - flat_offset << endl; - NS::load_contiguous_slice<T>(data, filename, flat_offset, data_info.size); - - T checksum = (T) 0; - for (int64_t i = 0; i < data_info.size; i++) - checksum += data[i]; - cout << checksum << " " << sizeof(T) << endl; } template <typename T> @@ -41,6 +31,7 @@ void write_slice(const py::array_t<T> &np_data, auto [Nz, Ny, Nx] = shape; auto [oz, oy, ox] = offset; uint64_t flat_offset = oz*Ny*Nx + oy*Nx + ox; + 
NS::write_contiguous_slice<T>(data, filename, flat_offset, data_info.size); } From e8172006f40308aa049118ce9e206a727a721772 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Tue, 25 Apr 2023 15:12:52 +0200 Subject: [PATCH 130/136] Explicit type conversions --- src/lib/cpp/cpu_seq/geometry.cc | 99 ++++++++++++++++----------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 5c0acfa..3c35f5e 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -18,7 +18,6 @@ array<real_t, 3> center_of_mass(const input_ndarray<mask_type> &mask) { uint64_t total_mass = 0, cmz = 0, cmy = 0, cmx = 0; BLOCK_BEGIN(mask, reduction(+:total_mass,cmz,cmy,cmx)); { - // TODO James approves; now RUN! mask_type m = mask_buffer[flat_index]; @@ -85,9 +84,9 @@ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance const auto& [U_min, U_max, V_min, V_max, W_min, W_max] = bbox; real_t - edz = edt_Nz / real_t(C_Nz), - edy = edt_Ny / real_t(C_Ny), - edx = edt_Nx / real_t(C_Nx); + //edz = real_t(edt_Nz) / real_t(C_Nz), + edy = real_t(edt_Ny) / real_t(C_Ny), + edx = real_t(edt_Nx) / real_t(C_Nx); //printf("Segmenting from %g to %g micrometers distance of implant.\n",d_min,d_max); //printf("Bounding box is [U_min,U_max,V_min,V_max,W_min,W_max] = [[%g,%g],[%g,%g],[%g,%g]]\n", @@ -125,9 +124,9 @@ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance // Index into local block const int64_t Xl = (k / (C_Ny*C_Nz)), Yl = (k / C_Nz) % C_Ny, Zl = k % C_Nz; // Index into local edt block. 
Note EDT has 1-slice padding top+bottom - const float x = (Xl+1)*edx, y = Yl*edy, z = Zl*edy; + const float x = float(Xl+1)*edx, y = float(Yl)*edy, z = float(Zl)*edy; - if (x > block_height) { + if (x > float(block_height)) { printf("Block number k=%ld.\nX,Y,Z=%ld,%ld,%ld\nXl,Yl,Zl=%ld,%ld,%ld\nx,y,z=%.2f, %.2f, %.2f\n",k,X,Y,Z,Xl,Yl,Zl,x,y,z); abort(); } @@ -136,7 +135,7 @@ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance real_t distance = resample2x2x2<float>(edt_block, {this_edt_length/(edt_Ny*edt_Nz),edt_Ny,edt_Nz}, {x,y,z}); if (distance > d_min && distance <= d_max) { // TODO: and W>w_min - array<real_t,4> Xs = {X*voxel_size, Y*voxel_size, Z*voxel_size, 1}; + array<real_t,4> Xs = {real_t(X)*voxel_size, real_t(Y)*voxel_size, real_t(Z)*voxel_size, 1}; auto [U,V,W,c] = hom_transform(Xs,Muvw); n_shell ++; @@ -147,10 +146,10 @@ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance if (theta >= theta_min && theta <= theta_max) { n_shell_bbox++; - ssize_t theta_i = floor( (theta-theta_min) * (n_theta-1)/(theta_max-theta_min) ); - ssize_t U_i = floor( (U - U_min) * (n_U -1)/( U_max- U_min) ); + ssize_t theta_i = ssize_t(floor( (theta-theta_min) * real_t(n_theta-1)/(theta_max-theta_min) )); + ssize_t U_i = ssize_t(floor( (U - U_min) * real_t(n_U -1)/( U_max- U_min) )); - real_t p = C_buffer[k]/255.; + real_t p = real_t(C_buffer[k])/255.f; assert(theta >= theta_min); assert(theta <= theta_max); @@ -307,6 +306,46 @@ array<real_t,9> inertia_matrix(const input_ndarray<mask_type> &mask, const array }; } +void integrate_axes(const input_ndarray<mask_type> &mask, + const array<real_t,3> &x0, + const array<real_t,3> &v_axis, + const array<real_t,3> &w_axis, + const real_t v_min, const real_t w_min, + output_ndarray<uint64_t> output) { + UNPACK_NUMPY(mask); + ssize_t Nv = output.shape[0], Nw = output.shape[1]; + uint64_t *output_data = output.data; + + // TODO: Check v_axis & w_axis projections to certify bounds and get 
rid of runtime check + #pragma acc data copy(output_data[:Nv*Nw]) copyin(x0, v_axis, w_axis, v_min, w_min) + { + BLOCK_BEGIN(mask, ) { + + mask_type voxel = mask_buffer[flat_index]; + if (voxel != 0) { + real_t xs[3] = { + real_t(x) - x0[0], + real_t(y) - x0[1], + real_t(z) - x0[2] + }; + + real_t + v = dot(xs, v_axis), + w = dot(xs, w_axis); + int64_t + i_v = int64_t(round(v - v_min)), + j_w = int64_t(round(w - w_min)); + + if (i_v >= 0 && j_w >= 0 && i_v < Nv && j_w < Nw) { + ATOMIC() + output_data[i_v*Nw + j_w] += voxel; + } + } + + BLOCK_END() } + } +} + template <typename T> void sample_plane(const input_ndarray<T> &voxels, const real_t voxel_size, // In micrometers @@ -361,46 +400,6 @@ void sample_plane(const input_ndarray<T> &voxels, } } -void integrate_axes(const input_ndarray<mask_type> &mask, - const array<real_t,3> &x0, - const array<real_t,3> &v_axis, - const array<real_t,3> &w_axis, - const real_t v_min, const real_t w_min, - output_ndarray<uint64_t> output) { - UNPACK_NUMPY(mask); - ssize_t Nv = output.shape[0], Nw = output.shape[1]; - uint64_t *output_data = output.data; - - // TODO: Check v_axis & w_axis projections to certify bounds and get rid of runtime check - #pragma acc data copy(output_data[:Nv*Nw]) copyin(x0, v_axis, w_axis, v_min, w_min) - { - BLOCK_BEGIN(mask, ) { - - mask_type voxel = mask_buffer[flat_index]; - if (voxel != 0) { - real_t xs[3] = { - real_t(x) - x0[0], - real_t(y) - x0[1], - real_t(z) - x0[2] - }; - - real_t - v = dot(xs, v_axis), - w = dot(xs, w_axis); - int64_t - i_v = int64_t(round(v - v_min)), - j_w = int64_t(round(w - w_min)); - - if (i_v >= 0 && j_w >= 0 && i_v < Nv && j_w < Nw) { - ATOMIC() - output_data[i_v*Nw + j_w] += voxel; - } - } - - BLOCK_END() } - } -} - // NB: xyz are in indices, not micrometers void zero_outside_bbox(const array<real_t,9> &principal_axes, const array<real_t,6> ¶meter_ranges, From 8dfe3073e40fb6961aa178ff85318d4d8a2f46b3 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> 
Date: Tue, 25 Apr 2023 15:13:44 +0200 Subject: [PATCH 131/136] Added debug image generation --- src/processing_steps/0700_implant_FoR.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/processing_steps/0700_implant_FoR.py b/src/processing_steps/0700_implant_FoR.py index b311755..81943e8 100644 --- a/src/processing_steps/0700_implant_FoR.py +++ b/src/processing_steps/0700_implant_FoR.py @@ -176,6 +176,11 @@ def figure_FoR_circle(name,center,v_vec,w_vec,radius,implant_bbox,debug=True): tuple(center), tuple(v_vec), tuple(w_vec), sample_bbox,sample) + print (voxel_size, cm, v_vec, w_vec, sample_bbox) + plt.imshow(sample) + plt.savefig(f'{image_output_dir}/sample_plane_check.png') + plt.clf() + fig = plt.figure() ax = fig.add_subplot(111) ax.imshow(sample.T[::-1], extent=sample_bbox,cmap='RdYlBu') @@ -283,6 +288,9 @@ def figure_FoR_voxels(name,voxels,debug=True): if verbose >= 1: print(f"Loading {scale}x voxels from {binary_root}/voxels/{scale}x/{sample}.uint16") voxels = np.fromfile(f"{binary_root}/voxels/{scale}x/{sample}.uint16",dtype=np.uint16).reshape(implant.shape) + plt.imshow(implant[implant.shape[0]//2,:,:]); plt.savefig(f'{image_output_dir}/implant-sanity-xy.png') + plt.imshow(implant[:,implant.shape[0]//2,:]); plt.savefig(f'{image_output_dir}/implant-sanity-xz.png') + plt.imshow(implant[:,:,implant.shape[0]//2]); plt.savefig(f'{image_output_dir}/implant-sanity-yz.png') plt.imshow(voxels[voxels.shape[0]//2,:,:]); plt.savefig(f'{image_output_dir}/voxels-sanity-xy.png') plt.imshow(voxels[:,voxels.shape[0]//2,:]); plt.savefig(f'{image_output_dir}/voxels-sanity-xz.png') plt.imshow(voxels[:,:,voxels.shape[0]//2]); plt.savefig(f'{image_output_dir}/voxels-sanity-yz.png') From 2b7fbe14dcee238860b478e6e3c1ce245b6d98e8 Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Tue, 25 Apr 2023 15:14:47 +0200 Subject: [PATCH 132/136] Added note about how to profile OpenACC. 
Should not be here in the future --- src/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Makefile b/src/Makefile index f64e876..57bc345 100644 --- a/src/Makefile +++ b/src/Makefile @@ -50,6 +50,8 @@ $(foreach PLATFORM, $(PLATFORMS), \ ) \ ) +# TODO lightweight openacc profiling can be done with the environment variable NV_ACC_TIME=1 !!! + test: all $(PYTHON) -m pytest -n auto test From d402b9778104152d2e8408e612af3550c25a925a Mon Sep 17 00:00:00 2001 From: Carl Johnsen <carl-johannes@di.ku.dk> Date: Tue, 25 Apr 2023 15:16:27 +0200 Subject: [PATCH 133/136] #25 Changed in_bbox to work on tuples, rather than three parameters --- src/lib/cpp/include/geometry.hh | 55 ++++++++++++++------------------- 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/src/lib/cpp/include/geometry.hh b/src/lib/cpp/include/geometry.hh index 937c5d2..307f7d9 100644 --- a/src/lib/cpp/include/geometry.hh +++ b/src/lib/cpp/include/geometry.hh @@ -27,45 +27,38 @@ inline vector4 hom_transform(const vector4 &x, const matrix4x4 &M) { return c; } -inline bool in_bbox(float U, float V, float W, const std::array<float, 6> &bbox) { - const auto& [U_min, U_max, V_min, V_max, W_min, W_max] = bbox; - - bool inside = - U >= U_min && - U <= U_max && - V >= V_min && - V <= V_max && - W >= W_min && - W <= W_max; - - // printf("in_bbox: (%.1f,%.1f,%.1f) \in ([%.1f,%.1f],[%.1f,%.1f],[%.1f,%.1f]) == %d\n", - // U,V,W,U_min,U_max,V_min,V_max,U_min,U_max,inside); - - return inside; +inline bool in_bbox(const std::array<float, 3> index, const std::array<float, 6> &bbox) { + const auto& [z, y, x] = index; + const auto& [zmin, zmax, ymin, ymax, xmin, xmax] = bbox; + + return + z >= zmin && z <= zmax && + y >= ymin && y <= ymax && + x >= xmin && x <= xmax; } template <typename T> -float resample2x2x2(const T *voxels, - const array<ssize_t, 3> &shape, - const array<float, 3> &X) { - auto [Nx,Ny,Nz] = shape; +float resample2x2x2(const T *voxels, + const std::array<ssize_t, 3> &shape, + 
const std::array<float, 3> &index) { + auto [Nz,Ny,Nx] = shape; - if (!in_bbox(X[0], X[1], X[2], {0.5f, float(Nx)-0.5f, 0.5f, float(Ny)-0.5f, 0.5f, float(Nz)-0.5f})) { - uint64_t voxel_index = uint64_t(floor(X[0]))*Ny*Nz + uint64_t(floor(X[1]))*Ny + uint64_t(floor(X[2])); + if (!in_bbox(index, {0.5f, float(Nx)-0.5f, 0.5f, float(Ny)-0.5f, 0.5f, float(Nz)-0.5f})) { + uint64_t voxel_index = uint64_t(floor(index[0]))*Nz*Ny + uint64_t(floor(index[1]))*Nx + uint64_t(floor(index[2])); return voxels[voxel_index]; } - float Xfrac[2][3]; // {Xminus[3], Xplus[3]} - int64_t Xint[2][3]; // {Iminus[3], Iplus[3]} + float Ifrac[2][3]; // {Xminus[3], Xplus[3]} + int64_t Iint[2][3]; // {Iminus[3], Iplus[3]} float value = 0; for (int i = 0; i < 3; i++) { float Iminus, Iplus; - Xfrac[0][i] = 1-modf(X[i]-0.5f, &Iminus); // 1-{X[i]-1/2}, floor(X[i]-1/2) - Xfrac[1][i] = modf(X[i]+0.5f, &Iplus); // {X[i]+1/2}, floor(X[i]+1/2) + Ifrac[0][i] = 1-std::modf(index[i]-0.5f, &Iminus); // 1-{X[i]-1/2}, floor(X[i]-1/2) + Ifrac[1][i] = std::modf(index[i]+0.5f, &Iplus); // {X[i]+1/2}, floor(X[i]+1/2) - Xint[0][i] = (int64_t) Iminus; - Xint[1][i] = (int64_t) Iplus; + Iint[0][i] = (int64_t) Iminus; + Iint[1][i] = (int64_t) Iplus; } for (int ijk = 0; ijk <= 7; ijk++) { @@ -74,8 +67,8 @@ float resample2x2x2(const T *voxels, for (int axis = 0; axis < 3; axis++) { // x-1/2 or x+1/2 int pm = (ijk >> axis) & 1; - IJK[axis] = Xint[pm][axis]; - weight *= Xfrac[pm][axis]; + IJK[axis] = Iint[pm][axis]; + weight *= Ifrac[pm][axis]; } auto [I,J,K] = IJK; @@ -87,11 +80,11 @@ float resample2x2x2(const T *voxels, // printf("(I,J,K) = (%ld,%ld,%ld), (Nx,Ny,Nz) = (%ld,%ld,%ld)\n",I,J,K,Nx,Ny,Nz); // abort(); // } - uint64_t voxel_index = I*Ny*Nz+J*Ny+K; + uint64_t voxel_index = I*Ny*Nx + J*Nx + K; //assert(I>=0 && J>=0 && K>=0); //assert(I<Nx && J<Ny && K<Nz); float voxel = (float) voxels[voxel_index]; - value += voxel*weight; + value += voxel * weight; } return value; From 2176297e0a83d9ee24fcc050e9f34d9e6c20319d Mon 
Sep 17 00:00:00 2001 From: James Avery <avery@nbi.ku.dk> Date: Tue, 25 Apr 2023 15:41:00 +0200 Subject: [PATCH 134/136] Fix link path --- src/processing_steps/1400_rescale_cupy_bin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/processing_steps/1400_rescale_cupy_bin.py b/src/processing_steps/1400_rescale_cupy_bin.py index ec15bd3..d68be24 120000 --- a/src/processing_steps/1400_rescale_cupy_bin.py +++ b/src/processing_steps/1400_rescale_cupy_bin.py @@ -1 +1 @@ -processing_steps/0500_rescale_cupy_bin.py \ No newline at end of file +0500_rescale_cupy_bin.py \ No newline at end of file From 3ecc845abf768975b2e23bae8091af9c8e465052 Mon Sep 17 00:00:00 2001 From: James Avery <avery@nbi.ku.dk> Date: Tue, 25 Apr 2023 15:52:47 +0200 Subject: [PATCH 135/136] in_bbox predicate now takes a std::array<real_t,3> for coordinates. --- src/lib/cpp/cpu_seq/geometry.cc | 8 ++++---- src/lib/cpp/gpu/geometry.cc | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lib/cpp/cpu_seq/geometry.cc b/src/lib/cpp/cpu_seq/geometry.cc index 3c35f5e..fc7c45f 100644 --- a/src/lib/cpp/cpu_seq/geometry.cc +++ b/src/lib/cpp/cpu_seq/geometry.cc @@ -140,7 +140,7 @@ void cylinder_projection(const input_ndarray<float> edt, // Euclidean Distance n_shell ++; // printf("distance = %.1f, U,V,W = %.2f,%.2f,%.2f\n",distance,U,V,W); - if (in_bbox(U,V,W,bbox)) { + if (in_bbox({{U,V,W}},bbox)) { real_t theta = atan2(V,W); if (theta >= theta_min && theta <= theta_max) { @@ -218,7 +218,7 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, int U_i = int(floor((U - U_min) * real_t(n_segments-1) / (U_max - U_min))); // if (U_i >= 0 && U_i < n_segments) { - if ( in_bbox(U, V, W, bbox) ) { + if ( in_bbox({{U, V, W}}, bbox) ) { rsqr_maxs_d[U_i] = max(rsqr_maxs_d[U_i], float(r_sqr)); theta_min = min(theta_min, theta); theta_max = max(theta_max, theta); @@ -388,8 +388,8 @@ void sample_plane(const input_ndarray<T> &voxels, // printf("u,v = %g,%g -> %.1f,%.1f,%.1f -> 
%d, %d, %d\n",u,v,X,Y,Z,int(round(x)),int(round(y)),int(round(z))); T value = 0; - std::array<float, 6> local_bbox = {0.5f, float(voxels_Nx)-0.5f, 0.5f, float(voxels_Ny)-0.5f, 0.5f, float(voxels_Nz)-0.5f}; - if (in_bbox(x,y,z, local_bbox)) + std::array<float, 6> local_bbox = {0.5f, float(voxels_Nx)-0.5f, 0.5f, float(voxels_Ny)-0.5f, 0.5f, float(voxels_Nz)-0.5f}; + if (in_bbox({{x,y,z}}, local_bbox)) value = (T) round(resample2x2x2<T>(voxels.data, {voxels_Nx, voxels_Ny, voxels_Nz}, {x, y, z})); // else // fprintf(stderr,"Sampling outside image: x,y,z = %.1f,%.1f,%.1f, Nx,Ny,Nz = %ld,%ld,%ld\n",x,y,z,Nx,Ny,Nz); diff --git a/src/lib/cpp/gpu/geometry.cc b/src/lib/cpp/gpu/geometry.cc index b76db65..230ddc0 100644 --- a/src/lib/cpp/gpu/geometry.cc +++ b/src/lib/cpp/gpu/geometry.cc @@ -82,7 +82,7 @@ void fill_implant_mask(const input_ndarray<mask_type> mask, int U_i = int(floor((U - U_min) * real_t(n_segments-1) / (U_max - U_min))); - if ( in_bbox(U,V,W,bbox) ) { + if ( in_bbox({{U,V,W}},bbox) ) { //#pragma acc atomic update rsqr_maxs_d[U_i] = max(rsqr_maxs_d[U_i], float(r_sqr)); theta_min = min(theta_min, theta); @@ -175,4 +175,4 @@ void zero_outside_bbox(const array<real_t,9> &principal_axes, return cpu_seq::zero_outside_bbox(principal_axes, parameter_ranges, cm, voxels); } -} \ No newline at end of file +} From bfe1508f1e4dde101947e0ef2a0886b1bbf3c951 Mon Sep 17 00:00:00 2001 From: James Avery <avery@nbi.ku.dk> Date: Tue, 25 Apr 2023 16:03:50 +0200 Subject: [PATCH 136/136] Added pip_install target to install python requirements --- src/Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Makefile b/src/Makefile index 57bc345..fdc1bd9 100644 --- a/src/Makefile +++ b/src/Makefile @@ -37,7 +37,7 @@ else $(info OpenACC compiler nvc++ not found. Compiling without.) 
endif -all: $(TARGETS) +all: $(TARGETS) pip_install define GEN_RULE $(CPP_FOLDER)/$(PLATFORM)/$(LIB)$(PYBIND_SUFFIX): pybind/$(LIB)-pybind.cc $(CPP_FOLDER)/$(PLATFORM)/$(LIB).cc $(CPP_FOLDER)/include/*.hh @@ -52,6 +52,9 @@ $(foreach PLATFORM, $(PLATFORMS), \ # TODO lightweight openacc profiling can be done with the environment variable NV_ACC_TIME=1 !!! +pip_install: + $(PYTHON) -m pip install -r requirements.txt + test: all $(PYTHON) -m pytest -n auto test @@ -59,4 +62,4 @@ test_%: test/test_%.py all $(PYTHON) -m pytest -n auto $< clean: - rm -rf $(CLEANUP) __pycache__ test/__pycache__ .pytest_cache lib/cpp/**/*.so \ No newline at end of file + rm -rf $(CLEANUP) __pycache__ test/__pycache__ .pytest_cache lib/cpp/**/*.so