Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reorder Jacobian calculation #707

Merged
merged 13 commits into from
Feb 11, 2025
Merged
7 changes: 6 additions & 1 deletion .github/workflows/docker_and_coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,14 @@ jobs:

- name: Run tests in container
# only run this if we are not running coverage tests or have built the cuda files
if: matrix.dockerfile != 'Dockerfile.coverage' && matrix.dockerfile != 'Dockerfile.nvhpc'
if: matrix.dockerfile != 'Dockerfile.coverage' && matrix.dockerfile != 'Dockerfile.nvhpc' && matrix.dockerfile != 'Dockerfile.llvm'
run: docker run --name test-container -t micm bash -c 'make test ARGS="--rerun-failed --output-on-failure -j8"'

- name: Run tests using LLVM in container
# excludes some very expensive memcheck tests
if: matrix.dockerfile == 'Dockerfile.llvm'
run: docker run --name test-container -t micm bash -c 'make test ARGS="--rerun-failed --output-on-failure -j8 -E \"memcheck_jit_linear_solver|memcheck_analytical_rosenbrock_integration\""'

- name: Run coverage tests in container
if: matrix.dockerfile == 'Dockerfile.coverage'
run: docker run --name test-container -t micm bash -c 'make coverage ARGS="--rerun-failed --output-on-failure -j8"'
Expand Down
21 changes: 12 additions & 9 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ jobs:
runs-on: windows-2019
strategy:
matrix:
build_type: [Release]
architecture: [x64]

steps:
Expand All @@ -27,15 +28,15 @@ jobs:
version: 12.2.0 # https://github.com/egor-tensin/setup-mingw/issues/14

- name: Run Cmake
run: cmake -S . -B build -G "MinGW Makefiles"
run: cmake -S . -B build -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -G "MinGW Makefiles"

- name: Build
run: cmake --build build --parallel 10
run: cmake --build build --config ${{ matrix.build_type }} --parallel 10

- name: Run tests
run: |
cd build
ctest -C Debug --rerun-failed --output-on-failure . --verbose
ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure . --verbose

msvc2022:
runs-on: windows-2022
Expand All @@ -59,31 +60,33 @@ jobs:
continue-on-error: true
strategy:
matrix:
build_type: [Release]
version: [11, 12, 13, 14, 15]

steps:
- uses: actions/checkout@v4
- name: Install Clang
run: curl -fsSL -o LLVM${{ matrix.version }}.exe https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ matrix.version }}.0.0/LLVM-${{ matrix.version }}.0.0-win64.exe ; 7z x LLVM${{ matrix.version }}.exe -y -o"C:/Program Files/LLVM"
- name: Run CMake
run: cmake -S . -B build -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang++.exe" -G"MinGW Makefiles"
run: cmake -S . -B build -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang++.exe" -G"MinGW Makefiles"
- name: Build
run: cmake --build build --parallel 10
run: cmake --build build --config ${{ matrix.build_type }} --parallel 10
- name: Test
run: cd build ; ctest -j 10 -C Debug --exclude-regex "test-unicode" --output-on-failure
run: cd build ; ctest -j 10 -C ${{ matrix.build_type }} --exclude-regex "test-unicode" --output-on-failure

clang-cl-11:
runs-on: windows-2019
continue-on-error: true
strategy:
matrix:
build_type: [Release]
architecture: [x64]

steps:
- uses: actions/checkout@v4
- name: Run CMake
run: cmake -S . -B build -G "Visual Studio 16 2019" -A ${{ matrix.architecture }} -T ClangCL
run: cmake -S . -B build -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -G "Visual Studio 16 2019" -A ${{ matrix.architecture }} -T ClangCL
- name: Build
run: cmake --build build --config Debug --parallel 10
run: cmake --build build --config ${{ matrix.build_type }} --parallel 10
- name: Test
run: cd build ; ctest -j 10 -C Debug --exclude-regex "test-unicode" --output-on-failure
run: cd build ; ctest -j 10 -C ${{ matrix.build_type }} --exclude-regex "test-unicode" --output-on-failure
2 changes: 1 addition & 1 deletion docker/Dockerfile.nvhpc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ RUN mkdir /build \
-D MICM_ENABLE_CUDA=ON \
-D MICM_GPU_TYPE="a100" \
../micm \
&& make
&& make -j 8

WORKDIR /build
7 changes: 3 additions & 4 deletions include/micm/cuda/process/cuda_process_set.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ namespace micm
/// members of class "CudaProcessSet" on the device
void FreeConstData(ProcessSetParam& devstruct);

/// This is the function that will copy the "jacobian_flat_id"
/// of class "ProcessSet" to the device, after the matrix
/// structure is known
void CopyJacobiFlatId(ProcessSetParam& hoststruct, ProcessSetParam& devstruct);
/// This is the function that will copy the information needed
/// to calculated Jacobian terms to the device
void CopyJacobianParams(ProcessSetParam& hoststruct, ProcessSetParam& devstruct);

/// This is the host function that will call the CUDA kernel
/// to calculate the forcing terms
Expand Down
20 changes: 19 additions & 1 deletion include/micm/cuda/process/cuda_process_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,29 @@ namespace micm
micm::ProcessSet::SetJacobianFlatIds(matrix);

ProcessSetParam hoststruct;
std::vector<ProcessInfoParam> jacobian_process_info(this->jacobian_process_info_.size());
std::size_t i_process = 0;
for (const auto & process_info : this->jacobian_process_info_)
{
jacobian_process_info[i_process].process_id_ = process_info.process_id_;
jacobian_process_info[i_process].independent_id_ = process_info.independent_id_;
jacobian_process_info[i_process].number_of_dependent_reactants_ = process_info.number_of_dependent_reactants_;
jacobian_process_info[i_process].number_of_products_ = process_info.number_of_products_;
++i_process;
}
hoststruct.jacobian_process_info_ = jacobian_process_info.data();
hoststruct.jacobian_process_info_size_ = jacobian_process_info.size();
hoststruct.jacobian_reactant_ids_ = this->jacobian_reactant_ids_.data();
hoststruct.jacobian_reactant_ids_size_ = this->jacobian_reactant_ids_.size();
hoststruct.jacobian_product_ids_ = this->jacobian_product_ids_.data();
hoststruct.jacobian_product_ids_size_ = this->jacobian_product_ids_.size();
hoststruct.jacobian_yields_ = this->jacobian_yields_.data();
hoststruct.jacobian_yields_size_ = this->jacobian_yields_.size();
hoststruct.jacobian_flat_ids_ = this->jacobian_flat_ids_.data();
hoststruct.jacobian_flat_ids_size_ = this->jacobian_flat_ids_.size();

// Copy the data from host struct to device struct
micm::cuda::CopyJacobiFlatId(hoststruct, this->devstruct_);
micm::cuda::CopyJacobianParams(hoststruct, this->devstruct_);
}

template<class MatrixPolicy>
Expand Down
17 changes: 17 additions & 0 deletions include/micm/cuda/util/cuda_param.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@
// make sure to choose the BLOCK_SIZE from [32, 64, 128, 256, 512, 1024]
const std::size_t BLOCK_SIZE = 512;

/// This struct holds information about a process for the Jacobian calculation
struct ProcessInfoParam
{
std::size_t process_id_;
std::size_t independent_id_;
std::size_t number_of_dependent_reactants_;
std::size_t number_of_products_;
};

/// This struct holds the (1) pointer to, and (2) size of
/// each constatnt data member from the class "ProcessSet";
/// This struct could be allocated on the host or device;
Expand All @@ -19,12 +28,20 @@ struct ProcessSetParam
std::size_t* number_of_products_ = nullptr;
std::size_t* product_ids_ = nullptr;
double* yields_ = nullptr;
ProcessInfoParam* jacobian_process_info_ = nullptr;
std::size_t* jacobian_reactant_ids_ = nullptr;
std::size_t* jacobian_product_ids_ = nullptr;
double* jacobian_yields_ = nullptr;
std::size_t* jacobian_flat_ids_ = nullptr;
std::size_t number_of_reactants_size_;
std::size_t reactant_ids_size_;
std::size_t number_of_products_size_;
std::size_t product_ids_size_;
std::size_t yields_size_;
std::size_t jacobian_process_info_size_;
std::size_t jacobian_reactant_ids_size_;
std::size_t jacobian_product_ids_size_;
std::size_t jacobian_yields_size_;
std::size_t jacobian_flat_ids_size_;
};

Expand Down
127 changes: 60 additions & 67 deletions include/micm/jit/process/jit_process_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,81 +222,74 @@ namespace micm
llvm::AllocaInst *rate_array = func.builder_->CreateAlloca(
rate_array_type, llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, 1)), "d_rate_d_ind_array");

auto react_ids = reactant_ids_.begin();
auto yields = yields_.begin();
auto react_ids = jacobian_reactant_ids_.begin();
auto yields = jacobian_yields_.begin();
auto flat_id = jacobian_flat_ids_.begin();
for (std::size_t i_rxn = 0; i_rxn < number_of_reactants_.size(); ++i_rxn)
for (const auto& process_info : jacobian_process_info_)
{
llvm::Value *rc_start = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, i_rxn * L));
for (std::size_t i_ind = 0; i_ind < number_of_reactants_[i_rxn]; ++i_ind)
llvm::Value *rc_start = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, process_info.process_id_ * L));
// save rate constant in d_rate_d_ind for each grid cell
auto loop = func.StartLoop("rate constant", 0, L);
llvm::Value *ptr_index[1];
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, rc_start);
llvm::Value *rate_const = func.GetArrayElement(func.arguments_[0], ptr_index, JitType::Double);
llvm::Value *array_index[2];
array_index[0] = zero;
array_index[1] = loop.index_;
llvm::Value *rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
func.builder_->CreateStore(rate_const, rate_ptr);
func.EndLoop(loop);

// d_rate_d_ind[i_cell] *= reactant_concentration for each reactant except ind
for (std::size_t i_react = 0; i_react < process_info.number_of_dependent_reactants_; ++i_react)
{
loop = func.StartLoop("d_rate_d_ind calc", 0, L);
llvm::Value *react_id = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, react_ids[i_react] * L));
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, react_id);
llvm::Value *react_conc = func.GetArrayElement(func.arguments_[1], ptr_index, JitType::Double);
array_index[1] = loop.index_;
rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
llvm::Value *rate = func.builder_->CreateLoad(double_type, rate_ptr, "d_rate_d_ind");
rate = func.builder_->CreateFMul(rate, react_conc, "d_rate_d_ind");
func.builder_->CreateStore(rate, rate_ptr);
func.EndLoop(loop);
}

// set jacobian terms for each reactant jac[i_react][i_ind][i_cell] += d_rate_d_ind[i_cell]
for (std::size_t i_dep = 0; i_dep < process_info.number_of_dependent_reactants_+1; ++i_dep)
{
// save rate constant in d_rate_d_ind for each grid cell
auto loop = func.StartLoop("rate constant", 0, L);
llvm::Value *ptr_index[1];
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, rc_start);
llvm::Value *rate_const = func.GetArrayElement(func.arguments_[0], ptr_index, JitType::Double);
llvm::Value *array_index[2];
array_index[0] = zero;
loop = func.StartLoop("reactant term", 0, L);
llvm::Value *dep_id = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, *(flat_id++)));
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, dep_id);
llvm::Value *dep_jac_ptr = func.builder_->CreateGEP(double_type, func.arguments_[2].ptr_, ptr_index);
llvm::Value *dep_jac = func.builder_->CreateLoad(double_type, dep_jac_ptr, "reactant jacobian term");
array_index[1] = loop.index_;
llvm::Value *rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
func.builder_->CreateStore(rate_const, rate_ptr);
rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
llvm::Value *rate = func.builder_->CreateLoad(double_type, rate_ptr, "d_rate_d_ind");
dep_jac = func.builder_->CreateFAdd(dep_jac, rate, "reactant jacobian term");
func.builder_->CreateStore(dep_jac, dep_jac_ptr);
func.EndLoop(loop);
}

// d_rate_d_ind[i_cell] *= reactant_concentration for each reactant except ind
for (std::size_t i_react = 0; i_react < number_of_reactants_[i_rxn]; ++i_react)
{
if (i_react == i_ind)
{
continue;
}
loop = func.StartLoop("d_rate_d_ind calc", 0, L);
llvm::Value *react_id = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, react_ids[i_react] * L));
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, react_id);
llvm::Value *react_conc = func.GetArrayElement(func.arguments_[1], ptr_index, JitType::Double);
array_index[1] = loop.index_;
rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
llvm::Value *rate = func.builder_->CreateLoad(double_type, rate_ptr, "d_rate_d_ind");
rate = func.builder_->CreateFMul(rate, react_conc, "d_rate_d_ind");
func.builder_->CreateStore(rate, rate_ptr);
func.EndLoop(loop);
}

// set jacobian terms for each reactant jac[i_react][i_ind][i_cell] += d_rate_d_ind[i_cell]
for (std::size_t i_dep = 0; i_dep < number_of_reactants_[i_rxn]; ++i_dep)
// set jacobian terms for each product jac[i_prod][i_ind][i_cell] -= yield * d_rate_d_ind[i_cell]
for (std::size_t i_dep = 0; i_dep < process_info.number_of_products_; ++i_dep)
{
loop = func.StartLoop("reactant term", 0, L);
llvm::Value *dep_id = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, *(flat_id++)));
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, dep_id);
llvm::Value *dep_jac_ptr = func.builder_->CreateGEP(double_type, func.arguments_[2].ptr_, ptr_index);
llvm::Value *dep_jac = func.builder_->CreateLoad(double_type, dep_jac_ptr, "reactant jacobian term");
array_index[1] = loop.index_;
rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
llvm::Value *rate = func.builder_->CreateLoad(double_type, rate_ptr, "d_rate_d_ind");
dep_jac = func.builder_->CreateFAdd(dep_jac, rate, "reactant jacobian term");
func.builder_->CreateStore(dep_jac, dep_jac_ptr);
func.EndLoop(loop);
}

// set jacobian terms for each product jac[i_prod][i_ind][i_cell] -= yield * d_rate_d_ind[i_cell]
for (std::size_t i_dep = 0; i_dep < number_of_products_[i_rxn]; ++i_dep)
{
loop = func.StartLoop("product term", 0, L);
llvm::Value *dep_id = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, *(flat_id++)));
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, dep_id);
llvm::Value *dep_jac_ptr = func.builder_->CreateGEP(double_type, func.arguments_[2].ptr_, ptr_index);
llvm::Value *dep_jac = func.builder_->CreateLoad(double_type, dep_jac_ptr, "product jacobian term");
array_index[1] = loop.index_;
rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
llvm::Value *rate = func.builder_->CreateLoad(double_type, rate_ptr, "d_rate_d_ind");
llvm::Value *yield = llvm::ConstantFP::get(*(func.context_), llvm::APFloat(yields[i_dep]));
rate = func.builder_->CreateFMul(rate, yield, "product yield");
dep_jac = func.builder_->CreateFSub(dep_jac, rate, "product jacobian term");
func.builder_->CreateStore(dep_jac, dep_jac_ptr);
func.EndLoop(loop);
}
loop = func.StartLoop("product term", 0, L);
llvm::Value *dep_id = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, *(flat_id++)));
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, dep_id);
llvm::Value *dep_jac_ptr = func.builder_->CreateGEP(double_type, func.arguments_[2].ptr_, ptr_index);
llvm::Value *dep_jac = func.builder_->CreateLoad(double_type, dep_jac_ptr, "product jacobian term");
array_index[1] = loop.index_;
rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
llvm::Value *rate = func.builder_->CreateLoad(double_type, rate_ptr, "d_rate_d_ind");
llvm::Value *yield = llvm::ConstantFP::get(*(func.context_), llvm::APFloat(yields[i_dep]));
rate = func.builder_->CreateFMul(rate, yield, "product yield");
dep_jac = func.builder_->CreateFSub(dep_jac, rate, "product jacobian term");
func.builder_->CreateStore(dep_jac, dep_jac_ptr);
func.EndLoop(loop);
}
react_ids += number_of_reactants_[i_rxn];
yields += number_of_products_[i_rxn];
react_ids += process_info.number_of_dependent_reactants_;
yields += process_info.number_of_products_;
}
func.builder_->CreateRetVoid();

Expand Down
Loading
Loading