From 4ae05ffd8dfe9b54d2c0c6a0d19c5da3c28b339f Mon Sep 17 00:00:00 2001 From: mcarlson801 Date: Mon, 12 Aug 2024 14:09:54 -0700 Subject: [PATCH 01/11] Port DistParamDeriv and HessianVec specializations for uvm-free optimization --- src/evaluators/gather/PHAL_GatherSolution.hpp | 4 + .../gather/PHAL_GatherSolution_Def.hpp | 25 ++-- .../PHAL_ResponseSquaredL2DifferenceSide.hpp | 10 +- ...AL_ResponseSquaredL2DifferenceSide_Def.hpp | 93 ++++++------ .../PHAL_SeparableScatterScalarResponse.hpp | 11 ++ ...HAL_SeparableScatterScalarResponse_Def.hpp | 140 ++++++++++-------- 6 files changed, 156 insertions(+), 127 deletions(-) diff --git a/src/evaluators/gather/PHAL_GatherSolution.hpp b/src/evaluators/gather/PHAL_GatherSolution.hpp index 4064ce4e97..3bd1ce519b 100644 --- a/src/evaluators/gather/PHAL_GatherSolution.hpp +++ b/src/evaluators/gather/PHAL_GatherSolution.hpp @@ -297,6 +297,10 @@ class GatherSolution using Base::get_ref_dotdot; using Base::numFields; using Base::m_fields_offsets; + +protected: + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; }; // ************************************************************** diff --git a/src/evaluators/gather/PHAL_GatherSolution_Def.hpp b/src/evaluators/gather/PHAL_GatherSolution_Def.hpp index 2ef3130b4c..0c55b9cd97 100644 --- a/src/evaluators/gather/PHAL_GatherSolution_Def.hpp +++ b/src/evaluators/gather/PHAL_GatherSolution_Def.hpp @@ -574,37 +574,38 @@ evaluateFields(typename Traits::EvalData workset) this->gather_fields_offsets(dof_mgr); const auto ws = workset.wsIndex; - const auto elem_lids = workset.disc->getWsElementLIDs().host(); - const auto elem_dof_lids = dof_mgr->elem_dof_lids().host(); + const auto elem_lids = workset.disc->getWsElementLIDs().dev(); + const auto elem_dof_lids = dof_mgr->elem_dof_lids().dev(); constexpr auto ALL = Kokkos::ALL; //get const (read-only) view of x and xdot - using const_data_t = Teuchos::ArrayRCP; + using const_data_t = Albany::ThyraVDeviceView; const_data_t x_data, xdot_data, xdotdot_data; - x_data = Albany::getLocalData(x); + x_data = Albany::getDeviceData(x); if (xdot!=Teuchos::null) - xdot_data = Albany::getLocalData(xdot); + xdot_data = Albany::getDeviceData(xdot); if (xdotdot!=Teuchos::null) - xdotdot_data = Albany::getLocalData(xdotdot); + xdotdot_data = Albany::getDeviceData(xdotdot); - const auto& fields_offsets = m_fields_offsets.host(); + const auto& fields_offsets = m_fields_offsets.dev(); const int first_dof = this->offset; - for (std::size_t cell=0; cell < workset.numCells; ++cell ) { + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(ws,cell); const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); for (int node = 0; node < this->numNodes; ++node) { for (int eq = 0; eq < numFields; ++eq) { const auto lid = dof_lids(fields_offsets(node,eq+first_dof)); - get_ref(cell,node,eq) = x_data[lid]; + this->device_sol.get_ref(cell,node,eq) = x_data(lid); if (gather_xdot) - get_ref_dot(cell,node,eq) = xdot_data[lid]; + this->device_sol.get_ref_dot(cell,node,eq) = xdot_data(lid); if (gather_xdotdot) - get_ref_dotdot(cell,node,eq) = xdotdot_data[lid]; + this->device_sol.get_ref_dotdot(cell,node,eq) = xdotdot_data(lid); } } - } + }); } // ********************************************************************** diff --git a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide.hpp b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide.hpp index 5eb4222c8e..56bec8ea1e 100644 --- a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide.hpp +++ b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide.hpp @@ -9,6 +9,8 @@ #include "PHAL_SeparableScatterScalarResponse.hpp" +#include "Albany_KokkosUtils.hpp" + namespace PHAL { /** * \brief Response Description @@ -44,6 +46,8 @@ class ResponseSquaredL2DifferenceSideBase : public PHAL::SeparableScatterScalarR int sideDim; int numQPs; int fieldDim; + int dims_2; + int dims_3; std::vector dims; bool target_value, rmsScaling, extrudedParams, isFieldGradient; @@ -57,10 +61,10 @@ class ResponseSquaredL2DifferenceSideBase : public PHAL::SeparableScatterScalarR PHX::MDField metric; PHX::MDField w_measure; - std::vector diff_1; - std::vector> diff_2; - bool resp_depends_on_sol_column; + + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; }; //-- SourceScalarT = ScalarT diff --git a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp index 0f47414b1b..3d64013d18 100644 --- a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp +++ b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp @@ -55,6 +55,10 @@ ResponseSquaredL2DifferenceSideBase(Teuchos::ParameterList& p, const Teuchos::RC layout->dimensions(dims); + // std::vector isn't accessible on device + dims_2 = dims[2]; + dims_3 = dims[3]; + std::string sideSetNameForMetric = (plist->isParameter("Is Side Set Planar") && plist->get("Is Side Set Planar")) ? sideSetName + "_planar" : @@ -167,15 +171,7 @@ evaluateFields(typename Traits::EvalData workset) "Side sets defined in input file but not properly specified on the mesh" << std::endl); // Zero out local response - PHAL::set(this->local_response_eval, 0.0); - - if (fieldDim == 1) { - diff_1.resize(dims[2]); - } else if (fieldDim == 2) { - diff_2.resize(dims[2]); - for(size_t i=0; ilocal_response_eval.get_view(), 0.0); if (workset.sideSets->find(sideSetName) != workset.sideSets->end()) { @@ -184,10 +180,10 @@ evaluateFields(typename Traits::EvalData workset) switch (fieldDim) { case 0: - for (int sideSet_idx = 0; sideSet_idx < sideSet.size; ++sideSet_idx) - { + Kokkos::parallel_for(this->getName(),RangePolicy(0,sideSet.size), + KOKKOS_CLASS_LAMBDA(const int& sideSet_idx) { // Get the local data of cell - const int cell = sideSet.ws_elem_idx.h_view(sideSet_idx); + const int cell = sideSet.ws_elem_idx.d_view(sideSet_idx); ScalarT sum = 0; for (int qp=0; qplocal_response_eval(cell, 0) = sum*scaling; - this->global_response_eval(0) += sum*scaling; - } + KU::atomic_add(&(this->local_response_eval(cell, 0)), sum*scaling); + KU::atomic_add(&(this->global_response_eval(0)), sum*scaling); + }); break; case 1: - for (int sideSet_idx = 0; sideSet_idx < sideSet.size; ++sideSet_idx) - { + Kokkos::parallel_for(this->getName(),RangePolicy(0,sideSet.size), + KOKKOS_CLASS_LAMBDA(const int& sideSet_idx) { // Get the local data of cell - const int cell = sideSet.ws_elem_idx.h_view(sideSet_idx); + const int cell = sideSet.ws_elem_idx.d_view(sideSet_idx); ScalarT sum = 0; for (int qp=0; qplocal_response_eval(cell, 0) = sum*scaling; - this->global_response_eval(0) += sum*scaling; - } + KU::atomic_add(&(this->local_response_eval(cell, 0)), sum*scaling); + KU::atomic_add(&(this->global_response_eval(0)), sum*scaling); + }); break; case 2: - for (int sideSet_idx = 0; sideSet_idx < sideSet.size; ++sideSet_idx) - { + Kokkos::parallel_for(this->getName(),RangePolicy(0,sideSet.size), + KOKKOS_CLASS_LAMBDA(const int& sideSet_idx) { // Get the local data of cell - const int cell = sideSet.ws_elem_idx.h_view(sideSet_idx); + const int cell = sideSet.ws_elem_idx.d_view(sideSet_idx); ScalarT sum = 0; for (int qp=0; qplocal_response_eval(cell, 0) = sum*scaling; - this->global_response_eval(0) += sum*scaling; - } + KU::atomic_add(&(this->local_response_eval(cell, 0)), sum*scaling); + KU::atomic_add(&(this->global_response_eval(0)), sum*scaling); + }); break; } diff --git a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse.hpp b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse.hpp index f310d9b0c8..37812cde50 100644 --- a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse.hpp +++ b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse.hpp @@ -205,6 +205,12 @@ class SeparableScatterScalarResponse private: typedef typename AlbanyTraits::DistParamDeriv::ScalarT ScalarT; + +protected: + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; + MDFieldVectorRight global_response_reader; + }; @@ -311,6 +317,11 @@ class SeparableScatterScalarResponse private: typedef typename AlbanyTraits::HessianVec::ScalarT ScalarT; + +protected: + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; + MDFieldVectorRight global_response_reader; }; diff --git a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp index 18f43467b5..f0bc4cc8ec 100644 --- a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp +++ b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp @@ -293,16 +293,19 @@ evaluateFields(typename Traits::EvalData workset) } constexpr auto ALL = Kokkos::ALL(); - const auto dgdp_data = Albany::getNonconstLocalData(dgdp); + const auto dgdp_data = Albany::getNonconstDeviceData(dgdp); const int ws = workset.wsIndex; const int num_deriv = numNodes; - const auto elem_lids = workset.disc->getElementLIDs_host(ws); + const auto elem_lids_all = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_all.dev(),ws,ALL); + const auto param = workset.distParamLib->get(workset.dist_param_deriv_name); - const auto p_elem_dof_lids = param->get_dof_mgr()->elem_dof_lids().host(); + const auto p_elem_dof_lids = param->get_dof_mgr()->elem_dof_lids().dev(); // Loop over cells in workset - for (std::size_t cell=0; cell < workset.numCells; ++cell) { + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); const auto dof_lids = Kokkos::subview(p_elem_dof_lids,elem_LID,ALL); @@ -314,11 +317,11 @@ evaluateFields(typename Traits::EvalData workset) // If param defined at this node, update dg/dp const int row = dof_lids(deriv); if(row >=0){ - dgdp_data[res][row] += this->local_response(cell, res).dx(deriv); + KU::atomic_add(&dgdp_data(row,res), this->local_response(cell, res).dx(deriv)); } } // deriv } // response - } // cell + }); // cell } template @@ -327,10 +330,14 @@ postEvaluate(typename Traits::PostEvalData workset) { auto g = workset.g; if (g != Teuchos::null) { - auto g_nonconstView = Albany::getNonconstLocalData(g); - for (std::size_t res=0; resglobal_response.size(); ++res) { - g_nonconstView[res] = this->global_response[res].val(); - } + auto g_nonconstView = Albany::getNonconstDeviceData(g); + MDFieldVectorRight gr(this->global_response); + global_response_reader = gr; + Kokkos::parallel_for(this->getName(), + Kokkos::RangePolicy(0,this->global_response.size()), + KOKKOS_CLASS_LAMBDA(const int i) { + g_nonconstView(i) = global_response_reader[i].val(); + }); } auto dgdp = workset.dgdp; @@ -478,18 +485,18 @@ evaluateFields(typename Traits::EvalData workset) } // Extract multivectors raw data - using mv_data_t = Teuchos::ArrayRCP>; + using mv_data_t = Albany::ThyraMVDeviceView; mv_data_t hess_vec_prod_g_xx_data, hess_vec_prod_g_xp_data, hess_vec_prod_g_px_data, hess_vec_prod_g_pp_data; if (!hess_vec_prod_g_xx.is_null()) - hess_vec_prod_g_xx_data = Albany::getNonconstLocalData(hess_vec_prod_g_xx); + hess_vec_prod_g_xx_data = Albany::getNonconstDeviceData(hess_vec_prod_g_xx); if (!hess_vec_prod_g_xp.is_null()) - hess_vec_prod_g_xp_data = Albany::getNonconstLocalData(hess_vec_prod_g_xp); + hess_vec_prod_g_xp_data = Albany::getNonconstDeviceData(hess_vec_prod_g_xp); if (!hess_vec_prod_g_px.is_null()) - hess_vec_prod_g_px_data = Albany::getNonconstLocalData(hess_vec_prod_g_px); + hess_vec_prod_g_px_data = Albany::getNonconstDeviceData(hess_vec_prod_g_px); if (!hess_vec_prod_g_pp.is_null()) - hess_vec_prod_g_pp_data = Albany::getNonconstLocalData(hess_vec_prod_g_pp); + hess_vec_prod_g_pp_data = Albany::getNonconstDeviceData(hess_vec_prod_g_pp); const bool do_xx = !hess_vec_prod_g_xx.is_null(); const bool do_xp = !hess_vec_prod_g_xp.is_null(); @@ -500,10 +507,10 @@ evaluateFields(typename Traits::EvalData workset) // Get some data from the discretization const auto param_name = workset.dist_param_deriv_name; - Albany::DualView::host_t p_elem_dof_lids; + Albany::DualView::dev_t p_elem_dof_lids; if (do_dist_pp || do_dist_px) { auto dist_param = workset.distParamLib->get(param_name); - p_elem_dof_lids = dist_param->get_dof_mgr()->elem_dof_lids().host(); + p_elem_dof_lids = dist_param->get_dof_mgr()->elem_dof_lids().dev(); } const auto& dof_mgr = workset.disc->getDOFManager(); @@ -511,68 +518,67 @@ evaluateFields(typename Traits::EvalData workset) const int ws = workset.wsIndex; const int neq = dof_mgr->getNumFields(); - const auto& elem_dof_lids = dof_mgr->elem_dof_lids().host(); - const auto& elem_lids = workset.disc->getElementLIDs_host(ws); + const int g_px_size = hess_vec_prod_g_px_data.extent(1); + const int g_pp_size = hess_vec_prod_g_pp_data.extent(1); - for (size_t cell=0; cellglobal_response.size(); ++res) { - auto lresp = this->local_response(cell, res); - - if (do_xx || do_xp) { - // Loop over equations per node - for (int eq_dof=0; eq_dofgetGIDFieldOffsets(eq_dof); + const auto& elem_dof_lids = dof_mgr->elem_dof_lids().dev(); + const auto elem_lids_all = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_all.dev(),ws,Kokkos::ALL); - const int num_nodes = offsets.size(); + for (int eq_dof=0; eq_dofgetGIDFieldOffsetsKokkos(eq_dof); + const int num_nodes = offsets.size(); + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { + const auto elem_LID = elem_lids(cell); + // Loop over responses + for (size_t res=0; resglobal_response.size(); ++res) { + auto lresp = this->local_response(cell, res); + if (do_xx || do_xp) { for (int i=0; i=0) { - hess_vec_prod_g_px_data[res][row] += lresp.dx(deriv).dx(0); + if (do_dist_px) { + for (int deriv=0; deriv=0) { + hess_vec_prod_g_px_data(row,res) += lresp.dx(deriv).dx(0); + } } } - } - if (do_dist_pp) { - for (int deriv=0; deriv=0) { - hess_vec_prod_g_pp_data[res][row] += lresp.dx(deriv).dx(0); + if (do_dist_pp) { + for (int deriv=0; deriv=0) { + hess_vec_prod_g_pp_data(row,res) += lresp.dx(deriv).dx(0); + } } } - } - if (do_scalar_px) { - for (int deriv=0; deriv @@ -588,10 +594,14 @@ postEvaluate(typename Traits::PostEvalData workset) const auto g = workset.g; if (g != Teuchos::null) { - const auto g_nonconstView = Albany::getNonconstLocalData(g); - for (size_t res=0; resglobal_response.size(); res++) { - g_nonconstView[res] = this->global_response[res].val().val(); - } + Albany::ThyraVDeviceView g_nonconstView = Albany::getNonconstDeviceData(g); + MDFieldVectorRight gr(this->global_response); + global_response_reader = gr; + Kokkos::parallel_for(this->getName(), + Kokkos::RangePolicy(0,this->global_response.size()), + KOKKOS_CLASS_LAMBDA(const int i) { + g_nonconstView(i) = this->global_response[i].val().val(); + }); } const auto& hws = workset.hessianWorkset; From 205f1522650b6c0942df1e9471d720ac61f94ab9 Mon Sep 17 00:00:00 2001 From: mcarlson801 Date: Fri, 30 Aug 2024 09:12:54 -0700 Subject: [PATCH 02/11] Convert local_VP to view and port hessianVec/DistParamDeriv gather/scatter evaluators --- src/Albany_Application.cpp | 1 - src/PHAL_Workset.hpp | 2 +- src/disc/Albany_LayeredMeshNumbering.hpp | 4 +- src/evaluators/bc/PHAL_Neumann_Def.hpp | 8 +- .../PHAL_GatherScalarNodalParameter.hpp | 3 + .../PHAL_GatherScalarNodalParameter_Def.hpp | 98 +++--- .../scatter/PHAL_ScatterResidual.hpp | 9 +- .../scatter/PHAL_ScatterResidual_Def.hpp | 293 ++++++++++++------ .../PHAL_ScatterSideEqnResidual_Def.hpp | 11 +- 9 files changed, 283 insertions(+), 146 deletions(-) diff --git a/src/Albany_Application.cpp b/src/Albany_Application.cpp index 80f8a84e51..b00332c379 100644 --- a/src/Albany_Application.cpp +++ b/src/Albany_Application.cpp @@ -3144,7 +3144,6 @@ Application::loadWorksetBucketInfo(PHAL::Workset& workset, const int& ws, workset.EBName = wsEBNames[ws]; workset.wsIndex = ws; - workset.local_Vp.resize(workset.numCells); workset.savedMDFields = phxSetup->get_saved_fields(evalName); diff --git a/src/PHAL_Workset.hpp b/src/PHAL_Workset.hpp index 9ea2acc666..595df94d76 100644 --- a/src/PHAL_Workset.hpp +++ b/src/PHAL_Workset.hpp @@ -127,7 +127,7 @@ struct Workset Teuchos::RCP distParamLib; std::string dist_param_deriv_name; bool transpose_dist_param_deriv; - Teuchos::ArrayRCP>> local_Vp; + Kokkos::View local_Vp; Teuchos::ArrayRCP> wsCoords; std::string EBName; diff --git a/src/disc/Albany_LayeredMeshNumbering.hpp b/src/disc/Albany_LayeredMeshNumbering.hpp index 8da4299316..eec52f24d4 100644 --- a/src/disc/Albany_LayeredMeshNumbering.hpp +++ b/src/disc/Albany_LayeredMeshNumbering.hpp @@ -38,6 +38,7 @@ struct LayeredMeshNumbering { numLayers = _numLayers; } + KOKKOS_INLINE_FUNCTION T getId(const T column_id, const T level_index) const { return layerOrd ? column_id + level_index*numHorizEntities : column_id * numLayers + level_index; @@ -62,11 +63,12 @@ struct LayeredMeshNumbering { return layerOrd ? 1 : numLayers; } - + KOKKOS_INLINE_FUNCTION T getColumnId (const T id) const { return layerOrd ? id % numHorizEntities : id / numLayers; } + KOKKOS_INLINE_FUNCTION T getLayerId (const T id) const { return layerOrd ? id / numHorizEntities : id % numLayers; } diff --git a/src/evaluators/bc/PHAL_Neumann_Def.hpp b/src/evaluators/bc/PHAL_Neumann_Def.hpp index 05f95f4154..f940228c13 100644 --- a/src/evaluators/bc/PHAL_Neumann_Def.hpp +++ b/src/evaluators/bc/PHAL_Neumann_Def.hpp @@ -971,6 +971,8 @@ evaluateFields(typename Traits::EvalData workset) this->evaluateNeumannContribution(workset); + const auto& local_Vp = workset.local_Vp; + constexpr auto ALL = Kokkos::ALL(); if (trans) { const int neq = workset.numEqs; @@ -979,7 +981,6 @@ evaluateFields(typename Traits::EvalData workset) const auto elem_lids = workset.disc->getElementLIDs_host(workset.wsIndex); for (size_t cell=0; cellnumNodes; ++node) { for (int dim = 0; dim < this->numDOFsSet; ++dim){ int eq = this->offset[dim]; - val += this->neumann(cell, node, dim).dx(i)*local_Vp[node*neq+eq][col]; + val += this->neumann(cell, node, dim).dx(i)*local_Vp(cell,node*neq+eq,col); } } fpV_nonconst2dView[col][row] += val; @@ -1007,7 +1008,6 @@ evaluateFields(typename Traits::EvalData workset) const auto& offsets = this->fields_offsets; for (std::size_t cell=0; cell < workset.numCells; ++cell ) { const auto elem_LID = elem_lids(cell); - const auto& local_Vp = workset.local_Vp[cell]; const int num_deriv = local_Vp.size(); const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); @@ -1018,7 +1018,7 @@ evaluateFields(typename Traits::EvalData workset) for (int col=0; colneumann(cell, node, dim).dx(i)*local_Vp[i][col]; + val += this->neumann(cell, node, dim).dx(i)*local_Vp(cell,i,col); } fpV_nonconst2dView[col][row] += val; } diff --git a/src/evaluators/gather/PHAL_GatherScalarNodalParameter.hpp b/src/evaluators/gather/PHAL_GatherScalarNodalParameter.hpp index 5d46182248..47d46be822 100644 --- a/src/evaluators/gather/PHAL_GatherScalarNodalParameter.hpp +++ b/src/evaluators/gather/PHAL_GatherScalarNodalParameter.hpp @@ -111,6 +111,9 @@ class GatherScalarNodalParameter : void evaluateFields(typename Traits::EvalData d); private: typedef typename PHAL::AlbanyTraits::DistParamDeriv::ParamScalarT ParamScalarT; +protected: + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; }; diff --git a/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp b/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp index f0c7287496..a8d989ac4e 100644 --- a/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp +++ b/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp @@ -191,17 +191,19 @@ evaluateFields(typename Traits::EvalData workset) // Distributed parameter vector const auto p = workset.distParamLib->get(this->param_name); - const auto p_data = Albany::getLocalData(p->overlapped_vector().getConst()); + const auto p_data = Albany::getDeviceData(p->overlapped_vector().getConst()); const auto Vp = workset.Vp; - const auto Vp_data = !Vp.is_null() ? Albany::getLocalData(Vp) : Teuchos::null; + Albany::ThyraMVDeviceView Vp_data; + if (!Vp.is_null()) Vp_data = Albany::getDeviceData(Vp); // Parameter/solution/nodes dof numbering info const auto dof_mgr = workset.disc->getDOFManager(); - const auto p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().host(); + const auto p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().dev(); const auto ws = workset.wsIndex; - const auto elem_lids = workset.disc->getElementLIDs_host(ws); + const auto ws_elem_lids = workset.disc->getWsElementLIDs().dev(); + const auto elem_lids = Kokkos::subview(ws_elem_lids,ws,Kokkos::ALL()); // Are we differentiating w.r.t. this parameter? const bool is_active = (workset.dist_param_deriv_name == this->param_name); @@ -211,58 +213,79 @@ evaluateFields(typename Traits::EvalData workset) const int neq = dof_mgr->getNumFields(); const int num_deriv = this->numNodes; bool trans = workset.transpose_dist_param_deriv; - const auto elem_dof_lids = dof_mgr->elem_dof_lids().host(); - for (std::size_t cell=0; cellelem_dof_lids().dev(); + + // allocate View for local_Vp + int num_cols = 0; + if (Vp != Teuchos::null) { + num_cols = Vp->domain()->dim(); + if (trans) { + const int num_dofs = elem_dof_lids.extent(1); + workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_dofs,num_cols); + } else { + workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_deriv,num_cols); + } + } + + const auto& local_Vp = workset.local_Vp; + + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); const auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,elem_LID,ALL); for (int node=0; node=0 ? p_data[lid] : 0; + const auto p_val = lid>=0 ? p_data(lid) : 0; ParamScalarT v(num_deriv, node, p_val); this->val(cell,node) = v; } - - if (Vp != Teuchos::null) { - const int num_cols = Vp->domain()->dim(); - - auto& local_Vp = workset.local_Vp[cell]; - - if (trans) { - auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); - // const auto& offsets = this->m_sol_fields_offsets; - local_Vp.resize(dof_lids.size()); - for (int eq=0; eqgetGIDFieldOffsets(eq); - for (const auto o : offsets) { - local_Vp[o].resize(num_cols); + }); + + if (Vp != Teuchos::null) { + if (trans) { + for (int eq=0; eqgetGIDFieldOffsetsKokkos(eq); + const int num_offsets = offsets.size(); + + Kokkos::parallel_for(this->getName()+"_transvp",RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { + const auto elem_LID = elem_lids(cell); + auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); + for (int o=0; ogetName()+"_notransvp",RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { + const auto elem_LID = elem_lids(cell); + const auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,elem_LID,ALL); for (int node=0; node=0 ? Vp_data[col][lid] : 0; + local_Vp(cell,node,col) = lid>=0 ? Vp_data(lid,col) : 0; } - } + }); } } } else { // If not active, just set the parameter value in the phalanx field - for (std::size_t cell=0; cell < workset.numCells; ++cell ) { + const int num_nodes = this->numNodes; + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); const auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,elem_LID,ALL); - for (int node=0; nodenumNodes; ++node) { + for (int node=0; nodeval(cell,node) = lid>=0 ? p_data[lid] : 0; + this->val(cell,node) = lid>=0 ? p_data(lid) : 0; } - } + }); } } @@ -325,6 +348,8 @@ evaluateFields(typename Traits::EvalData workset) // Are we differentiating w.r.t. this parameter? const bool is_active = (workset.dist_param_deriv_name == this->param_name); + const auto& local_Vp = workset.local_Vp; + // If active, initialize data needed for differentiation if (is_active) { const int neq = sol_dof_mgr->getNumFields(); @@ -349,27 +374,22 @@ evaluateFields(typename Traits::EvalData workset) if (Vp != Teuchos::null) { const int num_cols = workset.Vp->domain()->dim(); - auto& local_Vp = workset.local_Vp[cell]; if (trans) { auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); - local_Vp.resize(dof_lids.size()); for (int eq=0; eqgetGIDFieldOffsets(eq); for (const auto o : sol_offsets) { - local_Vp[o].resize(num_cols); const LO lid = dof_lids(o); for (int col=0; col=0 ? Vp_data[col][p_lid] : 0; + local_Vp(cell,node,col) = p_lid>=0 ? Vp_data[col][p_lid] : 0; } } } diff --git a/src/evaluators/scatter/PHAL_ScatterResidual.hpp b/src/evaluators/scatter/PHAL_ScatterResidual.hpp index bd10be77ba..2c8f3e78fd 100644 --- a/src/evaluators/scatter/PHAL_ScatterResidual.hpp +++ b/src/evaluators/scatter/PHAL_ScatterResidual.hpp @@ -267,7 +267,9 @@ class ScatterResidualWithExtrudedParams protected: using Base = ScatterResidual; using ScalarT = typename Base::ScalarT; - + using ExecutionSpace = typename Base::ExecutionSpace; + using RangePolicy = typename Base::RangePolicy; + using Base::get_resid; using Base::m_fields_offsets; using Base::numNodes; @@ -328,6 +330,8 @@ class ScatterResidual void evaluateFields(typename Traits::EvalData d); protected: using Base = ScatterResidualBase; + using ExecutionSpace = typename Base::ExecutionSpace; + using RangePolicy = typename Base::RangePolicy; using ScalarT = typename Base::ScalarT; using Base::get_resid; @@ -377,6 +381,9 @@ class ScatterResidualWithExtrudedParams using Base = ScatterResidual; using ScalarT = typename Base::ScalarT; + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; + using Base::get_resid; using Base::m_fields_offsets; using Base::numNodes; diff --git a/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp b/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp index fd8f82f99c..6515504968 100644 --- a/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp +++ b/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp @@ -394,7 +394,7 @@ evaluateFields(typename Traits::EvalData workset) { // In case the parameter has not been gathered, e.g. parameter // is used only in Dirichlet conditions. - if(workset.local_Vp[0].size() == 0) { return; } + if(workset.local_Vp.size() == 0) { return; } this->gather_fields_offsets (workset.disc->getDOFManager()); @@ -412,6 +412,8 @@ evaluateFields(typename Traits::EvalData workset) const auto& elem_dof_lids = dof_mgr->elem_dof_lids().host(); const auto eq_offset = this->offset; + const auto& local_Vp = workset.local_Vp; + if (trans) { const auto& pname = workset.dist_param_deriv_name; const auto& p_dof_mgr = workset.disc->getDOFManager(pname); @@ -420,7 +422,6 @@ evaluateFields(typename Traits::EvalData workset) const int num_deriv = numNodes;//local_Vp.size()/numFields; for (size_t cell=0; cellfind(workset.dist_param_deriv_name); if(level_it == extruded_params_levels->end()) { @@ -495,15 +495,21 @@ evaluateFields(typename Traits::EvalData workset) const int ws = workset.wsIndex; const auto fpV = workset.fpV; - const auto fpV_data = Albany::getNonconstLocalData(fpV); + const auto fpV_data = Albany::getNonconstDeviceData(fpV); const bool trans = workset.transpose_dist_param_deriv; const int num_cols = workset.Vp->domain()->dim(); const auto dof_mgr = workset.disc->getDOFManager(); - const auto elem_lids = workset.disc->getElementLIDs_host(ws); + const auto elem_lids_ws = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_ws.dev(),ws,Kokkos::ALL); + + const auto resid_offsets = m_fields_offsets.dev(); - const auto resid_offsets = m_fields_offsets.host(); + const auto& local_Vp = workset.local_Vp; + + const int offset = this->offset; + const auto& device_resid = this->device_resid; if (trans) { const auto& cell_layers_data = workset.disc->getMeshStruct()->local_cell_layers_data; @@ -516,65 +522,113 @@ evaluateFields(typename Traits::EvalData workset) const auto node_dof_mgr = workset.disc->getNodeDOFManager(); const auto p = workset.distParamLib->get(workset.dist_param_deriv_name); const auto p_dof_mgr = p->get_dof_mgr(); - const auto p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().host(); + const auto p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().dev(); const auto p_offsets = p_dof_mgr->getGIDFieldOffsetsSide(0,field_pos); const auto top_nodes = node_dof_mgr->getGIDFieldOffsetsSide(0,top,field_pos); const auto bot_nodes = node_dof_mgr->getGIDFieldOffsetsSide(0,bot,field_pos); + const auto elem_lids_host = Kokkos::subview(elem_lids_ws.host(),ws,Kokkos::ALL); + Albany::DualView basal_elem_LIDs("basal_elem_LIDs", elem_lids_host.size()); + Albany::DualView field_elem_LIDs("field_elem_LIDs", elem_lids_host.size()); + + for (int i = 0; i < elem_lids_host.size(); ++i) { + basal_elem_LIDs.host()(i) = cell_layers_data->getColumnId(i); + field_elem_LIDs.host()(i) = cell_layers_data->getId(i,fieldLayer); + } + + basal_elem_LIDs.sync_to_dev(); + field_elem_LIDs.sync_to_dev(); + + Albany::DualView p_offsets_dv("p_offsets",p_offsets.size()); + Albany::DualView top_nodes_dv("top_nodes",top_nodes.size()); + Albany::DualView bot_nodes_dv("bot_nodes",bot_nodes.size()); + + for (int i = 0; i < p_offsets.size(); ++i) { + p_offsets_dv.host()(i) = p_offsets[i]; + } + for (int i = 0; i < top_nodes.size(); ++i) { + top_nodes_dv.host()(i) = top_nodes[i]; + } + for (int i = 0; i < bot_nodes.size(); ++i) { + bot_nodes_dv.host()(i) = bot_nodes[i]; + } + + p_offsets_dv.sync_to_dev(); + top_nodes_dv.sync_to_dev(); + bot_nodes_dv.sync_to_dev(); + + const auto p_offsets_dev = p_offsets_dv.dev(); + const auto top_nodes_dev = top_nodes_dv.dev(); + const auto bot_nodes_dev = bot_nodes_dv.dev(); + const int num_nodes_side = p_offsets.size(); + // Pick a cell layer that contains the field level. Can be same as fieldLevel, // except for the last level. - for (size_t cell=0; cellgetName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); - const auto& local_Vp = workset.local_Vp[cell]; - const LO basal_elem_LID = cell_layers_data->getColumnId(elem_LID); - const LO field_elem_LID = cell_layers_data->getId(basal_elem_LID,fieldLayer); - const auto p_elem_gids = p->get_dof_mgr()->getElementGIDs(field_elem_LID); + const LO basal_elem_LID = basal_elem_LIDs.dev()(elem_LID); + const LO field_elem_LID = field_elem_LIDs.dev()(basal_elem_LID); - auto do_derivatives = [&](const std::vector& derivs) { - for (int i=0; ioffset)][col]; - } + const int deriv = bot_nodes_dev(i); + for (int col=0; coldevice_resid.get(cell,node,eq); + val += res.dx(deriv)*local_Vp(cell,resid_offsets(node,eq+this->offset),col); } - fpV_data[col][row] += val; } + KU::atomic_add(&(fpV_data(row,col)), val); } - }; - do_derivatives(bot_nodes); - do_derivatives(top_nodes); - } + } + // Top nodes + for (int i=0; idevice_resid.get(cell,node,eq); + val += res.dx(deriv)*local_Vp(cell,resid_offsets(node,eq+this->offset),col); + } + } + KU::atomic_add(&(fpV_data(row,col)), val); + } + } + }); } else { constexpr auto ALL = Kokkos::ALL(); - const auto elem_dof_lids = dof_mgr->elem_dof_lids().host(); - for (size_t cell=0; cellelem_dof_lids().dev(); + const int num_deriv = local_Vp.extent(1); + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { + const auto elem_LID = elem_lids(cell); + + const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); for (int node=0; nodeoffset)); + auto res = device_resid.get(cell,node,eq); + const int row = dof_lids(resid_offsets(node,eq+offset)); for (int col=0; col(&(fpV_data(row,col)), val); } } } - } + }); } } @@ -622,20 +676,20 @@ evaluateFields(typename Traits::EvalData workset) const auto f_multiplier = workset.hessianWorkset.overlapped_f_multiplier; - using mv_data_t = Teuchos::ArrayRCP >; + using mv_data_t = Albany::ThyraMVDeviceView; mv_data_t hess_vec_prod_f_xx_data, hess_vec_prod_f_xp_data, hess_vec_prod_f_px_data, hess_vec_prod_f_pp_data; - auto f_multiplier_data = Albany::getNonconstLocalData(f_multiplier); + auto f_multiplier_data = Albany::getNonconstDeviceData(f_multiplier); if(f_xx_is_active) - hess_vec_prod_f_xx_data = Albany::getNonconstLocalData(hess_vec_prod_f_xx); + hess_vec_prod_f_xx_data = Albany::getNonconstDeviceData(hess_vec_prod_f_xx); if(f_xp_is_active) - hess_vec_prod_f_xp_data = Albany::getNonconstLocalData(hess_vec_prod_f_xp); + hess_vec_prod_f_xp_data = Albany::getNonconstDeviceData(hess_vec_prod_f_xp); if(f_px_is_active) - hess_vec_prod_f_px_data = Albany::getNonconstLocalData(hess_vec_prod_f_px); + hess_vec_prod_f_px_data = Albany::getNonconstDeviceData(hess_vec_prod_f_px); if(f_pp_is_active) - hess_vec_prod_f_pp_data = Albany::getNonconstLocalData(hess_vec_prod_f_pp); + hess_vec_prod_f_pp_data = Albany::getNonconstDeviceData(hess_vec_prod_f_pp); constexpr auto ALL = Kokkos::ALL(); const int ws = workset.wsIndex; @@ -645,27 +699,33 @@ evaluateFields(typename Traits::EvalData workset) // If the parameter associated to workset.dist_param_deriv_name is a distributed parameter, // the function needs to access the associated dof manager to deduce the IDs of the entries // of the resulting vector. - Albany::DualView::host_t p_elem_dof_lids; + Albany::DualView::dev_t p_elem_dof_lids; if(l1_is_distributed && (f_px_is_active || f_pp_is_active)) { auto p_dof_mgr = workset.disc->getDOFManager(workset.dist_param_deriv_name); - p_elem_dof_lids = p_dof_mgr->elem_dof_lids().host(); + p_elem_dof_lids = p_dof_mgr->elem_dof_lids().dev(); } - const auto elem_lids = workset.disc->getElementLIDs_host(ws); - const auto elem_dof_lids = dof_mgr->elem_dof_lids().host(); + const auto elem_lids_ws = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_ws.dev(),ws,Kokkos::ALL); - const auto& fields_offsets = m_fields_offsets.host(); + const auto elem_dof_lids = dof_mgr->elem_dof_lids().dev(); + + const int hess_vec_prod_f_px_data_size = hess_vec_prod_f_px_data.extent(0); + const int hess_vec_prod_f_pp_data_size = hess_vec_prod_f_pp_data.extent(0); + + const auto& fields_offsets = m_fields_offsets.dev(); const int eq_offset = this->offset; - for (size_t cell=0; cellgetName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); ScalarT value=0.0; for (int node=0; nodedevice_resid.get(cell,node,eq); const int row = dof_lids(fields_offsets(node,eq+eq_offset)); - value += res * f_multiplier_data[row]; + value += res * f_multiplier_data(row); } } @@ -675,9 +735,9 @@ evaluateFields(typename Traits::EvalData workset) const int row = dof_lids(fields_offsets(node,eq+eq_offset)); const auto& dx = value.dx(fields_offsets(node,eq)).dx(0); if (f_xx_is_active) - hess_vec_prod_f_xx_data[0][row] += dx; + KU::atomic_add(&(hess_vec_prod_f_xx_data(row,0)), dx); if (f_xp_is_active) - hess_vec_prod_f_xp_data[0][row] += dx; + KU::atomic_add(&(hess_vec_prod_f_xp_data(row,0)), dx); } } @@ -686,9 +746,9 @@ evaluateFields(typename Traits::EvalData workset) if(row >=0){ const auto& dx = value.dx(node).dx(0); if(f_px_is_active) - hess_vec_prod_f_px_data[0][row] += dx; + KU::atomic_add(&(hess_vec_prod_f_px_data(row,0)), dx); if(f_pp_is_active) - hess_vec_prod_f_pp_data[0][row] += dx; + KU::atomic_add(&(hess_vec_prod_f_pp_data(row,0)), dx); } } } // node @@ -698,13 +758,13 @@ evaluateFields(typename Traits::EvalData workset) // the nodes: if(!l1_is_distributed && (f_px_is_active || f_pp_is_active)) { if(f_px_is_active) - for (unsigned int l1_i=0; l1_i(&(hess_vec_prod_f_px_data(l1_i,0)), value.dx(l1_i).dx(0)); if(f_pp_is_active) - for (unsigned int l1_i=0; l1_i(&(hess_vec_prod_f_pp_data(l1_i,0)), value.dx(l1_i).dx(0)); } - } // cell + }); // cell } // ********************************************************************** @@ -755,7 +815,8 @@ evaluate2DFieldsDerivativesDueToExtrudedParams(typename Traits::EvalData workset constexpr auto ALL = Kokkos::ALL(); const int ws = workset.wsIndex; - const auto elem_lids = workset.disc->getElementLIDs_host(ws); + const auto elem_lids_ws = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_ws.dev(),ws,Kokkos::ALL); const auto& hws = workset.hessianWorkset; @@ -764,7 +825,7 @@ evaluate2DFieldsDerivativesDueToExtrudedParams(typename Traits::EvalData workset // Here we scatter the *local* response derivative const auto f_multiplier = hws.overlapped_f_multiplier; - const auto f_multiplier_data = Albany::getNonconstLocalData(f_multiplier); + const auto f_multiplier_data = Albany::getNonconstDeviceData(f_multiplier); auto level_it = extruded_params_levels->find(workset.dist_param_deriv_name); int fieldLevel = level_it->second; @@ -772,13 +833,13 @@ evaluate2DFieldsDerivativesDueToExtrudedParams(typename Traits::EvalData workset const auto hess_vec_prod_f_px = hws.overlapped_hess_vec_prod_f_px; const auto hess_vec_prod_f_pp = hws.overlapped_hess_vec_prod_f_pp; - using mv_data_t = Teuchos::ArrayRCP>; + using mv_data_t = Albany::ThyraMVDeviceView; mv_data_t hess_vec_prod_f_px_data, hess_vec_prod_f_pp_data; if(f_px_is_active) - hess_vec_prod_f_px_data = Albany::getNonconstLocalData(hess_vec_prod_f_px); + hess_vec_prod_f_px_data = Albany::getNonconstDeviceData(hess_vec_prod_f_px); if(f_pp_is_active) - hess_vec_prod_f_pp_data = Albany::getNonconstLocalData(hess_vec_prod_f_pp); + hess_vec_prod_f_pp_data = Albany::getNonconstDeviceData(hess_vec_prod_f_pp); const auto& layers_data = workset.disc->getLayeredMeshNumberingLO(); const int top = layers_data->top_side_pos; @@ -788,8 +849,8 @@ evaluate2DFieldsDerivativesDueToExtrudedParams(typename Traits::EvalData workset const auto dof_mgr = workset.disc->getDOFManager(); const auto p_dof_mgr = workset.disc->getDOFManager(workset.dist_param_deriv_name); - const auto elem_dof_lids = dof_mgr->elem_dof_lids().host(); - const auto p_elem_dof_lids = p_dof_mgr->elem_dof_lids().host(); + const auto elem_dof_lids = dof_mgr->elem_dof_lids().dev(); + const auto p_elem_dof_lids = p_dof_mgr->elem_dof_lids().dev(); // Note: grab offsets on top/bot ordered in the same way as on side $field_pos // to guarantee corresponding nodes are vertically aligned. @@ -798,38 +859,82 @@ evaluate2DFieldsDerivativesDueToExtrudedParams(typename Traits::EvalData workset const auto p_offsets = fieldLevel==fieldLayer ? bot_offsets : top_offsets; const auto numSideNodes = p_offsets.size(); - const auto offsets = m_fields_offsets.host(); - for (size_t cell=0; cell basal_elem_LIDs("basal_elem_LIDs", elem_lids_host.size()); + Albany::DualView field_elem_LIDs("field_elem_LIDs", elem_lids_host.size()); + + for (int i = 0; i < elem_lids_host.size(); ++i) { + basal_elem_LIDs.host()(i) = layers_data->getColumnId(i); + field_elem_LIDs.host()(i) = layers_data->getId(i,fieldLayer); + } + + basal_elem_LIDs.sync_to_dev(); + field_elem_LIDs.sync_to_dev(); + + Albany::DualView p_offsets_dv("p_offsets",p_offsets.size()); + Albany::DualView top_offsets_dv("top_offsets",top_offsets.size()); + Albany::DualView bot_offsets_dv("bot_offsets",bot_offsets.size()); + + for (int i = 0; i < p_offsets.size(); ++i) { + p_offsets_dv.host()(i) = p_offsets[i]; + } + for (int i = 0; i < top_offsets.size(); ++i) { + top_offsets_dv.host()(i) = top_offsets[i]; + } + for (int i = 0; i < bot_offsets.size(); ++i) { + bot_offsets_dv.host()(i) = bot_offsets[i]; + } + + p_offsets_dv.sync_to_dev(); + top_offsets_dv.sync_to_dev(); + bot_offsets_dv.sync_to_dev(); + + const auto p_offsets_dev = p_offsets_dv.dev(); + const auto top_offsets_dev = top_offsets_dv.dev(); + const auto bot_offsets_dev = bot_offsets_dv.dev(); + + const auto offsets = m_fields_offsets.dev(); + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); ScalarT value=0.0; for (int node=0; nodedevice_resid.get(cell,node,eq); const auto lid = dof_lids(offsets(node,eq+this->offset)); - value += res * f_multiplier_data[lid]; + value += res * f_multiplier_data(lid); } } - const auto basal_elem_LID = layers_data->getColumnId(elem_LID); - const auto field_elem_LID = layers_data->getId(basal_elem_LID,fieldLayer); - const auto do_nodes = [&] (const std::vector& offsets) { - for (std::size_t node=0; node=0) { - const auto& dx = value.dx(offsets[node]).dx(0); - if (f_px_is_active) - hess_vec_prod_f_px_data[0][row] += dx; - if (f_pp_is_active) - hess_vec_prod_f_pp_data[0][row] += dx; - } + const LO basal_elem_LID = basal_elem_LIDs.dev()(elem_LID); + const LO field_elem_LID = field_elem_LIDs.dev()(basal_elem_LID); + + // do bot_offsets + for (std::size_t node=0; node=0) { + const auto& dx = value.dx(bot_offsets_dev(node)).dx(0); + if (f_px_is_active) + KU::atomic_add(&(hess_vec_prod_f_px_data(row,0)), dx); + if (f_pp_is_active) + KU::atomic_add(&(hess_vec_prod_f_pp_data(row,0)), dx); } - }; + } - do_nodes (bot_offsets); - do_nodes (top_offsets); - } + // do top_offsets + for (std::size_t node=0; node=0) { + const auto& dx = value.dx(top_offsets_dev(node)).dx(0); + if (f_px_is_active) + KU::atomic_add(&(hess_vec_prod_f_px_data(row,0)), dx); + if (f_pp_is_active) + KU::atomic_add(&(hess_vec_prod_f_pp_data(row,0)), dx); + } + } + }); } } // namespace PHAL diff --git a/src/evaluators/scatter/PHAL_ScatterSideEqnResidual_Def.hpp b/src/evaluators/scatter/PHAL_ScatterSideEqnResidual_Def.hpp index 135ffced23..d6da6656f9 100644 --- a/src/evaluators/scatter/PHAL_ScatterSideEqnResidual_Def.hpp +++ b/src/evaluators/scatter/PHAL_ScatterSideEqnResidual_Def.hpp @@ -474,7 +474,7 @@ doEvaluateFields(typename Traits::EvalData workset, } // Check for early return - if(workset.local_Vp[0].size() == 0) { + if(workset.local_Vp.size() == 0) { // In case the parameter has not been gathered. // E.g., parameter is used only in Dirichlet conditions. return; @@ -496,6 +496,8 @@ doEvaluateFields(typename Traits::EvalData workset, const auto node_dof_mgr = workset.disc->getNodeDOFManager(); const auto elem_dof_lids = dof_mgr->elem_dof_lids().host(); + const auto& local_Vp = workset.local_Vp; + constexpr auto ALL = Kokkos::ALL(); const int neq = dof_mgr->getNumFields(); const int numSides = sideSet.size(); @@ -504,8 +506,7 @@ doEvaluateFields(typename Traits::EvalData workset, const auto icell = side.ws_elem_idx; const auto elem_LID = elem_lids(icell); - const auto& local_Vp = workset.local_Vp[icell]; - + const auto& side_nodes = node_dof_mgr->getGIDFieldOffsetsSide(0,side.side_pos); const int numNodes = side_nodes.size(); const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); @@ -524,7 +525,7 @@ doEvaluateFields(typename Traits::EvalData workset, const int node = side_nodes[inode]; for (int eq = 0; eq < this->numFields; eq++) { auto res = this->get_resid(iside,inode,eq); - val += res.dx(i)*local_Vp[node*neq+eq+this->offset][col]; + val += res.dx(i)*local_Vp(icell,node*neq+eq+this->offset,col); } } fpV_data[col][row] += val; @@ -541,7 +542,7 @@ doEvaluateFields(typename Traits::EvalData workset, for (int col=0; col Date: Wed, 4 Sep 2024 09:11:55 -0700 Subject: [PATCH 03/11] Restore ResponseSquaredL2DifferenceSide precomputation, add atomic_adds for separableScatterScalarResponse --- ...AL_ResponseSquaredL2DifferenceSide_Def.hpp | 49 ++++++++++--------- ...HAL_SeparableScatterScalarResponse_Def.hpp | 12 ++--- 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp index 3d64013d18..269d18a149 100644 --- a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp +++ b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp @@ -56,8 +56,8 @@ ResponseSquaredL2DifferenceSideBase(Teuchos::ParameterList& p, const Teuchos::RC layout->dimensions(dims); // std::vector isn't accessible on device - dims_2 = dims[2]; - dims_3 = dims[3]; + if(fieldDim >= 1) dims_2 = sourceField.extent(2); + if(fieldDim >= 2) dims_3 = sourceField.extent(3); std::string sideSetNameForMetric = (plist->isParameter("Is Side Set Planar") && plist->get("Is Side Set Planar")) ? @@ -196,7 +196,7 @@ evaluateFields(typename Traits::EvalData workset) sum += sq * w_measure(sideSet_idx,qp); } - KU::atomic_add(&(this->local_response_eval(cell, 0)), sum*scaling); + this->local_response_eval(cell,0) = sum*scaling; KU::atomic_add(&(this->global_response_eval(0)), sum*scaling); }); break; @@ -206,29 +206,32 @@ evaluateFields(typename Traits::EvalData workset) // Get the local data of cell const int cell = sideSet.ws_elem_idx.d_view(sideSet_idx); + ScalarT diff_1[8] = {0}; + ScalarT sum = 0; for (int qp=0; qp(&(this->local_response_eval(cell, 0)), sum*scaling); + this->local_response_eval(cell,0) = sum*scaling; KU::atomic_add(&(this->global_response_eval(0)), sum*scaling); }); break; @@ -238,34 +241,34 @@ evaluateFields(typename Traits::EvalData workset) // Get the local data of cell const int cell = sideSet.ws_elem_idx.d_view(sideSet_idx); + ScalarT diff_2[8][8] = {0}; + ScalarT sum = 0; for (int qp=0; qp(&(this->local_response_eval(cell, 0)), sum*scaling); + this->local_response_eval(cell,0) = sum*scaling; KU::atomic_add(&(this->global_response_eval(0)), sum*scaling); }); break; diff --git a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp index f0bc4cc8ec..4054d586dc 100644 --- a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp +++ b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp @@ -543,17 +543,17 @@ evaluateFields(typename Traits::EvalData workset) const int dof_lid = elem_dof_lids(elem_LID,deriv); if (do_xx) - hess_vec_prod_g_xx_data(dof_lid, res) += lresp.dx(deriv).dx(0); + KU::atomic_add(&(hess_vec_prod_g_xx_data(dof_lid, res)), lresp.dx(deriv).dx(0)); if (do_xp) - hess_vec_prod_g_xp_data(dof_lid, res) += lresp.dx(deriv).dx(0); + KU::atomic_add(&(hess_vec_prod_g_xp_data(dof_lid, res)), lresp.dx(deriv).dx(0)); } // column nodes } if (do_dist_px) { for (int deriv=0; deriv=0) { - hess_vec_prod_g_px_data(row,res) += lresp.dx(deriv).dx(0); + KU::atomic_add(&(hess_vec_prod_g_px_data(row,res)), lresp.dx(deriv).dx(0)); } } } @@ -561,19 +561,19 @@ evaluateFields(typename Traits::EvalData workset) for (int deriv=0; deriv=0) { - hess_vec_prod_g_pp_data(row,res) += lresp.dx(deriv).dx(0); + KU::atomic_add(&(hess_vec_prod_g_pp_data(row,res)), lresp.dx(deriv).dx(0)); } } } if (do_scalar_px) { for (int deriv=0; deriv(&(hess_vec_prod_g_px_data(deriv,res)), lresp.dx(deriv).dx(0)); } } if (do_scalar_pp) { for (int deriv=0; deriv(&(hess_vec_prod_g_pp_data(deriv,res)), lresp.dx(deriv).dx(0)); } } } // response From 39db7d7a8a61e685dd97c3ebbbdc3dd258b6f8da Mon Sep 17 00:00:00 2001 From: mcarlson801 Date: Thu, 5 Sep 2024 11:54:28 -0700 Subject: [PATCH 04/11] Fixes for GatherScalarNodalParameter --- .../PHAL_GatherScalarNodalParameter_Def.hpp | 45 +++++++++++-------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp b/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp index a8d989ac4e..bd3ce3c998 100644 --- a/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp +++ b/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp @@ -215,20 +215,6 @@ evaluateFields(typename Traits::EvalData workset) bool trans = workset.transpose_dist_param_deriv; const auto elem_dof_lids = dof_mgr->elem_dof_lids().dev(); - // allocate View for local_Vp - int num_cols = 0; - if (Vp != Teuchos::null) { - num_cols = Vp->domain()->dim(); - if (trans) { - const int num_dofs = elem_dof_lids.extent(1); - workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_dofs,num_cols); - } else { - workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_deriv,num_cols); - } - } - - const auto& local_Vp = workset.local_Vp; - Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); @@ -244,6 +230,18 @@ evaluateFields(typename Traits::EvalData workset) }); if (Vp != Teuchos::null) { + const int num_cols = Vp->domain()->dim(); + + const int num_dofs = elem_dof_lids.extent(1); + if (trans) { + const int num_dofs = elem_dof_lids.extent(1); + workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_dofs,num_cols); + } else { + workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_deriv,num_cols); + } + + const auto& local_Vp = workset.local_Vp; + if (trans) { for (int eq=0; eqgetGIDFieldOffsetsKokkos(eq); @@ -252,8 +250,9 @@ evaluateFields(typename Traits::EvalData workset) Kokkos::parallel_for(this->getName()+"_transvp",RangePolicy(0,workset.numCells), KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); - auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); - for (int o=0; oparam_name); - const auto& local_Vp = workset.local_Vp; - // If active, initialize data needed for differentiation if (is_active) { const int neq = sol_dof_mgr->getNumFields(); const int num_deriv = this->numNodes; const bool trans = workset.transpose_dist_param_deriv; + + if (Vp != Teuchos::null) { + const int num_cols = workset.Vp->domain()->dim(); + if (trans) { + const int num_dofs = elem_dof_lids.extent(1); + workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_dofs,num_cols); + } else { + workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_deriv,num_cols); + } + } + const auto& local_Vp = workset.local_Vp; + for (std::size_t cell=0; cellgetColumnId(elem_LID); From ae4e26fa6ed4b3a013e09d39b41496bd4ceca416 Mon Sep 17 00:00:00 2001 From: mcarlson801 Date: Fri, 6 Sep 2024 09:11:50 -0700 Subject: [PATCH 05/11] Fix for ScatterResidual --- src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp b/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp index 6515504968..9a583dd9e0 100644 --- a/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp +++ b/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp @@ -442,7 +442,7 @@ evaluateFields(typename Traits::EvalData workset) } } else { for (size_t cell=0; cell Date: Wed, 9 Oct 2024 11:21:39 -0700 Subject: [PATCH 06/11] Loop bound fixes --- .../response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp | 4 ++-- .../scatter/PHAL_SeparableScatterScalarResponse_Def.hpp | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp index 269d18a149..19e6a0cf22 100644 --- a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp +++ b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp @@ -56,8 +56,8 @@ ResponseSquaredL2DifferenceSideBase(Teuchos::ParameterList& p, const Teuchos::RC layout->dimensions(dims); // std::vector isn't accessible on device - if(fieldDim >= 1) dims_2 = sourceField.extent(2); - if(fieldDim >= 2) dims_3 = sourceField.extent(3); + dims_2 = (fieldDim >= 1) ? dims[2] : 0; + dims_3 = (fieldDim >= 2) ? dims[3] : 0; std::string sideSetNameForMetric = (plist->isParameter("Is Side Set Planar") && plist->get("Is Side Set Planar")) ? diff --git a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp index 4054d586dc..2562350cdd 100644 --- a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp +++ b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp @@ -518,8 +518,10 @@ evaluateFields(typename Traits::EvalData workset) const int ws = workset.wsIndex; const int neq = dof_mgr->getNumFields(); - const int g_px_size = hess_vec_prod_g_px_data.extent(1); - const int g_pp_size = hess_vec_prod_g_pp_data.extent(1); + const int num_responses = this->global_response.size(); + + const int g_px_size = hess_vec_prod_g_px_data.extent(0); + const int g_pp_size = hess_vec_prod_g_pp_data.extent(0); const auto& elem_dof_lids = dof_mgr->elem_dof_lids().dev(); const auto elem_lids_all = workset.disc->getWsElementLIDs(); @@ -533,7 +535,7 @@ evaluateFields(typename Traits::EvalData workset) KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); // Loop over responses - for (size_t res=0; resglobal_response.size(); ++res) { + for (size_t res=0; reslocal_response(cell, res); if (do_xx || do_xp) { From baf1b3b7e7cd8cbbfb8d8470314f1110adbaf41e Mon Sep 17 00:00:00 2001 From: mcarlson801 Date: Fri, 11 Oct 2024 14:32:31 -0700 Subject: [PATCH 07/11] Update scatter unit tests to work without uvm, fixed bug in SeparableScatterScalarResponse --- ...HAL_SeparableScatterScalarResponse_Def.hpp | 44 ++-- tests/unit/evaluators/ScatterResidual.cpp | 220 ++++++++++-------- .../unit/evaluators/ScatterScalarResponse.cpp | 107 +++++---- 3 files changed, 204 insertions(+), 167 deletions(-) diff --git a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp index 2562350cdd..7932dc7928 100644 --- a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp +++ b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp @@ -527,30 +527,37 @@ evaluateFields(typename Traits::EvalData workset) const auto elem_lids_all = workset.disc->getWsElementLIDs(); const auto elem_lids = Kokkos::subview(elem_lids_all.dev(),ws,Kokkos::ALL); - for (int eq_dof=0; eq_dofgetGIDFieldOffsetsKokkos(eq_dof); - const int num_nodes = offsets.size(); - Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + if (do_xx || do_xp) { + for (int eq_dof=0; eq_dofgetGIDFieldOffsetsKokkos(eq_dof); + const int num_nodes = offsets.size(); + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), KOKKOS_CLASS_LAMBDA(const int& cell) { - const auto elem_LID = elem_lids(cell); - // Loop over responses - for (size_t res=0; reslocal_response(cell, res); - - if (do_xx || do_xp) { + const auto elem_LID = elem_lids(cell); + // Loop over responses + for (size_t res=0; reslocal_response(cell, res); for (int i=0; i(&(hess_vec_prod_g_xx_data(dof_lid, res)), lresp.dx(deriv).dx(0)); + KU::atomic_add(&(hess_vec_prod_g_xx_data(dof_lid,res)), lresp.dx(deriv).dx(0)); if (do_xp) - KU::atomic_add(&(hess_vec_prod_g_xp_data(dof_lid, res)), lresp.dx(deriv).dx(0)); - } // column nodes + KU::atomic_add(&(hess_vec_prod_g_xp_data(dof_lid,res)), lresp.dx(deriv).dx(0)); + } } + }); + } + } else { + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { + const auto elem_LID = elem_lids(cell); + for (size_t res=0; reslocal_response(cell, res); if (do_dist_px) { for (int deriv=0; deriv(&(hess_vec_prod_g_px_data(deriv,res)), lresp.dx(deriv).dx(0)); @@ -578,9 +584,9 @@ evaluateFields(typename Traits::EvalData workset) KU::atomic_add(&(hess_vec_prod_g_pp_data(deriv,res)), lresp.dx(deriv).dx(0)); } } - } // response - }); // cell - } // column equations + } + }); + } } template diff --git a/tests/unit/evaluators/ScatterResidual.cpp b/tests/unit/evaluators/ScatterResidual.cpp index 2651cb9f1f..bdfa5ab4f7 100644 --- a/tests/unit/evaluators/ScatterResidual.cpp +++ b/tests/unit/evaluators/ScatterResidual.cpp @@ -75,30 +75,30 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) const auto f_multiplier = Thyra::createMember(ov_x_space); - const auto hess_vec_prod_f_xx = Thyra::createMember(x_space); - const auto hess_vec_prod_f_xp = Thyra::createMember(x_space); - const auto hess_vec_prod_f_px = Thyra::createMember(p_space); - const auto hess_vec_prod_f_pp = Thyra::createMember(p_space); - - const auto ov_hess_vec_prod_f_xx = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_xp = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_px = Thyra::createMember(ov_p_space); - const auto ov_hess_vec_prod_f_pp = Thyra::createMember(ov_p_space); - - const auto diff_x_out = Thyra::createMember(x_space); - const auto one_x_out = Thyra::createMember(x_space); - const auto diff_p_out = Thyra::createMember(p_space); - const auto one_p_out = Thyra::createMember(p_space); - - const auto hess_vec_prod_f_xx_out = Thyra::createMember(x_space); - const auto hess_vec_prod_f_xp_out = Thyra::createMember(x_space); - const auto hess_vec_prod_f_px_out = Thyra::createMember(p_space); - const auto hess_vec_prod_f_pp_out = Thyra::createMember(p_space); - - const auto ov_hess_vec_prod_f_xx_out = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_xp_out = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_px_out = Thyra::createMember(ov_p_space); - const auto ov_hess_vec_prod_f_pp_out = Thyra::createMember(ov_p_space); + const auto hess_vec_prod_f_xx = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_xp = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_px = Thyra::createMembers(p_space,1); + const auto hess_vec_prod_f_pp = Thyra::createMembers(p_space,1); + + const auto ov_hess_vec_prod_f_xx = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_xp = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_px = Thyra::createMembers(ov_p_space,1); + const auto ov_hess_vec_prod_f_pp = Thyra::createMembers(ov_p_space,1); + + const auto diff_x_out = Thyra::createMembers(x_space,1); + const auto one_x_out = Thyra::createMembers(x_space,1); + const auto diff_p_out = Thyra::createMembers(p_space,1); + const auto one_p_out = Thyra::createMembers(p_space,1); + + const auto hess_vec_prod_f_xx_out = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_xp_out = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_px_out = Thyra::createMembers(p_space,1); + const auto hess_vec_prod_f_pp_out = Thyra::createMembers(p_space,1); + + const auto ov_hess_vec_prod_f_xx_out = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_xp_out = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_px_out = Thyra::createMembers(ov_p_space,1); + const auto ov_hess_vec_prod_f_pp_out = Thyra::createMembers(ov_p_space,1); ov_hess_vec_prod_f_xx->assign(0.0); ov_hess_vec_prod_f_xp->assign(0.0); @@ -116,10 +116,15 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) ov_hess_vec_prod_f_pp_out->assign(0.0); { - auto ov_hess_vec_prod_f_xx_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_xx_out); - auto ov_hess_vec_prod_f_xp_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_xp_out); - auto ov_hess_vec_prod_f_px_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_px_out); - auto ov_hess_vec_prod_f_pp_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_pp_out); + auto ov_hess_vec_prod_f_xx_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_xx_out); + auto ov_hess_vec_prod_f_xp_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_xp_out); + auto ov_hess_vec_prod_f_px_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_px_out); + auto ov_hess_vec_prod_f_pp_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_pp_out); + + auto ov_hess_vec_prod_f_xx_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_xx_out_data); + auto ov_hess_vec_prod_f_xp_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_xp_out_data); + auto ov_hess_vec_prod_f_px_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_px_out_data); + auto ov_hess_vec_prod_f_pp_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_pp_out_data); auto p_elem_dof_lids = p_dof_mgr->elem_dof_lids().host(); auto x_elem_dof_lids = x_dof_mgr->elem_dof_lids().host(); @@ -128,14 +133,19 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,cell,ALL); auto x_dof_lids = Kokkos::subview(x_elem_dof_lids,cell,ALL); for (size_t i=0; icombine(ov_hess_vec_prod_f_px, hess_vec_prod_f_px, ADD); p_cas_manager->combine(ov_hess_vec_prod_f_pp, hess_vec_prod_f_pp, ADD); - Thyra::V_VmV(diff_x_out.ptr(), *one_x_out, *hess_vec_prod_f_xx); + Thyra::V_VmV(diff_x_out->col(0).ptr(), *(one_x_out->col(0)), *(hess_vec_prod_f_xx->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xx_out", *one_x_out, - "hess_vec_prod_f_xx", *diff_x_out, + "hess_vec_prod_f_xx_out", *(one_x_out->col(0)), + "hess_vec_prod_f_xx", *(diff_x_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_x_out.ptr(), *one_x_out, *hess_vec_prod_f_xp); + Thyra::V_VmV(diff_x_out->col(0).ptr(), *(one_x_out->col(0)), *(hess_vec_prod_f_xp->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xp_out", *one_x_out, - "hess_vec_prod_f_xp", *diff_x_out, + "hess_vec_prod_f_xp_out", *(one_x_out->col(0)), + "hess_vec_prod_f_xp", *(diff_x_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_p_out.ptr(), *one_p_out, *hess_vec_prod_f_px); + Thyra::V_VmV(diff_p_out->col(0).ptr(), *(one_p_out->col(0)), *(hess_vec_prod_f_px->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_px_out", *one_p_out, - "hess_vec_prod_f_px", *diff_p_out, + "hess_vec_prod_f_px_out", *(one_p_out->col(0)), + "hess_vec_prod_f_px", *(diff_p_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_p_out.ptr(), *one_p_out, *hess_vec_prod_f_pp); + Thyra::V_VmV(diff_p_out->col(0).ptr(), *(one_p_out->col(0)), *(hess_vec_prod_f_pp->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_pp_out", *one_p_out, - "hess_vec_prod_f_pp", *diff_p_out, + "hess_vec_prod_f_pp_out", *(one_p_out->col(0)), + "hess_vec_prod_f_pp", *(diff_p_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -272,8 +282,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xx_out", *hess_vec_prod_f_xx_out, - "hess_vec_prod_f_xx", *hess_vec_prod_f_xx, + "hess_vec_prod_f_xx_out", *(hess_vec_prod_f_xx_out->col(0)), + "hess_vec_prod_f_xx", *(hess_vec_prod_f_xx->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -302,8 +312,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xp_out", *hess_vec_prod_f_xp_out, - "hess_vec_prod_f_xp", *hess_vec_prod_f_xp, + "hess_vec_prod_f_xp_out", *(hess_vec_prod_f_xp_out->col(0)), + "hess_vec_prod_f_xp", *(hess_vec_prod_f_xp->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -332,8 +342,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_px_out", *hess_vec_prod_f_px_out, - "hess_vec_prod_f_px", *hess_vec_prod_f_px, + "hess_vec_prod_f_px_out", *(hess_vec_prod_f_px_out->col(0)), + "hess_vec_prod_f_px", *(hess_vec_prod_f_px->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -362,8 +372,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_pp_out", *hess_vec_prod_f_pp_out, - "hess_vec_prod_f_pp", *hess_vec_prod_f_pp, + "hess_vec_prod_f_pp_out", *(hess_vec_prod_f_pp_out->col(0)), + "hess_vec_prod_f_pp", *(hess_vec_prod_f_pp->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -432,30 +442,30 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) const auto f_multiplier = Thyra::createMember(ov_x_space); - const auto hess_vec_prod_f_xx = Thyra::createMember(x_space); - const auto hess_vec_prod_f_xp = Thyra::createMember(x_space); - const auto hess_vec_prod_f_px = Thyra::createMember(p_space); - const auto hess_vec_prod_f_pp = Thyra::createMember(p_space); + const auto hess_vec_prod_f_xx = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_xp = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_px = Thyra::createMembers(p_space,1); + const auto hess_vec_prod_f_pp = Thyra::createMembers(p_space,1); - const auto ov_hess_vec_prod_f_xx = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_xp = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_px = Thyra::createMember(ov_p_space); - const auto ov_hess_vec_prod_f_pp = Thyra::createMember(ov_p_space); + const auto ov_hess_vec_prod_f_xx = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_xp = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_px = Thyra::createMembers(ov_p_space,1); + const auto ov_hess_vec_prod_f_pp = Thyra::createMembers(ov_p_space,1); - const auto diff_x_out = Thyra::createMember(x_space); - const auto one_x_out = Thyra::createMember(x_space); - const auto diff_p_out = Thyra::createMember(p_space); - const auto one_p_out = Thyra::createMember(p_space); + const auto diff_x_out = Thyra::createMembers(x_space,1); + const auto one_x_out = Thyra::createMembers(x_space,1); + const auto diff_p_out = Thyra::createMembers(p_space,1); + const auto one_p_out = Thyra::createMembers(p_space,1); - const auto hess_vec_prod_f_xx_out = Thyra::createMember(x_space); - const auto hess_vec_prod_f_xp_out = Thyra::createMember(x_space); - const auto hess_vec_prod_f_px_out = Thyra::createMember(p_space); - const auto hess_vec_prod_f_pp_out = Thyra::createMember(p_space); + const auto hess_vec_prod_f_xx_out = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_xp_out = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_px_out = Thyra::createMembers(p_space,1); + const auto hess_vec_prod_f_pp_out = Thyra::createMembers(p_space,1); - const auto ov_hess_vec_prod_f_xx_out = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_xp_out = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_px_out = Thyra::createMember(ov_p_space); - const auto ov_hess_vec_prod_f_pp_out = Thyra::createMember(ov_p_space); + const auto ov_hess_vec_prod_f_xx_out = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_xp_out = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_px_out = Thyra::createMembers(ov_p_space,1); + const auto ov_hess_vec_prod_f_pp_out = Thyra::createMembers(ov_p_space,1); ov_hess_vec_prod_f_xx->assign(0.0); ov_hess_vec_prod_f_xp->assign(0.0); @@ -473,10 +483,15 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) ov_hess_vec_prod_f_pp_out->assign(0.0); { - auto ov_hess_vec_prod_f_xx_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_xx_out); - auto ov_hess_vec_prod_f_xp_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_xp_out); - auto ov_hess_vec_prod_f_px_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_px_out); - auto ov_hess_vec_prod_f_pp_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_pp_out); + auto ov_hess_vec_prod_f_xx_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_xx_out); + auto ov_hess_vec_prod_f_xp_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_xp_out); + auto ov_hess_vec_prod_f_px_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_px_out); + auto ov_hess_vec_prod_f_pp_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_pp_out); + + auto ov_hess_vec_prod_f_xx_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_xx_out_data); + auto ov_hess_vec_prod_f_xp_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_xp_out_data); + auto ov_hess_vec_prod_f_px_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_px_out_data); + auto ov_hess_vec_prod_f_pp_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_pp_out_data); auto p_elem_dof_lids = p_dof_mgr->elem_dof_lids().host(); auto x_elem_dof_lids = x_dof_mgr->elem_dof_lids().host(); @@ -485,14 +500,19 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,cell,ALL); auto x_dof_lids = Kokkos::subview(x_elem_dof_lids,cell,ALL); for (size_t i=0; icombine(ov_hess_vec_prod_f_px, hess_vec_prod_f_px, ADD); p_cas_manager->combine(ov_hess_vec_prod_f_pp, hess_vec_prod_f_pp, ADD); - Thyra::V_VmV(diff_x_out.ptr(), *one_x_out, *hess_vec_prod_f_xx); + Thyra::V_VmV(diff_x_out->col(0).ptr(), *(one_x_out->col(0)), *(hess_vec_prod_f_xx->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xx_out", *one_x_out, - "hess_vec_prod_f_xx", *diff_x_out, + "hess_vec_prod_f_xx_out", *(one_x_out->col(0)), + "hess_vec_prod_f_xx", *(diff_x_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_x_out.ptr(), *one_x_out, *hess_vec_prod_f_xp); + Thyra::V_VmV(diff_x_out->col(0).ptr(), *(one_x_out->col(0)), *(hess_vec_prod_f_xp->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xp_out", *one_x_out, - "hess_vec_prod_f_xp", *diff_x_out, + "hess_vec_prod_f_xp_out", *(one_x_out->col(0)), + "hess_vec_prod_f_xp", *(diff_x_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_p_out.ptr(), *one_p_out, *hess_vec_prod_f_px); + Thyra::V_VmV(diff_p_out->col(0).ptr(), *(one_p_out->col(0)), *(hess_vec_prod_f_px->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_px_out", *one_p_out, - "hess_vec_prod_f_px", *diff_p_out, + "hess_vec_prod_f_px_out", *(one_p_out->col(0)), + "hess_vec_prod_f_px", *(diff_p_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_p_out.ptr(), *one_p_out, *hess_vec_prod_f_pp); + Thyra::V_VmV(diff_p_out->col(0).ptr(), *(one_p_out->col(0)), *(hess_vec_prod_f_pp->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_pp_out", *one_p_out, - "hess_vec_prod_f_pp", *diff_p_out, + "hess_vec_prod_f_pp_out", *(one_p_out->col(0)), + "hess_vec_prod_f_pp", *(diff_p_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -633,8 +653,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xx_out", *ov_hess_vec_prod_f_xx_out, - "hess_vec_prod_f_xx", *ov_hess_vec_prod_f_xx, + "hess_vec_prod_f_xx_out", *(ov_hess_vec_prod_f_xx_out->col(0)), + "hess_vec_prod_f_xx", *(ov_hess_vec_prod_f_xx->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -663,8 +683,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xp_out", *hess_vec_prod_f_xp_out, - "hess_vec_prod_f_xp", *hess_vec_prod_f_xp, + "hess_vec_prod_f_xp_out", *(hess_vec_prod_f_xp_out->col(0)), + "hess_vec_prod_f_xp", *(hess_vec_prod_f_xp->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -693,8 +713,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_px_out", *hess_vec_prod_f_px_out, - "hess_vec_prod_f_px", *hess_vec_prod_f_px, + "hess_vec_prod_f_px_out", *(hess_vec_prod_f_px_out->col(0)), + "hess_vec_prod_f_px", *(hess_vec_prod_f_px->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -723,8 +743,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_pp_out", *hess_vec_prod_f_pp_out, - "hess_vec_prod_f_pp", *hess_vec_prod_f_pp, + "hess_vec_prod_f_pp_out", *(hess_vec_prod_f_pp_out->col(0)), + "hess_vec_prod_f_pp", *(hess_vec_prod_f_pp->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); diff --git a/tests/unit/evaluators/ScatterScalarResponse.cpp b/tests/unit/evaluators/ScatterScalarResponse.cpp index f9b0728ebf..a41f2ee012 100644 --- a/tests/unit/evaluators/ScatterScalarResponse.cpp +++ b/tests/unit/evaluators/ScatterScalarResponse.cpp @@ -74,31 +74,31 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe auto overlapped_p_space = p_dof_mgr->ov_indexer()->getVectorSpace(); auto overlapped_x_space = x_dof_mgr->ov_indexer()->getVectorSpace(); - const auto hess_vec_prod_g_xx = Thyra::createMember(x_space); - const auto hess_vec_prod_g_xp = Thyra::createMember(x_space); - const auto hess_vec_prod_g_px = Thyra::createMember(p_space); - const auto hess_vec_prod_g_pp = Thyra::createMember(p_space); + const auto hess_vec_prod_g_xx = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_g_xp = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_g_px = Thyra::createMembers(p_space,1); + const auto hess_vec_prod_g_pp = Thyra::createMembers(p_space,1); - const auto overlapped_hess_vec_prod_g_xx = Thyra::createMember(overlapped_x_space); - const auto overlapped_hess_vec_prod_g_xp = Thyra::createMember(overlapped_x_space); - const auto overlapped_hess_vec_prod_g_px = Thyra::createMember(overlapped_p_space); - const auto overlapped_hess_vec_prod_g_pp = Thyra::createMember(overlapped_p_space); + const auto overlapped_hess_vec_prod_g_xx = Thyra::createMembers(overlapped_x_space,1); + const auto overlapped_hess_vec_prod_g_xp = Thyra::createMembers(overlapped_x_space,1); + const auto overlapped_hess_vec_prod_g_px = Thyra::createMembers(overlapped_p_space,1); + const auto overlapped_hess_vec_prod_g_pp = Thyra::createMembers(overlapped_p_space,1); - const auto diff_x_out = Thyra::createMember(x_space); - const auto diff_p_out = Thyra::createMember(p_space); - const auto one_x_out = Thyra::createMember(x_space); - const auto one_p_out = Thyra::createMember(p_space); + const auto diff_x_out = Thyra::createMembers(x_space,1); + const auto diff_p_out = Thyra::createMembers(p_space,1); + const auto one_x_out = Thyra::createMembers(x_space,1); + const auto one_p_out = Thyra::createMembers(p_space,1); - const auto hess_vec_prod_g_xx_out = Thyra::createMember(x_space); - const auto hess_vec_prod_g_xp_out = Thyra::createMember(x_space); - const auto hess_vec_prod_g_px_out = Thyra::createMember(p_space); - const auto hess_vec_prod_g_pp_out = Thyra::createMember(p_space); + const auto hess_vec_prod_g_xx_out = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_g_xp_out = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_g_px_out = Thyra::createMembers(p_space,1); + const auto hess_vec_prod_g_pp_out = Thyra::createMembers(p_space,1); - const auto overlapped_hess_vec_prod_g_xx_out = Thyra::createMember(overlapped_x_space); - const auto overlapped_hess_vec_prod_g_xp_out = Thyra::createMember(overlapped_x_space); - const auto overlapped_hess_vec_prod_g_px_out = Thyra::createMember(overlapped_p_space); - const auto overlapped_hess_vec_prod_g_pp_out = Thyra::createMember(overlapped_p_space); + const auto overlapped_hess_vec_prod_g_xx_out = Thyra::createMembers(overlapped_x_space,1); + const auto overlapped_hess_vec_prod_g_xp_out = Thyra::createMembers(overlapped_x_space,1); + const auto overlapped_hess_vec_prod_g_px_out = Thyra::createMembers(overlapped_p_space,1); + const auto overlapped_hess_vec_prod_g_pp_out = Thyra::createMembers(overlapped_p_space,1); overlapped_hess_vec_prod_g_xx->assign(0.0); overlapped_hess_vec_prod_g_xp->assign(0.0); @@ -114,10 +114,15 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe overlapped_hess_vec_prod_g_pp_out->assign(0.0); { - auto ov_hess_vec_prod_g_xx_out_data = Albany::getNonconstLocalData(overlapped_hess_vec_prod_g_xx_out); - auto ov_hess_vec_prod_g_xp_out_data = Albany::getNonconstLocalData(overlapped_hess_vec_prod_g_xp_out); - auto ov_hess_vec_prod_g_px_out_data = Albany::getNonconstLocalData(overlapped_hess_vec_prod_g_px_out); - auto ov_hess_vec_prod_g_pp_out_data = Albany::getNonconstLocalData(overlapped_hess_vec_prod_g_pp_out); + auto ov_hess_vec_prod_g_xx_out_data = Albany::getNonconstDeviceData(overlapped_hess_vec_prod_g_xx_out); + auto ov_hess_vec_prod_g_xp_out_data = Albany::getNonconstDeviceData(overlapped_hess_vec_prod_g_xp_out); + auto ov_hess_vec_prod_g_px_out_data = Albany::getNonconstDeviceData(overlapped_hess_vec_prod_g_px_out); + auto ov_hess_vec_prod_g_pp_out_data = Albany::getNonconstDeviceData(overlapped_hess_vec_prod_g_pp_out); + + auto ov_hess_vec_prod_g_xx_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_g_xx_out_data); + auto ov_hess_vec_prod_g_xp_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_g_xp_out_data); + auto ov_hess_vec_prod_g_px_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_g_px_out_data); + auto ov_hess_vec_prod_g_pp_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_g_pp_out_data); auto p_elem_dof_lids = p_dof_mgr->elem_dof_lids().host(); auto x_elem_dof_lids = x_dof_mgr->elem_dof_lids().host(); @@ -126,14 +131,19 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,cell,ALL); auto x_dof_lids = Kokkos::subview(x_elem_dof_lids,cell,ALL); for (size_t i=0; icombine(overlapped_hess_vec_prod_g_px, hess_vec_prod_g_px, ADD); p_cas_manager->combine(overlapped_hess_vec_prod_g_pp, hess_vec_prod_g_pp, ADD); - Thyra::V_VmV(diff_x_out.ptr(), *one_x_out, *hess_vec_prod_g_xx); + std::cout << "Running hess_vec_prod_g_**...\n"; + Thyra::V_VmV(diff_x_out->col(0).ptr(), *(one_x_out->col(0)), *(hess_vec_prod_g_xx->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_xx_out", *one_x_out, - "hess_vec_prod_g_xx", *diff_x_out, + "hess_vec_prod_g_xx_out", *(one_x_out->col(0)), + "hess_vec_prod_g_xx", *(diff_x_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_x_out.ptr(), *one_x_out, *hess_vec_prod_g_xp); + Thyra::V_VmV(diff_x_out->col(0).ptr(), *(one_x_out->col(0)), *(hess_vec_prod_g_xp->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_xp_out", *one_x_out, - "hess_vec_prod_g_xp", *diff_x_out, + "hess_vec_prod_g_xp_out", *(one_x_out->col(0)), + "hess_vec_prod_g_xp", *(diff_x_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_p_out.ptr(), *one_p_out, *hess_vec_prod_g_px); + Thyra::V_VmV(diff_p_out->col(0).ptr(), *(one_p_out->col(0)), *(hess_vec_prod_g_px->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_px_out", *one_p_out, - "hess_vec_prod_g_px", *diff_p_out, + "hess_vec_prod_g_px_out", *(one_p_out->col(0)), + "hess_vec_prod_g_px", *(diff_p_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_p_out.ptr(), *one_p_out, *hess_vec_prod_g_pp); + Thyra::V_VmV(diff_p_out->col(0).ptr(), *(one_p_out->col(0)), *(hess_vec_prod_g_pp->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_pp_out", *one_p_out, - "hess_vec_prod_g_pp", *diff_p_out, + "hess_vec_prod_g_pp_out", *(one_p_out->col(0)), + "hess_vec_prod_g_pp", *(diff_p_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -286,8 +297,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_xx_out", *hess_vec_prod_g_xx_out, - "hess_vec_prod_g_xx", *hess_vec_prod_g_xx, + "hess_vec_prod_g_xx_out", *(hess_vec_prod_g_xx_out->col(0)), + "hess_vec_prod_g_xx", *(hess_vec_prod_g_xx->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -316,8 +327,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_xp_out", *hess_vec_prod_g_xp_out, - "hess_vec_prod_g_xp", *hess_vec_prod_g_xp, + "hess_vec_prod_g_xp_out", *(hess_vec_prod_g_xp_out->col(0)), + "hess_vec_prod_g_xp", *(hess_vec_prod_g_xp->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -346,8 +357,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_px_out", *hess_vec_prod_g_px_out, - "hess_vec_prod_g_px", *hess_vec_prod_g_px, + "hess_vec_prod_g_px_out", *(hess_vec_prod_g_px_out->col(0)), + "hess_vec_prod_g_px", *(hess_vec_prod_g_px->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -376,8 +387,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_pp_out", *hess_vec_prod_g_pp_out, - "hess_vec_prod_g_pp", *hess_vec_prod_g_pp, + "hess_vec_prod_g_pp_out", *(hess_vec_prod_g_pp_out->col(0)), + "hess_vec_prod_g_pp", *(hess_vec_prod_g_pp->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); From 3d1babc0c37210ba6fc2856ce722ceac80fb3cde Mon Sep 17 00:00:00 2001 From: mcarlson801 Date: Tue, 15 Oct 2024 14:46:20 -0600 Subject: [PATCH 08/11] Fix warnings --- .../PHAL_GatherScalarNodalParameter_Def.hpp | 4 +--- .../PHAL_ResponseSquaredL2DifferenceSide.hpp | 4 ++-- .../scatter/PHAL_ScatterResidual_Def.hpp | 20 +++++++++---------- ...HAL_SeparableScatterScalarResponse_Def.hpp | 2 +- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp b/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp index bd3ce3c998..cda028b8aa 100644 --- a/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp +++ b/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp @@ -230,11 +230,9 @@ evaluateFields(typename Traits::EvalData workset) }); if (Vp != Teuchos::null) { - const int num_cols = Vp->domain()->dim(); - + const int num_cols = Vp->domain()->dim(); const int num_dofs = elem_dof_lids.extent(1); if (trans) { - const int num_dofs = elem_dof_lids.extent(1); workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_dofs,num_cols); } else { workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_deriv,num_cols); diff --git a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide.hpp b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide.hpp index 56bec8ea1e..5c22cba59c 100644 --- a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide.hpp +++ b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide.hpp @@ -46,8 +46,8 @@ class ResponseSquaredL2DifferenceSideBase : public PHAL::SeparableScatterScalarR int sideDim; int numQPs; int fieldDim; - int dims_2; - int dims_3; + size_t dims_2; + size_t dims_3; std::vector dims; bool target_value, rmsScaling, extrudedParams, isFieldGradient; diff --git a/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp b/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp index 9a583dd9e0..2ae615edb5 100644 --- a/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp +++ b/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp @@ -531,7 +531,7 @@ evaluateFields(typename Traits::EvalData workset) Albany::DualView basal_elem_LIDs("basal_elem_LIDs", elem_lids_host.size()); Albany::DualView field_elem_LIDs("field_elem_LIDs", elem_lids_host.size()); - for (int i = 0; i < elem_lids_host.size(); ++i) { + for (size_t i = 0; i < elem_lids_host.size(); ++i) { basal_elem_LIDs.host()(i) = cell_layers_data->getColumnId(i); field_elem_LIDs.host()(i) = cell_layers_data->getId(i,fieldLayer); } @@ -543,13 +543,13 @@ evaluateFields(typename Traits::EvalData workset) Albany::DualView top_nodes_dv("top_nodes",top_nodes.size()); Albany::DualView bot_nodes_dv("bot_nodes",bot_nodes.size()); - for (int i = 0; i < p_offsets.size(); ++i) { + for (size_t i = 0; i < p_offsets.size(); ++i) { p_offsets_dv.host()(i) = p_offsets[i]; } - for (int i = 0; i < top_nodes.size(); ++i) { + for (size_t i = 0; i < top_nodes.size(); ++i) { top_nodes_dv.host()(i) = top_nodes[i]; } - for (int i = 0; i < bot_nodes.size(); ++i) { + for (size_t i = 0; i < bot_nodes.size(); ++i) { bot_nodes_dv.host()(i) = bot_nodes[i]; } @@ -710,8 +710,8 @@ evaluateFields(typename Traits::EvalData workset) const auto elem_dof_lids = dof_mgr->elem_dof_lids().dev(); - const int hess_vec_prod_f_px_data_size = hess_vec_prod_f_px_data.extent(0); - const int hess_vec_prod_f_pp_data_size = hess_vec_prod_f_pp_data.extent(0); + const size_t hess_vec_prod_f_px_data_size = hess_vec_prod_f_px_data.extent(0); + const size_t hess_vec_prod_f_pp_data_size = hess_vec_prod_f_pp_data.extent(0); const auto& fields_offsets = m_fields_offsets.dev(); const int eq_offset = this->offset; @@ -863,7 +863,7 @@ evaluate2DFieldsDerivativesDueToExtrudedParams(typename Traits::EvalData workset Albany::DualView basal_elem_LIDs("basal_elem_LIDs", elem_lids_host.size()); Albany::DualView field_elem_LIDs("field_elem_LIDs", elem_lids_host.size()); - for (int i = 0; i < elem_lids_host.size(); ++i) { + for (size_t i = 0; i < elem_lids_host.size(); ++i) { basal_elem_LIDs.host()(i) = layers_data->getColumnId(i); field_elem_LIDs.host()(i) = layers_data->getId(i,fieldLayer); } @@ -875,13 +875,13 @@ evaluate2DFieldsDerivativesDueToExtrudedParams(typename Traits::EvalData workset Albany::DualView top_offsets_dv("top_offsets",top_offsets.size()); Albany::DualView bot_offsets_dv("bot_offsets",bot_offsets.size()); - for (int i = 0; i < p_offsets.size(); ++i) { + for (size_t i = 0; i < p_offsets.size(); ++i) { p_offsets_dv.host()(i) = p_offsets[i]; } - for (int i = 0; i < top_offsets.size(); ++i) { + for (size_t i = 0; i < top_offsets.size(); ++i) { top_offsets_dv.host()(i) = top_offsets[i]; } - for (int i = 0; i < bot_offsets.size(); ++i) { + for (size_t i = 0; i < bot_offsets.size(); ++i) { bot_offsets_dv.host()(i) = bot_offsets[i]; } diff --git a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp index 7932dc7928..c1700f0c68 100644 --- a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp +++ b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp @@ -518,7 +518,7 @@ evaluateFields(typename Traits::EvalData workset) const int ws = workset.wsIndex; const int neq = dof_mgr->getNumFields(); - const int num_responses = this->global_response.size(); + const size_t num_responses = this->global_response.size(); const int g_px_size = hess_vec_prod_g_px_data.extent(0); const int g_pp_size = hess_vec_prod_g_pp_data.extent(0); From d2e177b5e865c6d65091eb8901672b0cb727c215 Mon Sep 17 00:00:00 2001 From: mcarlson801 Date: Thu, 31 Oct 2024 09:01:21 -0700 Subject: [PATCH 09/11] Move temporary data movement out of ScatterResidual and into Albany DOFManager to avoid excess transfers --- src/disc/Albany_DOFManager.cpp | 60 ++++++++++ src/disc/Albany_DOFManager.hpp | 12 ++ .../scatter/PHAL_ScatterResidual_Def.hpp | 108 +++++------------- 3 files changed, 99 insertions(+), 81 deletions(-) diff --git a/src/disc/Albany_DOFManager.cpp b/src/disc/Albany_DOFManager.cpp index 4c980e1b90..886165b17f 100644 --- a/src/disc/Albany_DOFManager.cpp +++ b/src/disc/Albany_DOFManager.cpp @@ -65,6 +65,11 @@ void DOFManager::build () // 4. Possibly restrict the DOFs list restrict (part_name()); + // Allocate/populate nodes view + convertVecsToViews (m_subcell_closures, m_subcell_closures_view, "m_subcell_closures_view"); + if (m_topo_dim>2 && m_top_bot_well_defined) + convertVecsToViews (m_side_closure_orderd_as_side, m_side_closure_orderd_as_side_view, "m_side_closure_orderd_as_side_view"); + // Done m_built = true; } @@ -161,6 +166,16 @@ getGIDFieldOffsetsSubcell (int fieldNum, return m_subcell_closures[fieldNum][subcell_dim][subcell_pos]; } +Kokkos::Subview,int,int,int,decltype(Kokkos::ALL)> +DOFManager:: +getGIDFieldOffsetsSubcellKokkos (int fieldNum, + int subcell_dim, + int subcell_pos) const +{ + return Kokkos::subview(m_subcell_closures_view,fieldNum,subcell_dim,subcell_pos,Kokkos::ALL); +} + + const std::vector& DOFManager:: getGIDFieldOffsetsSide (int fieldNum, int side) const @@ -168,6 +183,13 @@ getGIDFieldOffsetsSide (int fieldNum, int side) const return getGIDFieldOffsetsSubcell(fieldNum,m_topo_dim-1,side); } +Kokkos::Subview,int,int,int,decltype(Kokkos::ALL)> +DOFManager:: +getGIDFieldOffsetsSideKokkos (int fieldNum, int side) const +{ + return getGIDFieldOffsetsSubcellKokkos(fieldNum,m_topo_dim-1,side); +} + const std::vector& DOFManager:: getGIDFieldOffsetsSide (int fieldNum, int side, const int orderAsInSide) const @@ -191,6 +213,44 @@ getGIDFieldOffsetsSide (int fieldNum, int side, const int orderAsInSide) const return m_side_closure_orderd_as_side[fieldNum][side][orderAsInSide]; } +Kokkos::Subview,int,int,int,decltype(Kokkos::ALL)> +DOFManager:: +getGIDFieldOffsetsSideKokkos(int fieldNum, int side, const int orderAsInSide) const +{ + return Kokkos::subview(m_side_closure_orderd_as_side_view,fieldNum,side,orderAsInSide,Kokkos::ALL); +} + +void +DOFManager:: +convertVecsToViews (vec4int& vec, Kokkos::View& view, std::string view_name) +{ + size_t max_size_i = vec.size(); + size_t max_size_j = 0; size_t max_size_k = 0; size_t max_size_l = 0; + for (size_t i=0; i < vec.size(); ++i) { + max_size_j = std::max(max_size_j, vec[i].size()); + for (size_t j=0; j < vec[i].size(); ++j) { + max_size_k = std::max(max_size_k, vec[i][j].size()); + for (size_t k=0; k < vec[i][j].size(); ++k) { + max_size_l = std::max(max_size_l, vec[i][j][k].size()); + } + } + } + + view = Kokkos::View(view_name, + max_size_i, + max_size_j, + max_size_k, + max_size_l); + + const auto view_host = Kokkos::create_mirror_view(view); + for (size_t i=0; i < vec.size(); ++i) + for (size_t j=0; j < vec[i].size(); ++j) + for (size_t k=0; k < vec[i][j].size(); ++k) + for (size_t l=0; l < vec[i][j][k].size(); ++l) + view_host(i,j,k,l) = vec[i][j][k][l]; + Kokkos::deep_copy(view,view_host); +} + void DOFManager:: restrict (const std::string& sub_part_name) { diff --git a/src/disc/Albany_DOFManager.hpp b/src/disc/Albany_DOFManager.hpp index 5be2832ec9..c121086b9b 100644 --- a/src/disc/Albany_DOFManager.hpp +++ b/src/disc/Albany_DOFManager.hpp @@ -66,11 +66,15 @@ class DOFManager : public panzer::DOFManager { using panzer::DOFManager::getGIDFieldOffsets_closure; const std::vector& getGIDFieldOffsetsSubcell (int fieldNum, int subcell_dim, int subcell_pos) const; + Kokkos::Subview,int,int,int,decltype(Kokkos::ALL)> + getGIDFieldOffsetsSubcellKokkos (int fieldNum, int subcell_dim, int subcell_pos) const; // Special case of the above, for subcell being the top or bottom side // NOTE: only for quad/hexa/wedge const std::vector& getGIDFieldOffsetsSide (int fieldNum, int side) const; + Kokkos::Subview,int,int,int,decltype(Kokkos::ALL)> + getGIDFieldOffsetsSideKokkos (int fieldNum, int side) const; // If side!=orderedAsInSide, this version returns side offsets ordered // in such a way that off[i] on side=$side is directly above/below @@ -78,6 +82,8 @@ class DOFManager : public panzer::DOFManager { // This makes sense ONLY IF $side and $orderAsInSide are both in {top,bot} const std::vector& getGIDFieldOffsetsSide (int fieldNum, int side, int orderAsInSide) const; + Kokkos::Subview,int,int,int,decltype(Kokkos::ALL)> + getGIDFieldOffsetsSideKokkos (int fieldNum, int side, int orderAsInSide) const; const std::string& part_name () const { return m_part_name; @@ -123,10 +129,15 @@ class DOFManager : public panzer::DOFManager { DualView m_elem_dof_lids; using vec4int = std::vector>>>; + + // Convert DOF vec4ints to Kokkos Views for device access + void convertVecsToViews (vec4int& vec, Kokkos::View& view, std::string view_name); + // m_subcell_closures[ifield][dim][ord] is the vector of the offsets of // field $ifield on the $ord-th subcell of dimension $dim. More precisely, // it's the closure of all offsets on all entities belonging to that subcell vec4int m_subcell_closures; + Kokkos::View m_subcell_closures_view; // Shortcut for location of top/bot sides in the cell list of sides. bool m_top_bot_well_defined = false; @@ -145,6 +156,7 @@ class DOFManager : public panzer::DOFManager { // dof at offset m_subcell_closures[F][side_dim][top] // NOTE: this vec4int m_side_closure_orderd_as_side; + Kokkos::View m_side_closure_orderd_as_side_view; Teuchos::RCP m_conn_mgr; diff --git a/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp b/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp index 2ae615edb5..74f487f375 100644 --- a/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp +++ b/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp @@ -523,45 +523,17 @@ evaluateFields(typename Traits::EvalData workset) const auto p = workset.distParamLib->get(workset.dist_param_deriv_name); const auto p_dof_mgr = p->get_dof_mgr(); const auto p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().dev(); - const auto p_offsets = p_dof_mgr->getGIDFieldOffsetsSide(0,field_pos); - const auto top_nodes = node_dof_mgr->getGIDFieldOffsetsSide(0,top,field_pos); - const auto bot_nodes = node_dof_mgr->getGIDFieldOffsetsSide(0,bot,field_pos); + const auto p_offsets = p_dof_mgr->getGIDFieldOffsetsSideKokkos(0,field_pos); + const auto top_nodes = node_dof_mgr->getGIDFieldOffsetsSideKokkos(0,top,field_pos); + const auto bot_nodes = node_dof_mgr->getGIDFieldOffsetsSideKokkos(0,bot,field_pos); const auto elem_lids_host = Kokkos::subview(elem_lids_ws.host(),ws,Kokkos::ALL); - Albany::DualView basal_elem_LIDs("basal_elem_LIDs", elem_lids_host.size()); - Albany::DualView field_elem_LIDs("field_elem_LIDs", elem_lids_host.size()); - for (size_t i = 0; i < elem_lids_host.size(); ++i) { - basal_elem_LIDs.host()(i) = cell_layers_data->getColumnId(i); - field_elem_LIDs.host()(i) = cell_layers_data->getId(i,fieldLayer); - } - - basal_elem_LIDs.sync_to_dev(); - field_elem_LIDs.sync_to_dev(); - - Albany::DualView p_offsets_dv("p_offsets",p_offsets.size()); - Albany::DualView top_nodes_dv("top_nodes",top_nodes.size()); - Albany::DualView bot_nodes_dv("bot_nodes",bot_nodes.size()); - - for (size_t i = 0; i < p_offsets.size(); ++i) { - p_offsets_dv.host()(i) = p_offsets[i]; - } - for (size_t i = 0; i < top_nodes.size(); ++i) { - top_nodes_dv.host()(i) = top_nodes[i]; - } - for (size_t i = 0; i < bot_nodes.size(); ++i) { - bot_nodes_dv.host()(i) = bot_nodes[i]; - } - - p_offsets_dv.sync_to_dev(); - top_nodes_dv.sync_to_dev(); - bot_nodes_dv.sync_to_dev(); - - const auto p_offsets_dev = p_offsets_dv.dev(); - const auto top_nodes_dev = top_nodes_dv.dev(); - const auto bot_nodes_dev = bot_nodes_dv.dev(); + const auto layerOrd = cell_layers_data->layerOrd; + const auto numHorizEntities = cell_layers_data->numHorizEntities; + const auto numLayers = cell_layers_data->numLayers; - const int num_nodes_side = p_offsets.size(); + const int num_nodes_side = p_dof_mgr->getGIDFieldOffsetsSide(0,field_pos).size(); // Pick a cell layer that contains the field level. Can be same as fieldLevel, // except for the last level. @@ -569,15 +541,16 @@ evaluateFields(typename Traits::EvalData workset) KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); - const LO basal_elem_LID = basal_elem_LIDs.dev()(elem_LID); - const LO field_elem_LID = field_elem_LIDs.dev()(basal_elem_LID); + const LO basal_elem_LID = layerOrd ? elem_LID % numHorizEntities : elem_LID / numLayers; + const LO field_elem_LID = layerOrd ? basal_elem_LID + fieldLayer*numHorizEntities : + basal_elem_LID * numLayers + fieldLayer; // Bottom nodes for (int i=0; igetGIDFieldOffsetsSide(0,top,field_pos); - const auto bot_offsets = p_dof_mgr->getGIDFieldOffsetsSide(0,bot,field_pos); + const auto top_offsets = p_dof_mgr->getGIDFieldOffsetsSideKokkos(0,top,field_pos); + const auto bot_offsets = p_dof_mgr->getGIDFieldOffsetsSideKokkos(0,bot,field_pos); const auto p_offsets = fieldLevel==fieldLayer ? bot_offsets : top_offsets; - const auto numSideNodes = p_offsets.size(); + const auto numSideNodes = p_dof_mgr->getGIDFieldOffsetsSide(0,top,field_pos).size(); const auto elem_lids_host = Kokkos::subview(elem_lids_ws.host(),ws,Kokkos::ALL); - Albany::DualView basal_elem_LIDs("basal_elem_LIDs", elem_lids_host.size()); - Albany::DualView field_elem_LIDs("field_elem_LIDs", elem_lids_host.size()); - - for (size_t i = 0; i < elem_lids_host.size(); ++i) { - basal_elem_LIDs.host()(i) = layers_data->getColumnId(i); - field_elem_LIDs.host()(i) = layers_data->getId(i,fieldLayer); - } - - basal_elem_LIDs.sync_to_dev(); - field_elem_LIDs.sync_to_dev(); - - Albany::DualView p_offsets_dv("p_offsets",p_offsets.size()); - Albany::DualView top_offsets_dv("top_offsets",top_offsets.size()); - Albany::DualView bot_offsets_dv("bot_offsets",bot_offsets.size()); - - for (size_t i = 0; i < p_offsets.size(); ++i) { - p_offsets_dv.host()(i) = p_offsets[i]; - } - for (size_t i = 0; i < top_offsets.size(); ++i) { - top_offsets_dv.host()(i) = top_offsets[i]; - } - for (size_t i = 0; i < bot_offsets.size(); ++i) { - bot_offsets_dv.host()(i) = bot_offsets[i]; - } - - p_offsets_dv.sync_to_dev(); - top_offsets_dv.sync_to_dev(); - bot_offsets_dv.sync_to_dev(); - const auto p_offsets_dev = p_offsets_dv.dev(); - const auto top_offsets_dev = top_offsets_dv.dev(); - const auto bot_offsets_dev = bot_offsets_dv.dev(); + const auto layerOrd = layers_data->layerOrd; + const auto numHorizEntities = layers_data->numHorizEntities; + const auto numLayers = layers_data->numLayers; const auto offsets = m_fields_offsets.dev(); Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), @@ -908,14 +853,15 @@ evaluate2DFieldsDerivativesDueToExtrudedParams(typename Traits::EvalData workset } } - const LO basal_elem_LID = basal_elem_LIDs.dev()(elem_LID); - const LO field_elem_LID = field_elem_LIDs.dev()(basal_elem_LID); + const LO basal_elem_LID = layerOrd ? elem_LID % numHorizEntities : elem_LID / numLayers; + const LO field_elem_LID = layerOrd ? basal_elem_LID + fieldLayer*numHorizEntities : + basal_elem_LID * numLayers + fieldLayer; // do bot_offsets for (std::size_t node=0; node=0) { - const auto& dx = value.dx(bot_offsets_dev(node)).dx(0); + const auto& dx = value.dx(bot_offsets(node)).dx(0); if (f_px_is_active) KU::atomic_add(&(hess_vec_prod_f_px_data(row,0)), dx); if (f_pp_is_active) @@ -925,9 +871,9 @@ evaluate2DFieldsDerivativesDueToExtrudedParams(typename Traits::EvalData workset // do top_offsets for (std::size_t node=0; node=0) { - const auto& dx = value.dx(top_offsets_dev(node)).dx(0); + const auto& dx = value.dx(top_offsets(node)).dx(0); if (f_px_is_active) KU::atomic_add(&(hess_vec_prod_f_px_data(row,0)), dx); if (f_pp_is_active) From eb3bcbb34b31979821079ed2bac077968992d770 Mon Sep 17 00:00:00 2001 From: mcarlson801 Date: Thu, 31 Oct 2024 13:08:15 -0700 Subject: [PATCH 10/11] Cleanup and adding comment --- src/disc/Albany_LayeredMeshNumbering.hpp | 3 --- src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp | 4 ++++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/disc/Albany_LayeredMeshNumbering.hpp b/src/disc/Albany_LayeredMeshNumbering.hpp index eec52f24d4..f66180585a 100644 --- a/src/disc/Albany_LayeredMeshNumbering.hpp +++ b/src/disc/Albany_LayeredMeshNumbering.hpp @@ -38,7 +38,6 @@ struct LayeredMeshNumbering { numLayers = _numLayers; } - KOKKOS_INLINE_FUNCTION T getId(const T column_id, const T level_index) const { return layerOrd ? column_id + level_index*numHorizEntities : column_id * numLayers + level_index; @@ -63,12 +62,10 @@ struct LayeredMeshNumbering { return layerOrd ? 1 : numLayers; } - KOKKOS_INLINE_FUNCTION T getColumnId (const T id) const { return layerOrd ? id % numHorizEntities : id / numLayers; } - KOKKOS_INLINE_FUNCTION T getLayerId (const T id) const { return layerOrd ? id / numHorizEntities : id % numLayers; } diff --git a/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp b/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp index 74f487f375..c1bcebd2dd 100644 --- a/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp +++ b/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp @@ -533,6 +533,10 @@ evaluateFields(typename Traits::EvalData workset) const auto numHorizEntities = cell_layers_data->numHorizEntities; const auto numLayers = cell_layers_data->numLayers; + // Note: The DOFManager stores offsets in nested vectors of non-uniform length. In order to + // make the offsets available on device, they were converted to a single kokkos view large enough + // to hold all of the vectors. A side effect is that array bounds can't be obtained from the kokkos view + // extents and have to be obtained from the non-kokkos offsets vector or from another source. const int num_nodes_side = p_dof_mgr->getGIDFieldOffsetsSide(0,field_pos).size(); // Pick a cell layer that contains the field level. Can be same as fieldLevel, From 6e7ef8d83d2e947457899fd438bb1c2f113e9e2f Mon Sep 17 00:00:00 2001 From: mcarlson801 Date: Mon, 18 Nov 2024 15:38:37 -0800 Subject: [PATCH 11/11] Port some more gather specializations for extruded mesh --- .../PHAL_GatherScalarNodalParameter.hpp | 6 ++ .../PHAL_GatherScalarNodalParameter_Def.hpp | 79 +++++++++++-------- 2 files changed, 53 insertions(+), 32 deletions(-) diff --git a/src/evaluators/gather/PHAL_GatherScalarNodalParameter.hpp b/src/evaluators/gather/PHAL_GatherScalarNodalParameter.hpp index 47d46be822..08ec85e659 100644 --- a/src/evaluators/gather/PHAL_GatherScalarNodalParameter.hpp +++ b/src/evaluators/gather/PHAL_GatherScalarNodalParameter.hpp @@ -87,6 +87,9 @@ class GatherScalarExtruded2DNodalParameter : private: typedef typename EvalT::ParamScalarT ParamScalarT; const int fieldLevel; +protected: + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; }; @@ -422,6 +425,9 @@ class GatherScalarExtruded2DNodalParameter; }; } // namespace PHAL diff --git a/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp b/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp index cda028b8aa..b1750f4440 100644 --- a/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp +++ b/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp @@ -113,7 +113,13 @@ evaluateFields(typename Traits::EvalData workset) const auto bot = layers_data->bot_side_pos; const auto top = layers_data->top_side_pos; const auto ws = workset.wsIndex; - const auto elem_lids = workset.disc->getElementLIDs_host(ws); + const auto elem_lids_ws = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_ws.dev(),ws,Kokkos::ALL); + + // Grab some info from the layers data for device access + const auto layerOrd = layers_data->layerOrd; + const auto numHorizEntities = layers_data->numHorizEntities; + const auto numLayers = layers_data->numLayers; // Pick element layer that contains the field level const auto fieldLayer = fieldLevel==layers_data->numLayers @@ -122,34 +128,35 @@ evaluateFields(typename Traits::EvalData workset) // Distributed parameter vector const auto& p = workset.distParamLib->get(this->param_name); - const auto p_data = Albany::getLocalData(p->overlapped_vector().getConst()); + const auto p_data = Albany::getDeviceData(p->overlapped_vector().getConst()); // Parameter dof numbering info - const auto& p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().host(); + const auto& p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().dev(); // Note: grab offsets on top/bot ordered in the same way as on side $field_pos // to guarantee corresponding nodes are vertically aligned. - const auto& offsets_top = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,top,field_pos); - const auto& offsets_bot = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,bot,field_pos); - const auto& offsets_p = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,field_pos); - const int num_nodes_2d = offsets_p.size(); + const auto& offsets_top = p->get_dof_mgr()->getGIDFieldOffsetsSideKokkos(0,top,field_pos); + const auto& offsets_bot = p->get_dof_mgr()->getGIDFieldOffsetsSideKokkos(0,bot,field_pos); + const auto& offsets_p = p->get_dof_mgr()->getGIDFieldOffsetsSideKokkos(0,field_pos); + const int num_nodes_2d = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,field_pos).size(); // Idea: loop over cells. Grab p data from a cell at the right layer, // using offsets that correspond to the elem-side where the param is defined. // Inside, loop over 2d nodes, and process top/bot sides separately - for (std::size_t cell=0; cellgetName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); - const auto basal_elem_LID = layers_data->getColumnId(elem_LID); - const auto param_elem_LID = layers_data->getId(basal_elem_LID,fieldLayer); + const auto basal_elem_LID = layerOrd ? elem_LID % numHorizEntities : elem_LID / numLayers; + const auto param_elem_LID = layerOrd ? basal_elem_LID + fieldLayer*numHorizEntities : + basal_elem_LID * numLayers + fieldLayer; for (int node2d=0; node2d=0 ? p_data[p_lid] : 0; - for (auto node : {offsets_bot[node2d], offsets_top[node2d]}) { - this->val(cell,node) = p_val; - } + const LO p_lid = p_elem_dof_lids(param_elem_LID,offsets_p(node2d)); + const auto p_val = p_lid>=0 ? p_data(p_lid) : 0; + this->val(cell,offsets_bot(node2d)) = p_val; + this->val(cell,offsets_top(node2d)) = p_val; } - } + }); } // ************************************************************** @@ -545,7 +552,12 @@ evaluateFields(typename Traits::EvalData workset) const auto bot = layers_data->bot_side_pos; const auto top = layers_data->top_side_pos; const auto ws = workset.wsIndex; - const auto elem_lids = workset.disc->getElementLIDs_host(ws); + const auto elem_lids_ws = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_ws.dev(),ws,Kokkos::ALL); + + const auto layerOrd = layers_data->layerOrd; + const auto numHorizEntities = layers_data->numHorizEntities; + const auto numLayers = layers_data->numLayers; // Direction vector for the Hessian-vector product const auto vvec = workset.hessianWorkset.direction_p; @@ -578,7 +590,8 @@ evaluateFields(typename Traits::EvalData workset) Teuchos::Exceptions::InvalidParameter, "\nError in GatherScalarExtruded2DNodalParameter: " "direction_p is not set and the direction is acrive.\n"); - const auto vvec_data = is_p_direction_active ? Albany::getLocalData(vvec->col(0).getConst()) : Teuchos::null; + Albany::ThyraVDeviceView vvec_data; + if (is_p_direction_active) vvec_data = Albany::getDeviceData(vvec->col(0).getConst()); // Pick element layer that contains the field level const auto fieldLayer = fieldLevel==layers_data->numLayers @@ -587,28 +600,30 @@ evaluateFields(typename Traits::EvalData workset) // Distributed parameter vector const auto p = workset.distParamLib->get(this->param_name); - const auto p_data = Albany::getLocalData(p->overlapped_vector().getConst()); + const auto p_data = Albany::getDeviceData(p->overlapped_vector().getConst()); // Parameter dof numbering info const auto p_dof_mgr = p->get_dof_mgr(); - const auto& p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().host(); + const auto& p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().dev(); // Note: grab offsets on top/bot ordered in the same way as on side $field_pos // to guarantee corresponding nodes are vertically aligned. - const auto& offsets_top = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,top,field_pos); - const auto& offsets_bot = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,bot,field_pos); - const auto& offsets_p = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,field_pos); - const int num_nodes_2d = offsets_p.size(); + const auto& offsets_top = p->get_dof_mgr()->getGIDFieldOffsetsSideKokkos(0,top,field_pos); + const auto& offsets_bot = p->get_dof_mgr()->getGIDFieldOffsetsSideKokkos(0,bot,field_pos); + const auto& offsets_p = p->get_dof_mgr()->getGIDFieldOffsetsSideKokkos(0,field_pos); + const int num_nodes_2d = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,field_pos).size(); using ref_t = typename PHAL::Ref::type; - for (std::size_t cell=0; cellgetName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); - const auto basal_elem_LID = layers_data->getColumnId(elem_LID); - const auto param_elem_LID = layers_data->getId(basal_elem_LID,fieldLayer); + const auto basal_elem_LID = layerOrd ? elem_LID % numHorizEntities : elem_LID / numLayers; + const auto param_elem_LID = layerOrd ? basal_elem_LID + fieldLayer*numHorizEntities : + basal_elem_LID * numLayers + fieldLayer; for (int node2d=0; node2d=0 ? p_data[p_lid] : 0; - for (auto node : {offsets_bot[node2d], offsets_top[node2d]}) { + const LO p_lid = p_elem_dof_lids(param_elem_LID,offsets_p(node2d)); + const auto p_val = p_lid>=0 ? p_data(p_lid) : 0; + for (auto node : {offsets_bot(node2d), offsets_top(node2d)}) { ref_t val = this->val(cell,node); val = HessianVecFad(val.size(), p_val); // If we differentiate w.r.t. this parameter, we have to set the first @@ -618,10 +633,10 @@ evaluateFields(typename Traits::EvalData workset) // If we differentiate w.r.t. this parameter direction, we have to set // the second derivative to the related direction value if (is_p_direction_active) - val.val().fastAccessDx(0) = p_lid>=0 ? vvec_data[p_lid] : 0; + val.val().fastAccessDx(0) = p_lid>=0 ? vvec_data(p_lid) : 0; } } - } + }); } } // namespace PHAL