diff --git a/src/Albany_Application.cpp b/src/Albany_Application.cpp index 80f8a84e51..b00332c379 100644 --- a/src/Albany_Application.cpp +++ b/src/Albany_Application.cpp @@ -3144,7 +3144,6 @@ Application::loadWorksetBucketInfo(PHAL::Workset& workset, const int& ws, workset.EBName = wsEBNames[ws]; workset.wsIndex = ws; - workset.local_Vp.resize(workset.numCells); workset.savedMDFields = phxSetup->get_saved_fields(evalName); diff --git a/src/PHAL_Workset.hpp b/src/PHAL_Workset.hpp index 9ea2acc666..595df94d76 100644 --- a/src/PHAL_Workset.hpp +++ b/src/PHAL_Workset.hpp @@ -127,7 +127,7 @@ struct Workset Teuchos::RCP distParamLib; std::string dist_param_deriv_name; bool transpose_dist_param_deriv; - Teuchos::ArrayRCP>> local_Vp; + Kokkos::View local_Vp; Teuchos::ArrayRCP> wsCoords; std::string EBName; diff --git a/src/disc/Albany_DOFManager.cpp b/src/disc/Albany_DOFManager.cpp index 4c980e1b90..886165b17f 100644 --- a/src/disc/Albany_DOFManager.cpp +++ b/src/disc/Albany_DOFManager.cpp @@ -65,6 +65,11 @@ void DOFManager::build () // 4. Possibly restrict the DOFs list restrict (part_name()); + // Allocate/populate nodes view + convertVecsToViews (m_subcell_closures, m_subcell_closures_view, "m_subcell_closures_view"); + if (m_topo_dim>2 && m_top_bot_well_defined) + convertVecsToViews (m_side_closure_orderd_as_side, m_side_closure_orderd_as_side_view, "m_side_closure_orderd_as_side_view"); + // Done m_built = true; } @@ -161,6 +166,16 @@ getGIDFieldOffsetsSubcell (int fieldNum, return m_subcell_closures[fieldNum][subcell_dim][subcell_pos]; } +Kokkos::Subview,int,int,int,decltype(Kokkos::ALL)> +DOFManager:: +getGIDFieldOffsetsSubcellKokkos (int fieldNum, + int subcell_dim, + int subcell_pos) const +{ + return Kokkos::subview(m_subcell_closures_view,fieldNum,subcell_dim,subcell_pos,Kokkos::ALL); +} + + const std::vector& DOFManager:: getGIDFieldOffsetsSide (int fieldNum, int side) const @@ -168,6 +183,13 @@ getGIDFieldOffsetsSide (int fieldNum, int side) const return getGIDFieldOffsetsSubcell(fieldNum,m_topo_dim-1,side); } +Kokkos::Subview,int,int,int,decltype(Kokkos::ALL)> +DOFManager:: +getGIDFieldOffsetsSideKokkos (int fieldNum, int side) const +{ + return getGIDFieldOffsetsSubcellKokkos(fieldNum,m_topo_dim-1,side); +} + const std::vector& DOFManager:: getGIDFieldOffsetsSide (int fieldNum, int side, const int orderAsInSide) const @@ -191,6 +213,44 @@ getGIDFieldOffsetsSide (int fieldNum, int side, const int orderAsInSide) const return m_side_closure_orderd_as_side[fieldNum][side][orderAsInSide]; } +Kokkos::Subview,int,int,int,decltype(Kokkos::ALL)> +DOFManager:: +getGIDFieldOffsetsSideKokkos(int fieldNum, int side, const int orderAsInSide) const +{ + return Kokkos::subview(m_side_closure_orderd_as_side_view,fieldNum,side,orderAsInSide,Kokkos::ALL); +} + +void +DOFManager:: +convertVecsToViews (vec4int& vec, Kokkos::View& view, std::string view_name) +{ + size_t max_size_i = vec.size(); + size_t max_size_j = 0; size_t max_size_k = 0; size_t max_size_l = 0; + for (size_t i=0; i < vec.size(); ++i) { + max_size_j = std::max(max_size_j, vec[i].size()); + for (size_t j=0; j < vec[i].size(); ++j) { + max_size_k = std::max(max_size_k, vec[i][j].size()); + for (size_t k=0; k < vec[i][j].size(); ++k) { + max_size_l = std::max(max_size_l, vec[i][j][k].size()); + } + } + } + + view = Kokkos::View(view_name, + max_size_i, + max_size_j, + max_size_k, + max_size_l); + + const auto view_host = Kokkos::create_mirror_view(view); + for (size_t i=0; i < vec.size(); ++i) + for (size_t j=0; j < vec[i].size(); ++j) + for (size_t k=0; k < vec[i][j].size(); ++k) + for (size_t l=0; l < vec[i][j][k].size(); ++l) + view_host(i,j,k,l) = vec[i][j][k][l]; + Kokkos::deep_copy(view,view_host); +} + void DOFManager:: restrict (const std::string& sub_part_name) { diff --git a/src/disc/Albany_DOFManager.hpp b/src/disc/Albany_DOFManager.hpp index 5be2832ec9..c121086b9b 100644 --- a/src/disc/Albany_DOFManager.hpp +++ b/src/disc/Albany_DOFManager.hpp @@ -66,11 +66,15 @@ class DOFManager : public panzer::DOFManager { using panzer::DOFManager::getGIDFieldOffsets_closure; const std::vector& getGIDFieldOffsetsSubcell (int fieldNum, int subcell_dim, int subcell_pos) const; + Kokkos::Subview,int,int,int,decltype(Kokkos::ALL)> + getGIDFieldOffsetsSubcellKokkos (int fieldNum, int subcell_dim, int subcell_pos) const; // Special case of the above, for subcell being the top or bottom side // NOTE: only for quad/hexa/wedge const std::vector& getGIDFieldOffsetsSide (int fieldNum, int side) const; + Kokkos::Subview,int,int,int,decltype(Kokkos::ALL)> + getGIDFieldOffsetsSideKokkos (int fieldNum, int side) const; // If side!=orderedAsInSide, this version returns side offsets ordered // in such a way that off[i] on side=$side is directly above/below @@ -78,6 +82,8 @@ class DOFManager : public panzer::DOFManager { // This makes sense ONLY IF $side and $orderAsInSide are both in {top,bot} const std::vector& getGIDFieldOffsetsSide (int fieldNum, int side, int orderAsInSide) const; + Kokkos::Subview,int,int,int,decltype(Kokkos::ALL)> + getGIDFieldOffsetsSideKokkos (int fieldNum, int side, int orderAsInSide) const; const std::string& part_name () const { return m_part_name; @@ -123,10 +129,15 @@ class DOFManager : public panzer::DOFManager { DualView m_elem_dof_lids; using vec4int = std::vector>>>; + + // Convert DOF vec4ints to Kokkos Views for device access + void convertVecsToViews (vec4int& vec, Kokkos::View& view, std::string view_name); + // m_subcell_closures[ifield][dim][ord] is the vector of the offsets of // field $ifield on the $ord-th subcell of dimension $dim. More precisely, // it's the closure of all offsets on all entities belonging to that subcell vec4int m_subcell_closures; + Kokkos::View m_subcell_closures_view; // Shortcut for location of top/bot sides in the cell list of sides. bool m_top_bot_well_defined = false; @@ -145,6 +156,7 @@ class DOFManager : public panzer::DOFManager { // dof at offset m_subcell_closures[F][side_dim][top] // NOTE: this vec4int m_side_closure_orderd_as_side; + Kokkos::View m_side_closure_orderd_as_side_view; Teuchos::RCP m_conn_mgr; diff --git a/src/disc/Albany_LayeredMeshNumbering.hpp b/src/disc/Albany_LayeredMeshNumbering.hpp index 8da4299316..f66180585a 100644 --- a/src/disc/Albany_LayeredMeshNumbering.hpp +++ b/src/disc/Albany_LayeredMeshNumbering.hpp @@ -62,7 +62,6 @@ struct LayeredMeshNumbering { return layerOrd ? 1 : numLayers; } - T getColumnId (const T id) const { return layerOrd ? id % numHorizEntities : id / numLayers; } diff --git a/src/evaluators/bc/PHAL_Neumann_Def.hpp b/src/evaluators/bc/PHAL_Neumann_Def.hpp index 05f95f4154..f940228c13 100644 --- a/src/evaluators/bc/PHAL_Neumann_Def.hpp +++ b/src/evaluators/bc/PHAL_Neumann_Def.hpp @@ -971,6 +971,8 @@ evaluateFields(typename Traits::EvalData workset) this->evaluateNeumannContribution(workset); + const auto& local_Vp = workset.local_Vp; + constexpr auto ALL = Kokkos::ALL(); if (trans) { const int neq = workset.numEqs; @@ -979,7 +981,6 @@ evaluateFields(typename Traits::EvalData workset) const auto elem_lids = workset.disc->getElementLIDs_host(workset.wsIndex); for (size_t cell=0; cellnumNodes; ++node) { for (int dim = 0; dim < this->numDOFsSet; ++dim){ int eq = this->offset[dim]; - val += this->neumann(cell, node, dim).dx(i)*local_Vp[node*neq+eq][col]; + val += this->neumann(cell, node, dim).dx(i)*local_Vp(cell,node*neq+eq,col); } } fpV_nonconst2dView[col][row] += val; @@ -1007,7 +1008,6 @@ evaluateFields(typename Traits::EvalData workset) const auto& offsets = this->fields_offsets; for (std::size_t cell=0; cell < workset.numCells; ++cell ) { const auto elem_LID = elem_lids(cell); - const auto& local_Vp = workset.local_Vp[cell]; const int num_deriv = local_Vp.size(); const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); @@ -1018,7 +1018,7 @@ evaluateFields(typename Traits::EvalData workset) for (int col=0; colneumann(cell, node, dim).dx(i)*local_Vp[i][col]; + val += this->neumann(cell, node, dim).dx(i)*local_Vp(cell,i,col); } fpV_nonconst2dView[col][row] += val; } diff --git a/src/evaluators/gather/PHAL_GatherScalarNodalParameter.hpp b/src/evaluators/gather/PHAL_GatherScalarNodalParameter.hpp index 5d46182248..08ec85e659 100644 --- a/src/evaluators/gather/PHAL_GatherScalarNodalParameter.hpp +++ b/src/evaluators/gather/PHAL_GatherScalarNodalParameter.hpp @@ -87,6 +87,9 @@ class GatherScalarExtruded2DNodalParameter : private: typedef typename EvalT::ParamScalarT ParamScalarT; const int fieldLevel; +protected: + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; }; @@ -111,6 +114,9 @@ class GatherScalarNodalParameter : void evaluateFields(typename Traits::EvalData d); private: typedef typename PHAL::AlbanyTraits::DistParamDeriv::ParamScalarT ParamScalarT; +protected: + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; }; @@ -419,6 +425,9 @@ class GatherScalarExtruded2DNodalParameter; }; } // namespace PHAL diff --git a/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp b/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp index f0c7287496..b1750f4440 100644 --- a/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp +++ b/src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp @@ -113,7 +113,13 @@ evaluateFields(typename Traits::EvalData workset) const auto bot = layers_data->bot_side_pos; const auto top = layers_data->top_side_pos; const auto ws = workset.wsIndex; - const auto elem_lids = workset.disc->getElementLIDs_host(ws); + const auto elem_lids_ws = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_ws.dev(),ws,Kokkos::ALL); + + // Grab some info from the layers data for device access + const auto layerOrd = layers_data->layerOrd; + const auto numHorizEntities = layers_data->numHorizEntities; + const auto numLayers = layers_data->numLayers; // Pick element layer that contains the field level const auto fieldLayer = fieldLevel==layers_data->numLayers @@ -122,34 +128,35 @@ evaluateFields(typename Traits::EvalData workset) // Distributed parameter vector const auto& p = workset.distParamLib->get(this->param_name); - const auto p_data = Albany::getLocalData(p->overlapped_vector().getConst()); + const auto p_data = Albany::getDeviceData(p->overlapped_vector().getConst()); // Parameter dof numbering info - const auto& p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().host(); + const auto& p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().dev(); // Note: grab offsets on top/bot ordered in the same way as on side $field_pos // to guarantee corresponding nodes are vertically aligned. - const auto& offsets_top = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,top,field_pos); - const auto& offsets_bot = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,bot,field_pos); - const auto& offsets_p = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,field_pos); - const int num_nodes_2d = offsets_p.size(); + const auto& offsets_top = p->get_dof_mgr()->getGIDFieldOffsetsSideKokkos(0,top,field_pos); + const auto& offsets_bot = p->get_dof_mgr()->getGIDFieldOffsetsSideKokkos(0,bot,field_pos); + const auto& offsets_p = p->get_dof_mgr()->getGIDFieldOffsetsSideKokkos(0,field_pos); + const int num_nodes_2d = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,field_pos).size(); // Idea: loop over cells. Grab p data from a cell at the right layer, // using offsets that correspond to the elem-side where the param is defined. // Inside, loop over 2d nodes, and process top/bot sides separately - for (std::size_t cell=0; cellgetName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); - const auto basal_elem_LID = layers_data->getColumnId(elem_LID); - const auto param_elem_LID = layers_data->getId(basal_elem_LID,fieldLayer); + const auto basal_elem_LID = layerOrd ? elem_LID % numHorizEntities : elem_LID / numLayers; + const auto param_elem_LID = layerOrd ? basal_elem_LID + fieldLayer*numHorizEntities : + basal_elem_LID * numLayers + fieldLayer; for (int node2d=0; node2d=0 ? p_data[p_lid] : 0; - for (auto node : {offsets_bot[node2d], offsets_top[node2d]}) { - this->val(cell,node) = p_val; - } + const LO p_lid = p_elem_dof_lids(param_elem_LID,offsets_p(node2d)); + const auto p_val = p_lid>=0 ? p_data(p_lid) : 0; + this->val(cell,offsets_bot(node2d)) = p_val; + this->val(cell,offsets_top(node2d)) = p_val; } - } + }); } // ************************************************************** @@ -191,17 +198,19 @@ evaluateFields(typename Traits::EvalData workset) // Distributed parameter vector const auto p = workset.distParamLib->get(this->param_name); - const auto p_data = Albany::getLocalData(p->overlapped_vector().getConst()); + const auto p_data = Albany::getDeviceData(p->overlapped_vector().getConst()); const auto Vp = workset.Vp; - const auto Vp_data = !Vp.is_null() ? Albany::getLocalData(Vp) : Teuchos::null; + Albany::ThyraMVDeviceView Vp_data; + if (!Vp.is_null()) Vp_data = Albany::getDeviceData(Vp); // Parameter/solution/nodes dof numbering info const auto dof_mgr = workset.disc->getDOFManager(); - const auto p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().host(); + const auto p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().dev(); const auto ws = workset.wsIndex; - const auto elem_lids = workset.disc->getElementLIDs_host(ws); + const auto ws_elem_lids = workset.disc->getWsElementLIDs().dev(); + const auto elem_lids = Kokkos::subview(ws_elem_lids,ws,Kokkos::ALL()); // Are we differentiating w.r.t. this parameter? const bool is_active = (workset.dist_param_deriv_name == this->param_name); @@ -211,58 +220,76 @@ evaluateFields(typename Traits::EvalData workset) const int neq = dof_mgr->getNumFields(); const int num_deriv = this->numNodes; bool trans = workset.transpose_dist_param_deriv; - const auto elem_dof_lids = dof_mgr->elem_dof_lids().host(); - for (std::size_t cell=0; cellelem_dof_lids().dev(); + + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); const auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,elem_LID,ALL); for (int node=0; node=0 ? p_data[lid] : 0; + const auto p_val = lid>=0 ? p_data(lid) : 0; ParamScalarT v(num_deriv, node, p_val); this->val(cell,node) = v; } + }); + + if (Vp != Teuchos::null) { + const int num_cols = Vp->domain()->dim(); + const int num_dofs = elem_dof_lids.extent(1); + if (trans) { + workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_dofs,num_cols); + } else { + workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_deriv,num_cols); + } - if (Vp != Teuchos::null) { - const int num_cols = Vp->domain()->dim(); + const auto& local_Vp = workset.local_Vp; - auto& local_Vp = workset.local_Vp[cell]; + if (trans) { + for (int eq=0; eqgetGIDFieldOffsetsKokkos(eq); + const int num_offsets = offsets.size(); - if (trans) { - auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); - // const auto& offsets = this->m_sol_fields_offsets; - local_Vp.resize(dof_lids.size()); - for (int eq=0; eqgetGIDFieldOffsets(eq); - for (const auto o : offsets) { - local_Vp[o].resize(num_cols); + Kokkos::parallel_for(this->getName()+"_transvp",RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { + const auto elem_LID = elem_lids(cell); + const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); + for (int i=0; i < num_offsets; ++i) { + const auto o = offsets(i); const LO lid = dof_lids(o); for (int col=0; colgetName()+"_notransvp",RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { + const auto elem_LID = elem_lids(cell); + const auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,elem_LID,ALL); for (int node=0; node=0 ? Vp_data[col][lid] : 0; + local_Vp(cell,node,col) = lid>=0 ? Vp_data(lid,col) : 0; } - } + }); } } } else { // If not active, just set the parameter value in the phalanx field - for (std::size_t cell=0; cell < workset.numCells; ++cell ) { + const int num_nodes = this->numNodes; + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); const auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,elem_LID,ALL); - for (int node=0; nodenumNodes; ++node) { + for (int node=0; nodeval(cell,node) = lid>=0 ? p_data[lid] : 0; + this->val(cell,node) = lid>=0 ? p_data(lid) : 0; } - } + }); } } @@ -330,6 +357,18 @@ evaluateFields(typename Traits::EvalData workset) const int neq = sol_dof_mgr->getNumFields(); const int num_deriv = this->numNodes; const bool trans = workset.transpose_dist_param_deriv; + + if (Vp != Teuchos::null) { + const int num_cols = workset.Vp->domain()->dim(); + if (trans) { + const int num_dofs = elem_dof_lids.extent(1); + workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_dofs,num_cols); + } else { + workset.local_Vp = Kokkos::View("local_Vp",workset.numCells,num_deriv,num_cols); + } + } + const auto& local_Vp = workset.local_Vp; + for (std::size_t cell=0; cellgetColumnId(elem_LID); @@ -349,27 +388,22 @@ evaluateFields(typename Traits::EvalData workset) if (Vp != Teuchos::null) { const int num_cols = workset.Vp->domain()->dim(); - auto& local_Vp = workset.local_Vp[cell]; if (trans) { auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); - local_Vp.resize(dof_lids.size()); for (int eq=0; eqgetGIDFieldOffsets(eq); for (const auto o : sol_offsets) { - local_Vp[o].resize(num_cols); const LO lid = dof_lids(o); for (int col=0; col=0 ? Vp_data[col][p_lid] : 0; + local_Vp(cell,node,col) = p_lid>=0 ? Vp_data[col][p_lid] : 0; } } } @@ -518,7 +552,12 @@ evaluateFields(typename Traits::EvalData workset) const auto bot = layers_data->bot_side_pos; const auto top = layers_data->top_side_pos; const auto ws = workset.wsIndex; - const auto elem_lids = workset.disc->getElementLIDs_host(ws); + const auto elem_lids_ws = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_ws.dev(),ws,Kokkos::ALL); + + const auto layerOrd = layers_data->layerOrd; + const auto numHorizEntities = layers_data->numHorizEntities; + const auto numLayers = layers_data->numLayers; // Direction vector for the Hessian-vector product const auto vvec = workset.hessianWorkset.direction_p; @@ -551,7 +590,8 @@ evaluateFields(typename Traits::EvalData workset) Teuchos::Exceptions::InvalidParameter, "\nError in GatherScalarExtruded2DNodalParameter: " "direction_p is not set and the direction is acrive.\n"); - const auto vvec_data = is_p_direction_active ? Albany::getLocalData(vvec->col(0).getConst()) : Teuchos::null; + Albany::ThyraVDeviceView vvec_data; + if (is_p_direction_active) vvec_data = Albany::getDeviceData(vvec->col(0).getConst()); // Pick element layer that contains the field level const auto fieldLayer = fieldLevel==layers_data->numLayers @@ -560,28 +600,30 @@ evaluateFields(typename Traits::EvalData workset) // Distributed parameter vector const auto p = workset.distParamLib->get(this->param_name); - const auto p_data = Albany::getLocalData(p->overlapped_vector().getConst()); + const auto p_data = Albany::getDeviceData(p->overlapped_vector().getConst()); // Parameter dof numbering info const auto p_dof_mgr = p->get_dof_mgr(); - const auto& p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().host(); + const auto& p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().dev(); // Note: grab offsets on top/bot ordered in the same way as on side $field_pos // to guarantee corresponding nodes are vertically aligned. - const auto& offsets_top = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,top,field_pos); - const auto& offsets_bot = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,bot,field_pos); - const auto& offsets_p = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,field_pos); - const int num_nodes_2d = offsets_p.size(); + const auto& offsets_top = p->get_dof_mgr()->getGIDFieldOffsetsSideKokkos(0,top,field_pos); + const auto& offsets_bot = p->get_dof_mgr()->getGIDFieldOffsetsSideKokkos(0,bot,field_pos); + const auto& offsets_p = p->get_dof_mgr()->getGIDFieldOffsetsSideKokkos(0,field_pos); + const int num_nodes_2d = p->get_dof_mgr()->getGIDFieldOffsetsSide(0,field_pos).size(); using ref_t = typename PHAL::Ref::type; - for (std::size_t cell=0; cellgetName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); - const auto basal_elem_LID = layers_data->getColumnId(elem_LID); - const auto param_elem_LID = layers_data->getId(basal_elem_LID,fieldLayer); + const auto basal_elem_LID = layerOrd ? elem_LID % numHorizEntities : elem_LID / numLayers; + const auto param_elem_LID = layerOrd ? basal_elem_LID + fieldLayer*numHorizEntities : + basal_elem_LID * numLayers + fieldLayer; for (int node2d=0; node2d=0 ? p_data[p_lid] : 0; - for (auto node : {offsets_bot[node2d], offsets_top[node2d]}) { + const LO p_lid = p_elem_dof_lids(param_elem_LID,offsets_p(node2d)); + const auto p_val = p_lid>=0 ? p_data(p_lid) : 0; + for (auto node : {offsets_bot(node2d), offsets_top(node2d)}) { ref_t val = this->val(cell,node); val = HessianVecFad(val.size(), p_val); // If we differentiate w.r.t. this parameter, we have to set the first @@ -591,10 +633,10 @@ evaluateFields(typename Traits::EvalData workset) // If we differentiate w.r.t. this parameter direction, we have to set // the second derivative to the related direction value if (is_p_direction_active) - val.val().fastAccessDx(0) = p_lid>=0 ? vvec_data[p_lid] : 0; + val.val().fastAccessDx(0) = p_lid>=0 ? vvec_data(p_lid) : 0; } } - } + }); } } // namespace PHAL diff --git a/src/evaluators/gather/PHAL_GatherSolution.hpp b/src/evaluators/gather/PHAL_GatherSolution.hpp index 4064ce4e97..3bd1ce519b 100644 --- a/src/evaluators/gather/PHAL_GatherSolution.hpp +++ b/src/evaluators/gather/PHAL_GatherSolution.hpp @@ -297,6 +297,10 @@ class GatherSolution using Base::get_ref_dotdot; using Base::numFields; using Base::m_fields_offsets; + +protected: + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; }; // ************************************************************** diff --git a/src/evaluators/gather/PHAL_GatherSolution_Def.hpp b/src/evaluators/gather/PHAL_GatherSolution_Def.hpp index 2ef3130b4c..0c55b9cd97 100644 --- a/src/evaluators/gather/PHAL_GatherSolution_Def.hpp +++ b/src/evaluators/gather/PHAL_GatherSolution_Def.hpp @@ -574,37 +574,38 @@ evaluateFields(typename Traits::EvalData workset) this->gather_fields_offsets(dof_mgr); const auto ws = workset.wsIndex; - const auto elem_lids = workset.disc->getWsElementLIDs().host(); - const auto elem_dof_lids = dof_mgr->elem_dof_lids().host(); + const auto elem_lids = workset.disc->getWsElementLIDs().dev(); + const auto elem_dof_lids = dof_mgr->elem_dof_lids().dev(); constexpr auto ALL = Kokkos::ALL; //get const (read-only) view of x and xdot - using const_data_t = Teuchos::ArrayRCP; + using const_data_t = Albany::ThyraVDeviceView; const_data_t x_data, xdot_data, xdotdot_data; - x_data = Albany::getLocalData(x); + x_data = Albany::getDeviceData(x); if (xdot!=Teuchos::null) - xdot_data = Albany::getLocalData(xdot); + xdot_data = Albany::getDeviceData(xdot); if (xdotdot!=Teuchos::null) - xdotdot_data = Albany::getLocalData(xdotdot); + xdotdot_data = Albany::getDeviceData(xdotdot); - const auto& fields_offsets = m_fields_offsets.host(); + const auto& fields_offsets = m_fields_offsets.dev(); const int first_dof = this->offset; - for (std::size_t cell=0; cell < workset.numCells; ++cell ) { + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(ws,cell); const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); for (int node = 0; node < this->numNodes; ++node) { for (int eq = 0; eq < numFields; ++eq) { const auto lid = dof_lids(fields_offsets(node,eq+first_dof)); - get_ref(cell,node,eq) = x_data[lid]; + this->device_sol.get_ref(cell,node,eq) = x_data(lid); if (gather_xdot) - get_ref_dot(cell,node,eq) = xdot_data[lid]; + this->device_sol.get_ref_dot(cell,node,eq) = xdot_data(lid); if (gather_xdotdot) - get_ref_dotdot(cell,node,eq) = xdotdot_data[lid]; + this->device_sol.get_ref_dotdot(cell,node,eq) = xdotdot_data(lid); } } - } + }); } // ********************************************************************** diff --git a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide.hpp b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide.hpp index 5eb4222c8e..5c22cba59c 100644 --- a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide.hpp +++ b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide.hpp @@ -9,6 +9,8 @@ #include "PHAL_SeparableScatterScalarResponse.hpp" +#include "Albany_KokkosUtils.hpp" + namespace PHAL { /** * \brief Response Description @@ -44,6 +46,8 @@ class ResponseSquaredL2DifferenceSideBase : public PHAL::SeparableScatterScalarR int sideDim; int numQPs; int fieldDim; + size_t dims_2; + size_t dims_3; std::vector dims; bool target_value, rmsScaling, extrudedParams, isFieldGradient; @@ -57,10 +61,10 @@ class ResponseSquaredL2DifferenceSideBase : public PHAL::SeparableScatterScalarR PHX::MDField metric; PHX::MDField w_measure; - std::vector diff_1; - std::vector> diff_2; - bool resp_depends_on_sol_column; + + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; }; //-- SourceScalarT = ScalarT diff --git a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp index 0f47414b1b..19e6a0cf22 100644 --- a/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp +++ b/src/evaluators/response/PHAL_ResponseSquaredL2DifferenceSide_Def.hpp @@ -55,6 +55,10 @@ ResponseSquaredL2DifferenceSideBase(Teuchos::ParameterList& p, const Teuchos::RC layout->dimensions(dims); + // std::vector isn't accessible on device + dims_2 = (fieldDim >= 1) ? dims[2] : 0; + dims_3 = (fieldDim >= 2) ? dims[3] : 0; + std::string sideSetNameForMetric = (plist->isParameter("Is Side Set Planar") && plist->get("Is Side Set Planar")) ? sideSetName + "_planar" : @@ -167,15 +171,7 @@ evaluateFields(typename Traits::EvalData workset) "Side sets defined in input file but not properly specified on the mesh" << std::endl); // Zero out local response - PHAL::set(this->local_response_eval, 0.0); - - if (fieldDim == 1) { - diff_1.resize(dims[2]); - } else if (fieldDim == 2) { - diff_2.resize(dims[2]); - for(size_t i=0; ilocal_response_eval.get_view(), 0.0); if (workset.sideSets->find(sideSetName) != workset.sideSets->end()) { @@ -184,10 +180,10 @@ evaluateFields(typename Traits::EvalData workset) switch (fieldDim) { case 0: - for (int sideSet_idx = 0; sideSet_idx < sideSet.size; ++sideSet_idx) - { + Kokkos::parallel_for(this->getName(),RangePolicy(0,sideSet.size), + KOKKOS_CLASS_LAMBDA(const int& sideSet_idx) { // Get the local data of cell - const int cell = sideSet.ws_elem_idx.h_view(sideSet_idx); + const int cell = sideSet.ws_elem_idx.d_view(sideSet_idx); ScalarT sum = 0; for (int qp=0; qplocal_response_eval(cell, 0) = sum*scaling; - this->global_response_eval(0) += sum*scaling; - } + this->local_response_eval(cell,0) = sum*scaling; + KU::atomic_add(&(this->global_response_eval(0)), sum*scaling); + }); break; case 1: - for (int sideSet_idx = 0; sideSet_idx < sideSet.size; ++sideSet_idx) - { + Kokkos::parallel_for(this->getName(),RangePolicy(0,sideSet.size), + KOKKOS_CLASS_LAMBDA(const int& sideSet_idx) { // Get the local data of cell - const int cell = sideSet.ws_elem_idx.h_view(sideSet_idx); + const int cell = sideSet.ws_elem_idx.d_view(sideSet_idx); + + ScalarT diff_1[8] = {0}; ScalarT sum = 0; for (int qp=0; qplocal_response_eval(cell, 0) = sum*scaling; - this->global_response_eval(0) += sum*scaling; - } + this->local_response_eval(cell,0) = sum*scaling; + KU::atomic_add(&(this->global_response_eval(0)), sum*scaling); + }); break; case 2: - for (int sideSet_idx = 0; sideSet_idx < sideSet.size; ++sideSet_idx) - { + Kokkos::parallel_for(this->getName(),RangePolicy(0,sideSet.size), + KOKKOS_CLASS_LAMBDA(const int& sideSet_idx) { // Get the local data of cell - const int cell = sideSet.ws_elem_idx.h_view(sideSet_idx); + const int cell = sideSet.ws_elem_idx.d_view(sideSet_idx); + + ScalarT diff_2[8][8] = {0}; ScalarT sum = 0; for (int qp=0; qplocal_response_eval(cell, 0) = sum*scaling; - this->global_response_eval(0) += sum*scaling; - } + this->local_response_eval(cell,0) = sum*scaling; + KU::atomic_add(&(this->global_response_eval(0)), sum*scaling); + }); break; } diff --git a/src/evaluators/scatter/PHAL_ScatterResidual.hpp b/src/evaluators/scatter/PHAL_ScatterResidual.hpp index bd10be77ba..2c8f3e78fd 100644 --- a/src/evaluators/scatter/PHAL_ScatterResidual.hpp +++ b/src/evaluators/scatter/PHAL_ScatterResidual.hpp @@ -267,7 +267,9 @@ class ScatterResidualWithExtrudedParams protected: using Base = ScatterResidual; using ScalarT = typename Base::ScalarT; - + using ExecutionSpace = typename Base::ExecutionSpace; + using RangePolicy = typename Base::RangePolicy; + using Base::get_resid; using Base::m_fields_offsets; using Base::numNodes; @@ -328,6 +330,8 @@ class ScatterResidual void evaluateFields(typename Traits::EvalData d); protected: using Base = ScatterResidualBase; + using ExecutionSpace = typename Base::ExecutionSpace; + using RangePolicy = typename Base::RangePolicy; using ScalarT = typename Base::ScalarT; using Base::get_resid; @@ -377,6 +381,9 @@ class ScatterResidualWithExtrudedParams using Base = ScatterResidual; using ScalarT = typename Base::ScalarT; + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; + using Base::get_resid; using Base::m_fields_offsets; using Base::numNodes; diff --git a/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp b/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp index fd8f82f99c..c1bcebd2dd 100644 --- a/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp +++ b/src/evaluators/scatter/PHAL_ScatterResidual_Def.hpp @@ -394,7 +394,7 @@ evaluateFields(typename Traits::EvalData workset) { // In case the parameter has not been gathered, e.g. parameter // is used only in Dirichlet conditions. - if(workset.local_Vp[0].size() == 0) { return; } + if(workset.local_Vp.size() == 0) { return; } this->gather_fields_offsets (workset.disc->getDOFManager()); @@ -412,6 +412,8 @@ evaluateFields(typename Traits::EvalData workset) const auto& elem_dof_lids = dof_mgr->elem_dof_lids().host(); const auto eq_offset = this->offset; + const auto& local_Vp = workset.local_Vp; + if (trans) { const auto& pname = workset.dist_param_deriv_name; const auto& p_dof_mgr = workset.disc->getDOFManager(pname); @@ -420,7 +422,6 @@ evaluateFields(typename Traits::EvalData workset) const int num_deriv = numNodes;//local_Vp.size()/numFields; for (size_t cell=0; cellfind(workset.dist_param_deriv_name); if(level_it == extruded_params_levels->end()) { @@ -495,15 +495,21 @@ evaluateFields(typename Traits::EvalData workset) const int ws = workset.wsIndex; const auto fpV = workset.fpV; - const auto fpV_data = Albany::getNonconstLocalData(fpV); + const auto fpV_data = Albany::getNonconstDeviceData(fpV); const bool trans = workset.transpose_dist_param_deriv; const int num_cols = workset.Vp->domain()->dim(); const auto dof_mgr = workset.disc->getDOFManager(); - const auto elem_lids = workset.disc->getElementLIDs_host(ws); + const auto elem_lids_ws = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_ws.dev(),ws,Kokkos::ALL); + + const auto resid_offsets = m_fields_offsets.dev(); + + const auto& local_Vp = workset.local_Vp; - const auto resid_offsets = m_fields_offsets.host(); + const int offset = this->offset; + const auto& device_resid = this->device_resid; if (trans) { const auto& cell_layers_data = workset.disc->getMeshStruct()->local_cell_layers_data; @@ -516,65 +522,90 @@ evaluateFields(typename Traits::EvalData workset) const auto node_dof_mgr = workset.disc->getNodeDOFManager(); const auto p = workset.distParamLib->get(workset.dist_param_deriv_name); const auto p_dof_mgr = p->get_dof_mgr(); - const auto p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().host(); - const auto p_offsets = p_dof_mgr->getGIDFieldOffsetsSide(0,field_pos); - const auto top_nodes = node_dof_mgr->getGIDFieldOffsetsSide(0,top,field_pos); - const auto bot_nodes = node_dof_mgr->getGIDFieldOffsetsSide(0,bot,field_pos); + const auto p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().dev(); + const auto p_offsets = p_dof_mgr->getGIDFieldOffsetsSideKokkos(0,field_pos); + const auto top_nodes = node_dof_mgr->getGIDFieldOffsetsSideKokkos(0,top,field_pos); + const auto bot_nodes = node_dof_mgr->getGIDFieldOffsetsSideKokkos(0,bot,field_pos); + + const auto elem_lids_host = Kokkos::subview(elem_lids_ws.host(),ws,Kokkos::ALL); + + const auto layerOrd = cell_layers_data->layerOrd; + const auto numHorizEntities = cell_layers_data->numHorizEntities; + const auto numLayers = cell_layers_data->numLayers; + + // Note: The DOFManager stores offsets in nested vectors of non-uniform length. In order to + // make the offsets available on device, they were converted to a single kokkos view large enough + // to hold all of the vectors. A side effect is that array bounds can't be obtained from the kokkos view + // extents and have to be obtained from the non-kokkos offsets vector or from another source. + const int num_nodes_side = p_dof_mgr->getGIDFieldOffsetsSide(0,field_pos).size(); - const int num_nodes_side = p_offsets.size(); // Pick a cell layer that contains the field level. Can be same as fieldLevel, // except for the last level. - for (size_t cell=0; cellgetName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); - const auto& local_Vp = workset.local_Vp[cell]; - const LO basal_elem_LID = cell_layers_data->getColumnId(elem_LID); - const LO field_elem_LID = cell_layers_data->getId(basal_elem_LID,fieldLayer); - const auto p_elem_gids = p->get_dof_mgr()->getElementGIDs(field_elem_LID); + const LO basal_elem_LID = layerOrd ? elem_LID % numHorizEntities : elem_LID / numLayers; + const LO field_elem_LID = layerOrd ? basal_elem_LID + fieldLayer*numHorizEntities : + basal_elem_LID * numLayers + fieldLayer; - auto do_derivatives = [&](const std::vector& derivs) { - for (int i=0; ioffset)][col]; - } + const int deriv = bot_nodes(i); + for (int col=0; coldevice_resid.get(cell,node,eq); + val += res.dx(deriv)*local_Vp(cell,resid_offsets(node,eq+this->offset),col); } - fpV_data[col][row] += val; } + KU::atomic_add(&(fpV_data(row,col)), val); } - }; - do_derivatives(bot_nodes); - do_derivatives(top_nodes); - } + } + // Top nodes + for (int i=0; idevice_resid.get(cell,node,eq); + val += res.dx(deriv)*local_Vp(cell,resid_offsets(node,eq+this->offset),col); + } + } + KU::atomic_add(&(fpV_data(row,col)), val); + } + } + }); } else { constexpr auto ALL = Kokkos::ALL(); - const auto elem_dof_lids = dof_mgr->elem_dof_lids().host(); - for (size_t cell=0; cellelem_dof_lids().dev(); + const int num_deriv = local_Vp.extent(1); + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { + const auto elem_LID = elem_lids(cell); + + const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); for (int node=0; nodeoffset)); + auto res = device_resid.get(cell,node,eq); + const int row = dof_lids(resid_offsets(node,eq+offset)); for (int col=0; col(&(fpV_data(row,col)), val); } } } - } + }); } } @@ -622,20 +653,20 @@ evaluateFields(typename Traits::EvalData workset) const auto f_multiplier = workset.hessianWorkset.overlapped_f_multiplier; - using mv_data_t = Teuchos::ArrayRCP >; + using mv_data_t = Albany::ThyraMVDeviceView; mv_data_t hess_vec_prod_f_xx_data, hess_vec_prod_f_xp_data, hess_vec_prod_f_px_data, hess_vec_prod_f_pp_data; - auto f_multiplier_data = Albany::getNonconstLocalData(f_multiplier); + auto f_multiplier_data = Albany::getNonconstDeviceData(f_multiplier); if(f_xx_is_active) - hess_vec_prod_f_xx_data = Albany::getNonconstLocalData(hess_vec_prod_f_xx); + hess_vec_prod_f_xx_data = Albany::getNonconstDeviceData(hess_vec_prod_f_xx); if(f_xp_is_active) - hess_vec_prod_f_xp_data = Albany::getNonconstLocalData(hess_vec_prod_f_xp); + hess_vec_prod_f_xp_data = Albany::getNonconstDeviceData(hess_vec_prod_f_xp); if(f_px_is_active) - hess_vec_prod_f_px_data = Albany::getNonconstLocalData(hess_vec_prod_f_px); + hess_vec_prod_f_px_data = Albany::getNonconstDeviceData(hess_vec_prod_f_px); if(f_pp_is_active) - hess_vec_prod_f_pp_data = Albany::getNonconstLocalData(hess_vec_prod_f_pp); + hess_vec_prod_f_pp_data = Albany::getNonconstDeviceData(hess_vec_prod_f_pp); constexpr auto ALL = Kokkos::ALL(); const int ws = workset.wsIndex; @@ -645,27 +676,33 @@ evaluateFields(typename Traits::EvalData workset) // If the parameter associated to workset.dist_param_deriv_name is a distributed parameter, // the function needs to access the associated dof manager to deduce the IDs of the entries // of the resulting vector. - Albany::DualView::host_t p_elem_dof_lids; + Albany::DualView::dev_t p_elem_dof_lids; if(l1_is_distributed && (f_px_is_active || f_pp_is_active)) { auto p_dof_mgr = workset.disc->getDOFManager(workset.dist_param_deriv_name); - p_elem_dof_lids = p_dof_mgr->elem_dof_lids().host(); + p_elem_dof_lids = p_dof_mgr->elem_dof_lids().dev(); } - const auto elem_lids = workset.disc->getElementLIDs_host(ws); - const auto elem_dof_lids = dof_mgr->elem_dof_lids().host(); + const auto elem_lids_ws = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_ws.dev(),ws,Kokkos::ALL); - const auto& fields_offsets = m_fields_offsets.host(); + const auto elem_dof_lids = dof_mgr->elem_dof_lids().dev(); + + const size_t hess_vec_prod_f_px_data_size = hess_vec_prod_f_px_data.extent(0); + const size_t hess_vec_prod_f_pp_data_size = hess_vec_prod_f_pp_data.extent(0); + + const auto& fields_offsets = m_fields_offsets.dev(); const int eq_offset = this->offset; - for (size_t cell=0; cellgetName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); ScalarT value=0.0; for (int node=0; nodedevice_resid.get(cell,node,eq); const int row = dof_lids(fields_offsets(node,eq+eq_offset)); - value += res * f_multiplier_data[row]; + value += res * f_multiplier_data(row); } } @@ -675,9 +712,9 @@ evaluateFields(typename Traits::EvalData workset) const int row = dof_lids(fields_offsets(node,eq+eq_offset)); const auto& dx = value.dx(fields_offsets(node,eq)).dx(0); if (f_xx_is_active) - hess_vec_prod_f_xx_data[0][row] += dx; + KU::atomic_add(&(hess_vec_prod_f_xx_data(row,0)), dx); if (f_xp_is_active) - hess_vec_prod_f_xp_data[0][row] += dx; + KU::atomic_add(&(hess_vec_prod_f_xp_data(row,0)), dx); } } @@ -686,9 +723,9 @@ evaluateFields(typename Traits::EvalData workset) if(row >=0){ const auto& dx = value.dx(node).dx(0); if(f_px_is_active) - hess_vec_prod_f_px_data[0][row] += dx; + KU::atomic_add(&(hess_vec_prod_f_px_data(row,0)), dx); if(f_pp_is_active) - hess_vec_prod_f_pp_data[0][row] += dx; + KU::atomic_add(&(hess_vec_prod_f_pp_data(row,0)), dx); } } } // node @@ -698,13 +735,13 @@ evaluateFields(typename Traits::EvalData workset) // the nodes: if(!l1_is_distributed && (f_px_is_active || f_pp_is_active)) { if(f_px_is_active) - for (unsigned int l1_i=0; l1_i(&(hess_vec_prod_f_px_data(l1_i,0)), value.dx(l1_i).dx(0)); if(f_pp_is_active) - for (unsigned int l1_i=0; l1_i(&(hess_vec_prod_f_pp_data(l1_i,0)), value.dx(l1_i).dx(0)); } - } // cell + }); // cell } // ********************************************************************** @@ -755,7 +792,8 @@ evaluate2DFieldsDerivativesDueToExtrudedParams(typename Traits::EvalData workset constexpr auto ALL = Kokkos::ALL(); const int ws = workset.wsIndex; - const auto elem_lids = workset.disc->getElementLIDs_host(ws); + const auto elem_lids_ws = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_ws.dev(),ws,Kokkos::ALL); const auto& hws = workset.hessianWorkset; @@ -764,7 +802,7 @@ evaluate2DFieldsDerivativesDueToExtrudedParams(typename Traits::EvalData workset // Here we scatter the *local* response derivative const auto f_multiplier = hws.overlapped_f_multiplier; - const auto f_multiplier_data = Albany::getNonconstLocalData(f_multiplier); + const auto f_multiplier_data = Albany::getNonconstDeviceData(f_multiplier); auto level_it = extruded_params_levels->find(workset.dist_param_deriv_name); int fieldLevel = level_it->second; @@ -772,13 +810,13 @@ evaluate2DFieldsDerivativesDueToExtrudedParams(typename Traits::EvalData workset const auto hess_vec_prod_f_px = hws.overlapped_hess_vec_prod_f_px; const auto hess_vec_prod_f_pp = hws.overlapped_hess_vec_prod_f_pp; - using mv_data_t = Teuchos::ArrayRCP>; + using mv_data_t = Albany::ThyraMVDeviceView; mv_data_t hess_vec_prod_f_px_data, hess_vec_prod_f_pp_data; if(f_px_is_active) - hess_vec_prod_f_px_data = Albany::getNonconstLocalData(hess_vec_prod_f_px); + hess_vec_prod_f_px_data = Albany::getNonconstDeviceData(hess_vec_prod_f_px); if(f_pp_is_active) - hess_vec_prod_f_pp_data = Albany::getNonconstLocalData(hess_vec_prod_f_pp); + hess_vec_prod_f_pp_data = Albany::getNonconstDeviceData(hess_vec_prod_f_pp); const auto& layers_data = workset.disc->getLayeredMeshNumberingLO(); const int top = layers_data->top_side_pos; @@ -788,48 +826,65 @@ evaluate2DFieldsDerivativesDueToExtrudedParams(typename Traits::EvalData workset const auto dof_mgr = workset.disc->getDOFManager(); const auto p_dof_mgr = workset.disc->getDOFManager(workset.dist_param_deriv_name); - const auto elem_dof_lids = dof_mgr->elem_dof_lids().host(); - const auto p_elem_dof_lids = p_dof_mgr->elem_dof_lids().host(); + const auto elem_dof_lids = dof_mgr->elem_dof_lids().dev(); + const auto p_elem_dof_lids = p_dof_mgr->elem_dof_lids().dev(); // Note: grab offsets on top/bot ordered in the same way as on side $field_pos // to guarantee corresponding nodes are vertically aligned. - const auto top_offsets = p_dof_mgr->getGIDFieldOffsetsSide(0,top,field_pos); - const auto bot_offsets = p_dof_mgr->getGIDFieldOffsetsSide(0,bot,field_pos); + const auto top_offsets = p_dof_mgr->getGIDFieldOffsetsSideKokkos(0,top,field_pos); + const auto bot_offsets = p_dof_mgr->getGIDFieldOffsetsSideKokkos(0,bot,field_pos); const auto p_offsets = fieldLevel==fieldLayer ? bot_offsets : top_offsets; - const auto numSideNodes = p_offsets.size(); + const auto numSideNodes = p_dof_mgr->getGIDFieldOffsetsSide(0,top,field_pos).size(); + + const auto elem_lids_host = Kokkos::subview(elem_lids_ws.host(),ws,Kokkos::ALL); - const auto offsets = m_fields_offsets.host(); - for (size_t cell=0; celllayerOrd; + const auto numHorizEntities = layers_data->numHorizEntities; + const auto numLayers = layers_data->numLayers; + + const auto offsets = m_fields_offsets.dev(); + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); ScalarT value=0.0; for (int node=0; nodedevice_resid.get(cell,node,eq); const auto lid = dof_lids(offsets(node,eq+this->offset)); - value += res * f_multiplier_data[lid]; + value += res * f_multiplier_data(lid); } } - const auto basal_elem_LID = layers_data->getColumnId(elem_LID); - const auto field_elem_LID = layers_data->getId(basal_elem_LID,fieldLayer); - const auto do_nodes = [&] (const std::vector& offsets) { - for (std::size_t node=0; node=0) { - const auto& dx = value.dx(offsets[node]).dx(0); - if (f_px_is_active) - hess_vec_prod_f_px_data[0][row] += dx; - if (f_pp_is_active) - hess_vec_prod_f_pp_data[0][row] += dx; - } + const LO basal_elem_LID = layerOrd ? elem_LID % numHorizEntities : elem_LID / numLayers; + const LO field_elem_LID = layerOrd ? basal_elem_LID + fieldLayer*numHorizEntities : + basal_elem_LID * numLayers + fieldLayer; + + // do bot_offsets + for (std::size_t node=0; node=0) { + const auto& dx = value.dx(bot_offsets(node)).dx(0); + if (f_px_is_active) + KU::atomic_add(&(hess_vec_prod_f_px_data(row,0)), dx); + if (f_pp_is_active) + KU::atomic_add(&(hess_vec_prod_f_pp_data(row,0)), dx); } - }; + } - do_nodes (bot_offsets); - do_nodes (top_offsets); - } + // do top_offsets + for (std::size_t node=0; node=0) { + const auto& dx = value.dx(top_offsets(node)).dx(0); + if (f_px_is_active) + KU::atomic_add(&(hess_vec_prod_f_px_data(row,0)), dx); + if (f_pp_is_active) + KU::atomic_add(&(hess_vec_prod_f_pp_data(row,0)), dx); + } + } + }); } } // namespace PHAL diff --git a/src/evaluators/scatter/PHAL_ScatterSideEqnResidual_Def.hpp b/src/evaluators/scatter/PHAL_ScatterSideEqnResidual_Def.hpp index 135ffced23..d6da6656f9 100644 --- a/src/evaluators/scatter/PHAL_ScatterSideEqnResidual_Def.hpp +++ b/src/evaluators/scatter/PHAL_ScatterSideEqnResidual_Def.hpp @@ -474,7 +474,7 @@ doEvaluateFields(typename Traits::EvalData workset, } // Check for early return - if(workset.local_Vp[0].size() == 0) { + if(workset.local_Vp.size() == 0) { // In case the parameter has not been gathered. // E.g., parameter is used only in Dirichlet conditions. return; @@ -496,6 +496,8 @@ doEvaluateFields(typename Traits::EvalData workset, const auto node_dof_mgr = workset.disc->getNodeDOFManager(); const auto elem_dof_lids = dof_mgr->elem_dof_lids().host(); + const auto& local_Vp = workset.local_Vp; + constexpr auto ALL = Kokkos::ALL(); const int neq = dof_mgr->getNumFields(); const int numSides = sideSet.size(); @@ -504,8 +506,7 @@ doEvaluateFields(typename Traits::EvalData workset, const auto icell = side.ws_elem_idx; const auto elem_LID = elem_lids(icell); - const auto& local_Vp = workset.local_Vp[icell]; - + const auto& side_nodes = node_dof_mgr->getGIDFieldOffsetsSide(0,side.side_pos); const int numNodes = side_nodes.size(); const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL); @@ -524,7 +525,7 @@ doEvaluateFields(typename Traits::EvalData workset, const int node = side_nodes[inode]; for (int eq = 0; eq < this->numFields; eq++) { auto res = this->get_resid(iside,inode,eq); - val += res.dx(i)*local_Vp[node*neq+eq+this->offset][col]; + val += res.dx(i)*local_Vp(icell,node*neq+eq+this->offset,col); } } fpV_data[col][row] += val; @@ -541,7 +542,7 @@ doEvaluateFields(typename Traits::EvalData workset, for (int col=0; col private: typedef typename AlbanyTraits::DistParamDeriv::ScalarT ScalarT; + +protected: + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; + MDFieldVectorRight global_response_reader; + }; @@ -311,6 +317,11 @@ class SeparableScatterScalarResponse private: typedef typename AlbanyTraits::HessianVec::ScalarT ScalarT; + +protected: + using ExecutionSpace = typename PHX::Device::execution_space; + using RangePolicy = Kokkos::RangePolicy; + MDFieldVectorRight global_response_reader; }; diff --git a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp index 18f43467b5..c1700f0c68 100644 --- a/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp +++ b/src/evaluators/scatter/PHAL_SeparableScatterScalarResponse_Def.hpp @@ -293,16 +293,19 @@ evaluateFields(typename Traits::EvalData workset) } constexpr auto ALL = Kokkos::ALL(); - const auto dgdp_data = Albany::getNonconstLocalData(dgdp); + const auto dgdp_data = Albany::getNonconstDeviceData(dgdp); const int ws = workset.wsIndex; const int num_deriv = numNodes; - const auto elem_lids = workset.disc->getElementLIDs_host(ws); + const auto elem_lids_all = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_all.dev(),ws,ALL); + const auto param = workset.distParamLib->get(workset.dist_param_deriv_name); - const auto p_elem_dof_lids = param->get_dof_mgr()->elem_dof_lids().host(); + const auto p_elem_dof_lids = param->get_dof_mgr()->elem_dof_lids().dev(); // Loop over cells in workset - for (std::size_t cell=0; cell < workset.numCells; ++cell) { + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { const auto elem_LID = elem_lids(cell); const auto dof_lids = Kokkos::subview(p_elem_dof_lids,elem_LID,ALL); @@ -314,11 +317,11 @@ evaluateFields(typename Traits::EvalData workset) // If param defined at this node, update dg/dp const int row = dof_lids(deriv); if(row >=0){ - dgdp_data[res][row] += this->local_response(cell, res).dx(deriv); + KU::atomic_add(&dgdp_data(row,res), this->local_response(cell, res).dx(deriv)); } } // deriv } // response - } // cell + }); // cell } template @@ -327,10 +330,14 @@ postEvaluate(typename Traits::PostEvalData workset) { auto g = workset.g; if (g != Teuchos::null) { - auto g_nonconstView = Albany::getNonconstLocalData(g); - for (std::size_t res=0; resglobal_response.size(); ++res) { - g_nonconstView[res] = this->global_response[res].val(); - } + auto g_nonconstView = Albany::getNonconstDeviceData(g); + MDFieldVectorRight gr(this->global_response); + global_response_reader = gr; + Kokkos::parallel_for(this->getName(), + Kokkos::RangePolicy(0,this->global_response.size()), + KOKKOS_CLASS_LAMBDA(const int i) { + g_nonconstView(i) = global_response_reader[i].val(); + }); } auto dgdp = workset.dgdp; @@ -478,18 +485,18 @@ evaluateFields(typename Traits::EvalData workset) } // Extract multivectors raw data - using mv_data_t = Teuchos::ArrayRCP>; + using mv_data_t = Albany::ThyraMVDeviceView; mv_data_t hess_vec_prod_g_xx_data, hess_vec_prod_g_xp_data, hess_vec_prod_g_px_data, hess_vec_prod_g_pp_data; if (!hess_vec_prod_g_xx.is_null()) - hess_vec_prod_g_xx_data = Albany::getNonconstLocalData(hess_vec_prod_g_xx); + hess_vec_prod_g_xx_data = Albany::getNonconstDeviceData(hess_vec_prod_g_xx); if (!hess_vec_prod_g_xp.is_null()) - hess_vec_prod_g_xp_data = Albany::getNonconstLocalData(hess_vec_prod_g_xp); + hess_vec_prod_g_xp_data = Albany::getNonconstDeviceData(hess_vec_prod_g_xp); if (!hess_vec_prod_g_px.is_null()) - hess_vec_prod_g_px_data = Albany::getNonconstLocalData(hess_vec_prod_g_px); + hess_vec_prod_g_px_data = Albany::getNonconstDeviceData(hess_vec_prod_g_px); if (!hess_vec_prod_g_pp.is_null()) - hess_vec_prod_g_pp_data = Albany::getNonconstLocalData(hess_vec_prod_g_pp); + hess_vec_prod_g_pp_data = Albany::getNonconstDeviceData(hess_vec_prod_g_pp); const bool do_xx = !hess_vec_prod_g_xx.is_null(); const bool do_xp = !hess_vec_prod_g_xp.is_null(); @@ -500,10 +507,10 @@ evaluateFields(typename Traits::EvalData workset) // Get some data from the discretization const auto param_name = workset.dist_param_deriv_name; - Albany::DualView::host_t p_elem_dof_lids; + Albany::DualView::dev_t p_elem_dof_lids; if (do_dist_pp || do_dist_px) { auto dist_param = workset.distParamLib->get(param_name); - p_elem_dof_lids = dist_param->get_dof_mgr()->elem_dof_lids().host(); + p_elem_dof_lids = dist_param->get_dof_mgr()->elem_dof_lids().dev(); } const auto& dof_mgr = workset.disc->getDOFManager(); @@ -511,67 +518,74 @@ evaluateFields(typename Traits::EvalData workset) const int ws = workset.wsIndex; const int neq = dof_mgr->getNumFields(); - const auto& elem_dof_lids = dof_mgr->elem_dof_lids().host(); - const auto& elem_lids = workset.disc->getElementLIDs_host(ws); + const size_t num_responses = this->global_response.size(); - for (size_t cell=0; cellglobal_response.size(); ++res) { - auto lresp = this->local_response(cell, res); - - if (do_xx || do_xp) { - // Loop over equations per node - for (int eq_dof=0; eq_dofgetGIDFieldOffsets(eq_dof); - - const int num_nodes = offsets.size(); + const int g_px_size = hess_vec_prod_g_px_data.extent(0); + const int g_pp_size = hess_vec_prod_g_pp_data.extent(0); + const auto& elem_dof_lids = dof_mgr->elem_dof_lids().dev(); + const auto elem_lids_all = workset.disc->getWsElementLIDs(); + const auto elem_lids = Kokkos::subview(elem_lids_all.dev(),ws,Kokkos::ALL); + + if (do_xx || do_xp) { + for (int eq_dof=0; eq_dofgetGIDFieldOffsetsKokkos(eq_dof); + const int num_nodes = offsets.size(); + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { + const auto elem_LID = elem_lids(cell); + // Loop over responses + for (size_t res=0; reslocal_response(cell, res); for (int i=0; i(&(hess_vec_prod_g_xx_data(dof_lid,res)), lresp.dx(deriv).dx(0)); if (do_xp) - hess_vec_prod_g_xp_data[res][dof_lid] += lresp.dx(deriv).dx(0); + KU::atomic_add(&(hess_vec_prod_g_xp_data(dof_lid,res)), lresp.dx(deriv).dx(0)); } } - } - - if (do_dist_px) { - for (int deriv=0; deriv=0) { - hess_vec_prod_g_px_data[res][row] += lresp.dx(deriv).dx(0); + }); + } + } else { + Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells), + KOKKOS_CLASS_LAMBDA(const int& cell) { + const auto elem_LID = elem_lids(cell); + for (size_t res=0; reslocal_response(cell, res); + if (do_dist_px) { + for (int deriv=0; deriv=0) { + KU::atomic_add(&(hess_vec_prod_g_px_data(row,res)), lresp.dx(deriv).dx(0)); + } } } - } - if (do_dist_pp) { - for (int deriv=0; deriv=0) { - hess_vec_prod_g_pp_data[res][row] += lresp.dx(deriv).dx(0); + if (do_dist_pp) { + for (int deriv=0; deriv=0) { + KU::atomic_add(&(hess_vec_prod_g_pp_data(row,res)), lresp.dx(deriv).dx(0)); + } } } - } - - if (do_scalar_px) { - for (int deriv=0; deriv(&(hess_vec_prod_g_px_data(deriv,res)), lresp.dx(deriv).dx(0)); + } } - } - if (do_scalar_pp) { - for (int deriv=0; deriv(&(hess_vec_prod_g_pp_data(deriv,res)), lresp.dx(deriv).dx(0)); + } } } - } + }); } } @@ -588,10 +602,14 @@ postEvaluate(typename Traits::PostEvalData workset) const auto g = workset.g; if (g != Teuchos::null) { - const auto g_nonconstView = Albany::getNonconstLocalData(g); - for (size_t res=0; resglobal_response.size(); res++) { - g_nonconstView[res] = this->global_response[res].val().val(); - } + Albany::ThyraVDeviceView g_nonconstView = Albany::getNonconstDeviceData(g); + MDFieldVectorRight gr(this->global_response); + global_response_reader = gr; + Kokkos::parallel_for(this->getName(), + Kokkos::RangePolicy(0,this->global_response.size()), + KOKKOS_CLASS_LAMBDA(const int i) { + g_nonconstView(i) = this->global_response[i].val().val(); + }); } const auto& hws = workset.hessianWorkset; diff --git a/tests/unit/evaluators/ScatterResidual.cpp b/tests/unit/evaluators/ScatterResidual.cpp index 2651cb9f1f..bdfa5ab4f7 100644 --- a/tests/unit/evaluators/ScatterResidual.cpp +++ b/tests/unit/evaluators/ScatterResidual.cpp @@ -75,30 +75,30 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) const auto f_multiplier = Thyra::createMember(ov_x_space); - const auto hess_vec_prod_f_xx = Thyra::createMember(x_space); - const auto hess_vec_prod_f_xp = Thyra::createMember(x_space); - const auto hess_vec_prod_f_px = Thyra::createMember(p_space); - const auto hess_vec_prod_f_pp = Thyra::createMember(p_space); - - const auto ov_hess_vec_prod_f_xx = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_xp = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_px = Thyra::createMember(ov_p_space); - const auto ov_hess_vec_prod_f_pp = Thyra::createMember(ov_p_space); - - const auto diff_x_out = Thyra::createMember(x_space); - const auto one_x_out = Thyra::createMember(x_space); - const auto diff_p_out = Thyra::createMember(p_space); - const auto one_p_out = Thyra::createMember(p_space); - - const auto hess_vec_prod_f_xx_out = Thyra::createMember(x_space); - const auto hess_vec_prod_f_xp_out = Thyra::createMember(x_space); - const auto hess_vec_prod_f_px_out = Thyra::createMember(p_space); - const auto hess_vec_prod_f_pp_out = Thyra::createMember(p_space); - - const auto ov_hess_vec_prod_f_xx_out = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_xp_out = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_px_out = Thyra::createMember(ov_p_space); - const auto ov_hess_vec_prod_f_pp_out = Thyra::createMember(ov_p_space); + const auto hess_vec_prod_f_xx = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_xp = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_px = Thyra::createMembers(p_space,1); + const auto hess_vec_prod_f_pp = Thyra::createMembers(p_space,1); + + const auto ov_hess_vec_prod_f_xx = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_xp = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_px = Thyra::createMembers(ov_p_space,1); + const auto ov_hess_vec_prod_f_pp = Thyra::createMembers(ov_p_space,1); + + const auto diff_x_out = Thyra::createMembers(x_space,1); + const auto one_x_out = Thyra::createMembers(x_space,1); + const auto diff_p_out = Thyra::createMembers(p_space,1); + const auto one_p_out = Thyra::createMembers(p_space,1); + + const auto hess_vec_prod_f_xx_out = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_xp_out = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_px_out = Thyra::createMembers(p_space,1); + const auto hess_vec_prod_f_pp_out = Thyra::createMembers(p_space,1); + + const auto ov_hess_vec_prod_f_xx_out = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_xp_out = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_px_out = Thyra::createMembers(ov_p_space,1); + const auto ov_hess_vec_prod_f_pp_out = Thyra::createMembers(ov_p_space,1); ov_hess_vec_prod_f_xx->assign(0.0); ov_hess_vec_prod_f_xp->assign(0.0); @@ -116,10 +116,15 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) ov_hess_vec_prod_f_pp_out->assign(0.0); { - auto ov_hess_vec_prod_f_xx_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_xx_out); - auto ov_hess_vec_prod_f_xp_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_xp_out); - auto ov_hess_vec_prod_f_px_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_px_out); - auto ov_hess_vec_prod_f_pp_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_pp_out); + auto ov_hess_vec_prod_f_xx_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_xx_out); + auto ov_hess_vec_prod_f_xp_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_xp_out); + auto ov_hess_vec_prod_f_px_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_px_out); + auto ov_hess_vec_prod_f_pp_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_pp_out); + + auto ov_hess_vec_prod_f_xx_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_xx_out_data); + auto ov_hess_vec_prod_f_xp_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_xp_out_data); + auto ov_hess_vec_prod_f_px_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_px_out_data); + auto ov_hess_vec_prod_f_pp_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_pp_out_data); auto p_elem_dof_lids = p_dof_mgr->elem_dof_lids().host(); auto x_elem_dof_lids = x_dof_mgr->elem_dof_lids().host(); @@ -128,14 +133,19 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,cell,ALL); auto x_dof_lids = Kokkos::subview(x_elem_dof_lids,cell,ALL); for (size_t i=0; icombine(ov_hess_vec_prod_f_px, hess_vec_prod_f_px, ADD); p_cas_manager->combine(ov_hess_vec_prod_f_pp, hess_vec_prod_f_pp, ADD); - Thyra::V_VmV(diff_x_out.ptr(), *one_x_out, *hess_vec_prod_f_xx); + Thyra::V_VmV(diff_x_out->col(0).ptr(), *(one_x_out->col(0)), *(hess_vec_prod_f_xx->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xx_out", *one_x_out, - "hess_vec_prod_f_xx", *diff_x_out, + "hess_vec_prod_f_xx_out", *(one_x_out->col(0)), + "hess_vec_prod_f_xx", *(diff_x_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_x_out.ptr(), *one_x_out, *hess_vec_prod_f_xp); + Thyra::V_VmV(diff_x_out->col(0).ptr(), *(one_x_out->col(0)), *(hess_vec_prod_f_xp->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xp_out", *one_x_out, - "hess_vec_prod_f_xp", *diff_x_out, + "hess_vec_prod_f_xp_out", *(one_x_out->col(0)), + "hess_vec_prod_f_xp", *(diff_x_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_p_out.ptr(), *one_p_out, *hess_vec_prod_f_px); + Thyra::V_VmV(diff_p_out->col(0).ptr(), *(one_p_out->col(0)), *(hess_vec_prod_f_px->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_px_out", *one_p_out, - "hess_vec_prod_f_px", *diff_p_out, + "hess_vec_prod_f_px_out", *(one_p_out->col(0)), + "hess_vec_prod_f_px", *(diff_p_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_p_out.ptr(), *one_p_out, *hess_vec_prod_f_pp); + Thyra::V_VmV(diff_p_out->col(0).ptr(), *(one_p_out->col(0)), *(hess_vec_prod_f_pp->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_pp_out", *one_p_out, - "hess_vec_prod_f_pp", *diff_p_out, + "hess_vec_prod_f_pp_out", *(one_p_out->col(0)), + "hess_vec_prod_f_pp", *(diff_p_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -272,8 +282,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xx_out", *hess_vec_prod_f_xx_out, - "hess_vec_prod_f_xx", *hess_vec_prod_f_xx, + "hess_vec_prod_f_xx_out", *(hess_vec_prod_f_xx_out->col(0)), + "hess_vec_prod_f_xx", *(hess_vec_prod_f_xx->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -302,8 +312,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xp_out", *hess_vec_prod_f_xp_out, - "hess_vec_prod_f_xp", *hess_vec_prod_f_xp, + "hess_vec_prod_f_xp_out", *(hess_vec_prod_f_xp_out->col(0)), + "hess_vec_prod_f_xp", *(hess_vec_prod_f_xp->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -332,8 +342,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_px_out", *hess_vec_prod_f_px_out, - "hess_vec_prod_f_px", *hess_vec_prod_f_px, + "hess_vec_prod_f_px_out", *(hess_vec_prod_f_px_out->col(0)), + "hess_vec_prod_f_px", *(hess_vec_prod_f_px->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -362,8 +372,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank0) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_pp_out", *hess_vec_prod_f_pp_out, - "hess_vec_prod_f_pp", *hess_vec_prod_f_pp, + "hess_vec_prod_f_pp_out", *(hess_vec_prod_f_pp_out->col(0)), + "hess_vec_prod_f_pp", *(hess_vec_prod_f_pp->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -432,30 +442,30 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) const auto f_multiplier = Thyra::createMember(ov_x_space); - const auto hess_vec_prod_f_xx = Thyra::createMember(x_space); - const auto hess_vec_prod_f_xp = Thyra::createMember(x_space); - const auto hess_vec_prod_f_px = Thyra::createMember(p_space); - const auto hess_vec_prod_f_pp = Thyra::createMember(p_space); + const auto hess_vec_prod_f_xx = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_xp = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_px = Thyra::createMembers(p_space,1); + const auto hess_vec_prod_f_pp = Thyra::createMembers(p_space,1); - const auto ov_hess_vec_prod_f_xx = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_xp = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_px = Thyra::createMember(ov_p_space); - const auto ov_hess_vec_prod_f_pp = Thyra::createMember(ov_p_space); + const auto ov_hess_vec_prod_f_xx = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_xp = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_px = Thyra::createMembers(ov_p_space,1); + const auto ov_hess_vec_prod_f_pp = Thyra::createMembers(ov_p_space,1); - const auto diff_x_out = Thyra::createMember(x_space); - const auto one_x_out = Thyra::createMember(x_space); - const auto diff_p_out = Thyra::createMember(p_space); - const auto one_p_out = Thyra::createMember(p_space); + const auto diff_x_out = Thyra::createMembers(x_space,1); + const auto one_x_out = Thyra::createMembers(x_space,1); + const auto diff_p_out = Thyra::createMembers(p_space,1); + const auto one_p_out = Thyra::createMembers(p_space,1); - const auto hess_vec_prod_f_xx_out = Thyra::createMember(x_space); - const auto hess_vec_prod_f_xp_out = Thyra::createMember(x_space); - const auto hess_vec_prod_f_px_out = Thyra::createMember(p_space); - const auto hess_vec_prod_f_pp_out = Thyra::createMember(p_space); + const auto hess_vec_prod_f_xx_out = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_xp_out = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_f_px_out = Thyra::createMembers(p_space,1); + const auto hess_vec_prod_f_pp_out = Thyra::createMembers(p_space,1); - const auto ov_hess_vec_prod_f_xx_out = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_xp_out = Thyra::createMember(ov_x_space); - const auto ov_hess_vec_prod_f_px_out = Thyra::createMember(ov_p_space); - const auto ov_hess_vec_prod_f_pp_out = Thyra::createMember(ov_p_space); + const auto ov_hess_vec_prod_f_xx_out = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_xp_out = Thyra::createMembers(ov_x_space,1); + const auto ov_hess_vec_prod_f_px_out = Thyra::createMembers(ov_p_space,1); + const auto ov_hess_vec_prod_f_pp_out = Thyra::createMembers(ov_p_space,1); ov_hess_vec_prod_f_xx->assign(0.0); ov_hess_vec_prod_f_xp->assign(0.0); @@ -473,10 +483,15 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) ov_hess_vec_prod_f_pp_out->assign(0.0); { - auto ov_hess_vec_prod_f_xx_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_xx_out); - auto ov_hess_vec_prod_f_xp_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_xp_out); - auto ov_hess_vec_prod_f_px_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_px_out); - auto ov_hess_vec_prod_f_pp_out_data = Albany::getNonconstLocalData(ov_hess_vec_prod_f_pp_out); + auto ov_hess_vec_prod_f_xx_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_xx_out); + auto ov_hess_vec_prod_f_xp_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_xp_out); + auto ov_hess_vec_prod_f_px_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_px_out); + auto ov_hess_vec_prod_f_pp_out_data = Albany::getNonconstDeviceData(ov_hess_vec_prod_f_pp_out); + + auto ov_hess_vec_prod_f_xx_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_xx_out_data); + auto ov_hess_vec_prod_f_xp_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_xp_out_data); + auto ov_hess_vec_prod_f_px_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_px_out_data); + auto ov_hess_vec_prod_f_pp_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_f_pp_out_data); auto p_elem_dof_lids = p_dof_mgr->elem_dof_lids().host(); auto x_elem_dof_lids = x_dof_mgr->elem_dof_lids().host(); @@ -485,14 +500,19 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,cell,ALL); auto x_dof_lids = Kokkos::subview(x_elem_dof_lids,cell,ALL); for (size_t i=0; icombine(ov_hess_vec_prod_f_px, hess_vec_prod_f_px, ADD); p_cas_manager->combine(ov_hess_vec_prod_f_pp, hess_vec_prod_f_pp, ADD); - Thyra::V_VmV(diff_x_out.ptr(), *one_x_out, *hess_vec_prod_f_xx); + Thyra::V_VmV(diff_x_out->col(0).ptr(), *(one_x_out->col(0)), *(hess_vec_prod_f_xx->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xx_out", *one_x_out, - "hess_vec_prod_f_xx", *diff_x_out, + "hess_vec_prod_f_xx_out", *(one_x_out->col(0)), + "hess_vec_prod_f_xx", *(diff_x_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_x_out.ptr(), *one_x_out, *hess_vec_prod_f_xp); + Thyra::V_VmV(diff_x_out->col(0).ptr(), *(one_x_out->col(0)), *(hess_vec_prod_f_xp->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xp_out", *one_x_out, - "hess_vec_prod_f_xp", *diff_x_out, + "hess_vec_prod_f_xp_out", *(one_x_out->col(0)), + "hess_vec_prod_f_xp", *(diff_x_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_p_out.ptr(), *one_p_out, *hess_vec_prod_f_px); + Thyra::V_VmV(diff_p_out->col(0).ptr(), *(one_p_out->col(0)), *(hess_vec_prod_f_px->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_px_out", *one_p_out, - "hess_vec_prod_f_px", *diff_p_out, + "hess_vec_prod_f_px_out", *(one_p_out->col(0)), + "hess_vec_prod_f_px", *(diff_p_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_p_out.ptr(), *one_p_out, *hess_vec_prod_f_pp); + Thyra::V_VmV(diff_p_out->col(0).ptr(), *(one_p_out->col(0)), *(hess_vec_prod_f_pp->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_pp_out", *one_p_out, - "hess_vec_prod_f_pp", *diff_p_out, + "hess_vec_prod_f_pp_out", *(one_p_out->col(0)), + "hess_vec_prod_f_pp", *(diff_p_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -633,8 +653,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xx_out", *ov_hess_vec_prod_f_xx_out, - "hess_vec_prod_f_xx", *ov_hess_vec_prod_f_xx, + "hess_vec_prod_f_xx_out", *(ov_hess_vec_prod_f_xx_out->col(0)), + "hess_vec_prod_f_xx", *(ov_hess_vec_prod_f_xx->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -663,8 +683,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_xp_out", *hess_vec_prod_f_xp_out, - "hess_vec_prod_f_xp", *hess_vec_prod_f_xp, + "hess_vec_prod_f_xp_out", *(hess_vec_prod_f_xp_out->col(0)), + "hess_vec_prod_f_xp", *(hess_vec_prod_f_xp->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -693,8 +713,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_px_out", *hess_vec_prod_f_px_out, - "hess_vec_prod_f_px", *hess_vec_prod_f_px, + "hess_vec_prod_f_px_out", *(hess_vec_prod_f_px_out->col(0)), + "hess_vec_prod_f_px", *(hess_vec_prod_f_px->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -723,8 +743,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, scatterResidualHessianVecTensorRank1) TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_f_pp_out", *hess_vec_prod_f_pp_out, - "hess_vec_prod_f_pp", *hess_vec_prod_f_pp, + "hess_vec_prod_f_pp_out", *(hess_vec_prod_f_pp_out->col(0)), + "hess_vec_prod_f_pp", *(hess_vec_prod_f_pp->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); diff --git a/tests/unit/evaluators/ScatterScalarResponse.cpp b/tests/unit/evaluators/ScatterScalarResponse.cpp index f9b0728ebf..a41f2ee012 100644 --- a/tests/unit/evaluators/ScatterScalarResponse.cpp +++ b/tests/unit/evaluators/ScatterScalarResponse.cpp @@ -74,31 +74,31 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe auto overlapped_p_space = p_dof_mgr->ov_indexer()->getVectorSpace(); auto overlapped_x_space = x_dof_mgr->ov_indexer()->getVectorSpace(); - const auto hess_vec_prod_g_xx = Thyra::createMember(x_space); - const auto hess_vec_prod_g_xp = Thyra::createMember(x_space); - const auto hess_vec_prod_g_px = Thyra::createMember(p_space); - const auto hess_vec_prod_g_pp = Thyra::createMember(p_space); + const auto hess_vec_prod_g_xx = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_g_xp = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_g_px = Thyra::createMembers(p_space,1); + const auto hess_vec_prod_g_pp = Thyra::createMembers(p_space,1); - const auto overlapped_hess_vec_prod_g_xx = Thyra::createMember(overlapped_x_space); - const auto overlapped_hess_vec_prod_g_xp = Thyra::createMember(overlapped_x_space); - const auto overlapped_hess_vec_prod_g_px = Thyra::createMember(overlapped_p_space); - const auto overlapped_hess_vec_prod_g_pp = Thyra::createMember(overlapped_p_space); + const auto overlapped_hess_vec_prod_g_xx = Thyra::createMembers(overlapped_x_space,1); + const auto overlapped_hess_vec_prod_g_xp = Thyra::createMembers(overlapped_x_space,1); + const auto overlapped_hess_vec_prod_g_px = Thyra::createMembers(overlapped_p_space,1); + const auto overlapped_hess_vec_prod_g_pp = Thyra::createMembers(overlapped_p_space,1); - const auto diff_x_out = Thyra::createMember(x_space); - const auto diff_p_out = Thyra::createMember(p_space); - const auto one_x_out = Thyra::createMember(x_space); - const auto one_p_out = Thyra::createMember(p_space); + const auto diff_x_out = Thyra::createMembers(x_space,1); + const auto diff_p_out = Thyra::createMembers(p_space,1); + const auto one_x_out = Thyra::createMembers(x_space,1); + const auto one_p_out = Thyra::createMembers(p_space,1); - const auto hess_vec_prod_g_xx_out = Thyra::createMember(x_space); - const auto hess_vec_prod_g_xp_out = Thyra::createMember(x_space); - const auto hess_vec_prod_g_px_out = Thyra::createMember(p_space); - const auto hess_vec_prod_g_pp_out = Thyra::createMember(p_space); + const auto hess_vec_prod_g_xx_out = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_g_xp_out = Thyra::createMembers(x_space,1); + const auto hess_vec_prod_g_px_out = Thyra::createMembers(p_space,1); + const auto hess_vec_prod_g_pp_out = Thyra::createMembers(p_space,1); - const auto overlapped_hess_vec_prod_g_xx_out = Thyra::createMember(overlapped_x_space); - const auto overlapped_hess_vec_prod_g_xp_out = Thyra::createMember(overlapped_x_space); - const auto overlapped_hess_vec_prod_g_px_out = Thyra::createMember(overlapped_p_space); - const auto overlapped_hess_vec_prod_g_pp_out = Thyra::createMember(overlapped_p_space); + const auto overlapped_hess_vec_prod_g_xx_out = Thyra::createMembers(overlapped_x_space,1); + const auto overlapped_hess_vec_prod_g_xp_out = Thyra::createMembers(overlapped_x_space,1); + const auto overlapped_hess_vec_prod_g_px_out = Thyra::createMembers(overlapped_p_space,1); + const auto overlapped_hess_vec_prod_g_pp_out = Thyra::createMembers(overlapped_p_space,1); overlapped_hess_vec_prod_g_xx->assign(0.0); overlapped_hess_vec_prod_g_xp->assign(0.0); @@ -114,10 +114,15 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe overlapped_hess_vec_prod_g_pp_out->assign(0.0); { - auto ov_hess_vec_prod_g_xx_out_data = Albany::getNonconstLocalData(overlapped_hess_vec_prod_g_xx_out); - auto ov_hess_vec_prod_g_xp_out_data = Albany::getNonconstLocalData(overlapped_hess_vec_prod_g_xp_out); - auto ov_hess_vec_prod_g_px_out_data = Albany::getNonconstLocalData(overlapped_hess_vec_prod_g_px_out); - auto ov_hess_vec_prod_g_pp_out_data = Albany::getNonconstLocalData(overlapped_hess_vec_prod_g_pp_out); + auto ov_hess_vec_prod_g_xx_out_data = Albany::getNonconstDeviceData(overlapped_hess_vec_prod_g_xx_out); + auto ov_hess_vec_prod_g_xp_out_data = Albany::getNonconstDeviceData(overlapped_hess_vec_prod_g_xp_out); + auto ov_hess_vec_prod_g_px_out_data = Albany::getNonconstDeviceData(overlapped_hess_vec_prod_g_px_out); + auto ov_hess_vec_prod_g_pp_out_data = Albany::getNonconstDeviceData(overlapped_hess_vec_prod_g_pp_out); + + auto ov_hess_vec_prod_g_xx_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_g_xx_out_data); + auto ov_hess_vec_prod_g_xp_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_g_xp_out_data); + auto ov_hess_vec_prod_g_px_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_g_px_out_data); + auto ov_hess_vec_prod_g_pp_out_data_host = Kokkos::create_mirror_view(ov_hess_vec_prod_g_pp_out_data); auto p_elem_dof_lids = p_dof_mgr->elem_dof_lids().host(); auto x_elem_dof_lids = x_dof_mgr->elem_dof_lids().host(); @@ -126,14 +131,19 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,cell,ALL); auto x_dof_lids = Kokkos::subview(x_elem_dof_lids,cell,ALL); for (size_t i=0; icombine(overlapped_hess_vec_prod_g_px, hess_vec_prod_g_px, ADD); p_cas_manager->combine(overlapped_hess_vec_prod_g_pp, hess_vec_prod_g_pp, ADD); - Thyra::V_VmV(diff_x_out.ptr(), *one_x_out, *hess_vec_prod_g_xx); + std::cout << "Running hess_vec_prod_g_**...\n"; + Thyra::V_VmV(diff_x_out->col(0).ptr(), *(one_x_out->col(0)), *(hess_vec_prod_g_xx->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_xx_out", *one_x_out, - "hess_vec_prod_g_xx", *diff_x_out, + "hess_vec_prod_g_xx_out", *(one_x_out->col(0)), + "hess_vec_prod_g_xx", *(diff_x_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_x_out.ptr(), *one_x_out, *hess_vec_prod_g_xp); + Thyra::V_VmV(diff_x_out->col(0).ptr(), *(one_x_out->col(0)), *(hess_vec_prod_g_xp->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_xp_out", *one_x_out, - "hess_vec_prod_g_xp", *diff_x_out, + "hess_vec_prod_g_xp_out", *(one_x_out->col(0)), + "hess_vec_prod_g_xp", *(diff_x_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_p_out.ptr(), *one_p_out, *hess_vec_prod_g_px); + Thyra::V_VmV(diff_p_out->col(0).ptr(), *(one_p_out->col(0)), *(hess_vec_prod_g_px->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_px_out", *one_p_out, - "hess_vec_prod_g_px", *diff_p_out, + "hess_vec_prod_g_px_out", *(one_p_out->col(0)), + "hess_vec_prod_g_px", *(diff_p_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); - Thyra::V_VmV(diff_p_out.ptr(), *one_p_out, *hess_vec_prod_g_pp); + Thyra::V_VmV(diff_p_out->col(0).ptr(), *(one_p_out->col(0)), *(hess_vec_prod_g_pp->col(0))); TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_pp_out", *one_p_out, - "hess_vec_prod_g_pp", *diff_p_out, + "hess_vec_prod_g_pp_out", *(one_p_out->col(0)), + "hess_vec_prod_g_pp", *(diff_p_out->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -286,8 +297,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_xx_out", *hess_vec_prod_g_xx_out, - "hess_vec_prod_g_xx", *hess_vec_prod_g_xx, + "hess_vec_prod_g_xx_out", *(hess_vec_prod_g_xx_out->col(0)), + "hess_vec_prod_g_xx", *(hess_vec_prod_g_xx->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -316,8 +327,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_xp_out", *hess_vec_prod_g_xp_out, - "hess_vec_prod_g_xp", *hess_vec_prod_g_xp, + "hess_vec_prod_g_xp_out", *(hess_vec_prod_g_xp_out->col(0)), + "hess_vec_prod_g_xp", *(hess_vec_prod_g_xp->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -346,8 +357,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_px_out", *hess_vec_prod_g_px_out, - "hess_vec_prod_g_px", *hess_vec_prod_g_px, + "hess_vec_prod_g_px_out", *(hess_vec_prod_g_px_out->col(0)), + "hess_vec_prod_g_px", *(hess_vec_prod_g_px->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel)); @@ -376,8 +387,8 @@ TEUCHOS_UNIT_TEST(evaluator_unit_tester, separableScatterScalarResponseHessianVe TEUCHOS_TEST_FOR_EXCEPT( !Thyra::testRelNormDiffErr( - "hess_vec_prod_g_pp_out", *hess_vec_prod_g_pp_out, - "hess_vec_prod_g_pp", *hess_vec_prod_g_pp, + "hess_vec_prod_g_pp_out", *(hess_vec_prod_g_pp_out->col(0)), + "hess_vec_prod_g_pp", *(hess_vec_prod_g_pp->col(0)), "maxSensError", tol, "warningTol", 1.0, // Don't warn &*out_test, verbLevel));