1 #ifndef _COMPADRE_APPLY_TARGET_EVALUATIONS_HPP_
2 #define _COMPADRE_APPLY_TARGET_EVALUATIONS_HPP_
// Body of a function template over SolutionData (presumably applyTargetsToCoefficients,
// taking data, teamMember, Q and P_target_row -- TODO confirm against the declaration).
// For one target it multiplies rows of P_target_row by columns of Q and writes the
// results into data._d_ss._alphas.
// NOTE(review): this listing has gaps (the signature line, the alpha_ij declarations,
// the reduction result arguments and most closing braces are not visible), so the
// comments below describe only the lines that are shown.
13 template <
typename SolutionData>
14 KOKKOS_INLINE_FUNCTION
// Target handled by this team: first index of the batch plus the team's league rank.
17 const int target_index = data._initial_index_for_batch + teamMember.league_rank();
// The statements that follow are compiled only when COMPADRE_USE_CUDA is defined.
19 #if defined(COMPADRE_USE_CUDA)
// Evaluation-site count is additional_number_of_neighbors_list(target_index) plus one;
// nn is the neighbor count reported for this target.
60 const auto n_evaluation_sites_per_target = data.additional_number_of_neighbors_list(target_index) + 1;
61 const auto nn = data.number_of_neighbors_list(target_index);
62 auto alphas = data._d_ss._alphas;
// The team's threads share the index range [0, nn + _added_alpha_size); i is the offset
// added to alphas_index when a result is stored below.
63 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,
64 nn + data._d_ss._added_alpha_size), [&] (
const int i) {
// For each i: loop over evaluation sites (e), operations (j), output tile entries (k)
// and input tile entries (m).
65 for (int e=0; e<n_evaluation_sites_per_target; ++e) {
66 for (int j=0; j<(int)data.operations_size; ++j) {
67 for (int k=0; k<data._d_ss._lro_output_tile_size[j]; ++k) {
68 for (int m=0; m<data._d_ss._lro_input_tile_size[j]; ++m) {
// offset_index_jmke selects the row of P_target_row for this (j,m,k,e);
// alphas_index is the matching base position in alphas.
69 const int offset_index_jmke = data._d_ss.getTargetOffsetIndex(j,m,k,e);
70 const int alphas_index = data._d_ss.getAlphaIndex(target_index, offset_index_jmke);
// Sampling multiplier above 1 (and m below it): Q is read at column i + m*nn.
// NOTE(review): when the multiplier is above 1 but m is not below it, neither branch
// shown here accumulates anything -- confirm alpha_ij's initial value in the full file.
72 if (data._sampling_multiplier>1 && m<data._sampling_multiplier) {
73 const int m_neighbor_offset = i+m*nn;
// Vector-level reduction over l in [0, this_num_cols) of P_target_row(row, l) * Q(l, column).
// The destination of the reduction is not visible here (presumably alpha_ij -- confirm).
74 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember, data.this_num_cols),
75 [&] (int& l, double& t_alpha_ij) {
76 t_alpha_ij += P_target_row(offset_index_jmke, l)*Q(l, m_neighbor_offset);
// x==x is false only for NaN, so these debug assertions flag NaN entries in either factor.
78 compadre_kernel_assert_extreme_debug(P_target_row(offset_index_jmke, l)==P_target_row(offset_index_jmke, l)
79 &&
"NaN in P_target_row matrix.");
80 compadre_kernel_assert_extreme_debug(Q(l, m_neighbor_offset)==Q(l, m_neighbor_offset)
81 &&
"NaN in Q coefficient matrix.");
// Sampling multiplier of exactly 1: same reduction, but Q is read at column i directly.
84 } else if (data._sampling_multiplier == 1) {
85 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember, data.this_num_cols),
86 [&] (int& l, double& t_alpha_ij) {
87 t_alpha_ij += P_target_row(offset_index_jmke, l)*Q(l,i);
89 compadre_kernel_assert_extreme_debug(P_target_row(offset_index_jmke, l)==P_target_row(offset_index_jmke, l)
90 &&
"NaN in P_target_row matrix.");
91 compadre_kernel_assert_extreme_debug(Q(l,i)==Q(l,i)
92 &&
"NaN in Q coefficient matrix.");
// Store the value for entry i of this (j,m,k,e) tile, then check it for NaN.
98 alphas(alphas_index+i) = alpha_ij;
99 compadre_kernel_assert_extreme_debug(alpha_ij==alpha_ij &&
"NaN in alphas.");
// NOTE(review): the lines from here on look like the non-CUDA path; the #else that
// would separate the two paths is not visible in this listing -- confirm.
// alphas_per_tile_per_target is nn + _added_alpha_size; base_alphas_index is the position
// in _alphas obtained for this target with the offset index of j=m=k=e=0.
109 const int alphas_per_tile_per_target = data.number_of_neighbors_list(target_index) + data._d_ss._added_alpha_size;
110 const global_index_type base_offset_index_jmke = data._d_ss.getTargetOffsetIndex(0,0,0,0);
111 const global_index_type base_alphas_index = data._d_ss.getAlphaIndex(target_index, base_offset_index_jmke);
// 2D view placed over _alphas starting at base_alphas_index, with
// _total_alpha_values * _max_evaluation_sites_per_target rows and
// alphas_per_tile_per_target columns.
113 scratch_matrix_right_type this_alphas(data._d_ss._alphas.data() +
TO_GLOBAL(base_alphas_index), data._d_ss._total_alpha_values*data._d_ss._max_evaluation_sites_per_target, alphas_per_tile_per_target);
115 auto n_evaluation_sites_per_target = data.additional_number_of_neighbors_list(target_index) + 1;
116 const auto nn = data.number_of_neighbors_list(target_index);
// Plain nested loops over evaluation sites (e), operations (j), output tile entries (k),
// input tile entries (m) and finally alpha entries (i).
117 for (
int e=0; e<n_evaluation_sites_per_target; ++e) {
119 for (
size_t j=0; j<data.operations_size; ++j) {
120 for (
int k=0; k<data._d_ss._lro_output_tile_size[j]; ++k) {
121 for (
int m=0; m<data._d_ss._lro_input_tile_size[j]; ++m) {
122 const int offset_index_jmke = data._d_ss.getTargetOffsetIndex(j,m,k,e);
123 for (
int i=0; i<nn + data._d_ss._added_alpha_size; ++i) {
// Column of Q used by the branch for a sampling multiplier above 1.
125 const int Q_col = i+m*nn;
// Accumulate P_target_row(row, l) * Q(l, column) over l in [0, this_num_cols);
// the multiplier test is evaluated on every pass through this loop.
128 for (
int l=0; l<data.this_num_cols; ++l) {
129 if (data._sampling_multiplier>1 && m<data._sampling_multiplier) {
131 alpha_ij += P_target_row(offset_index_jmke, l)*Q(l, Q_col);
134 &&
"NaN in P_target_row matrix.");
136 &&
"NaN in Q coefficient matrix.");
138 }
else if (data._sampling_multiplier == 1) {
140 alpha_ij += P_target_row(offset_index_jmke, l)*Q(l, i);
143 &&
"NaN in P_target_row matrix.");
145 &&
"NaN in Q coefficient matrix.");
// Row is the tile offset index, column is the alpha entry i.
151 this_alphas(offset_index_jmke,i) = alpha_ij;
// All threads of the team synchronize here.
159 teamMember.team_barrier();
std::size_t global_index_type
team_policy::member_type member_type
#define TO_GLOBAL(variable)
#define compadre_kernel_assert_extreme_debug(condition)
Kokkos::View< double **, layout_right, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_right_type
KOKKOS_INLINE_FUNCTION void applyTargetsToCoefficients(const SolutionData &data, const member_type &teamMember, scratch_matrix_right_type Q, scratch_matrix_right_type P_target_row)
For applying the evaluations from a target functional to the polynomial coefficients.