11 #include "KokkosBatched_UTV_Decl.hpp"
12 #include "KokkosBatched_SolveUTV_Decl_Compadre.hpp"
14 using namespace KokkosBatched;
17 namespace GMLS_LinearAlgebra {
19 template<
typename DeviceType,
21 typename MatrixViewType_A,
22 typename MatrixViewType_B,
23 typename MatrixViewType_X>
33 KOKKOS_INLINE_FUNCTION
38 const MatrixViewType_A &a,
39 const MatrixViewType_B &b,
40 const bool implicit_RHS)
41 : _a(a), _b(b), _M(M), _N(N), _NRHS(NRHS), _implicit_RHS(implicit_RHS)
42 { _pm_getTeamScratchLevel_0 = 0; _pm_getTeamScratchLevel_1 = 0; }
44 template<
typename MemberType>
45 KOKKOS_INLINE_FUNCTION
48 const int k = member.league_rank();
55 _a.extent(1), _a.extent(2));
57 _b.extent(1), _b.extent(2));
59 _b.extent(1), _b.extent(2));
62 if ((
size_t)_M!=_a.extent(1) || (
size_t)_N!=_a.extent(2)) {
66 Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_M),[&](
const int &i) {
67 Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_N),[&](
const int &j) {
71 member.team_barrier();
72 Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_M),[&](
const int &i) {
73 Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_N),[&](
const int &j) {
77 member.team_barrier();
81 if (std::is_same<typename MatrixViewType_B::array_layout, layout_left>::value) {
87 _b.extent(1), _b.extent(2));
88 Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_N),[&](
const int &i) {
89 Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_NRHS),[&](
const int &j) {
90 tmp(i,j) = bb_left(i,j);
93 member.team_barrier();
94 Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_N),[&](
const int &i) {
95 Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_NRHS),[&](
const int &j) {
105 bool do_print =
false;
107 Kokkos::single(Kokkos::PerTeam(member), [&] () {
108 #if KOKKOS_VERSION >= 40200
109 using Kokkos::printf;
112 printf(
"a=zeros(%lu,%lu);\n", aa.extent(0), aa.extent(1));
113 for (
size_t i=0; i<aa.extent(0); ++i) {
114 for (
size_t j=0; j<aa.extent(1); ++j) {
115 printf(
"a(%lu,%lu)= %f;\n", i+1,j+1, aa(i,j));
119 printf(
"b=zeros(%lu,%lu);\n", bb.extent(0), bb.extent(1));
120 for (
size_t i=0; i<bb.extent(0); ++i) {
121 for (
size_t j=0; j<bb.extent(1); ++j) {
122 printf(
"b(%lu,%lu)= %f;\n", i+1,j+1, bb(i,j));
135 member.team_barrier();
136 TeamVectorUTV<MemberType,AlgoTagType>
137 ::invoke(member, aa, pp, uu, vv, ww_fast, matrix_rank);
138 member.team_barrier();
141 Kokkos::single(Kokkos::PerTeam(member), [&] () {
142 #if KOKKOS_VERSION >= 40200
143 using Kokkos::printf;
145 printf(
"matrix_rank: %d\n", matrix_rank);
147 printf(
"u=zeros(%lu,%lu);\n", uu.extent(0), uu.extent(1));
148 for (
size_t i=0; i<uu.extent(0); ++i) {
149 for (
size_t j=0; j<uu.extent(1); ++j) {
150 printf(
"u(%lu,%lu)= %f;\n", i+1,j+1, uu(i,j));
155 TeamVectorSolveUTVCompadre<MemberType,AlgoTagType>
156 ::invoke(member, matrix_rank, _M, _N, _NRHS, uu, aa, vv, pp, bb, xx, ww_slow, ww_fast, _implicit_RHS);
157 member.team_barrier();
163 typedef typename MatrixViewType_A::non_const_value_type value_type;
164 std::string name_region(
"KokkosBatched::Test::TeamVectorSolveUTVCompadre");
165 std::string name_value_type = ( std::is_same<value_type,float>::value ?
"::Float" :
166 std::is_same<value_type,double>::value ?
"::Double" :
167 std::is_same<value_type,Kokkos::complex<float> >::value ?
"::ComplexFloat" :
168 std::is_same<value_type,Kokkos::complex<double> >::value ?
"::ComplexDouble" :
"::UnknownValueType" );
169 std::string name = name_region + name_value_type;
170 Kokkos::Profiling::pushRegion( name.c_str() );
175 int scratch_size = scratch_matrix_right_type::shmem_size(_N, _N);
176 scratch_size += scratch_matrix_right_type::shmem_size(_M, _N );
177 scratch_size += scratch_vector_type::shmem_size(_N*_NRHS);
179 int l0_scratch_size = scratch_vector_type::shmem_size(_N);
180 l0_scratch_size += scratch_vector_type::shmem_size(3*_M);
189 Kokkos::Profiling::popRegion();
// batchQRPivotingSolve: host-side entry point that solves a batch of problems
// with QR+Pivoting, dispatched through the KokkosBatched UTV algorithm tag
// (Algo::UTV::Unblocked).
//
// Template parameters:
//   A_layout, B_layout, X_layout - Kokkos array layouts used for the rank-3
//       view types built for A, B and X respectively.
//
// Parameters:
//   pm            - ParallelManager handed to the functor's run().
//   A, lda, nda   - raw pointer wrapped (unmanaged, no copy) as a rank-3 view
//                   with extents (num_matrices, lda, nda).
//   B, ldb, ndb   - raw pointer wrapped (unmanaged, no copy) as a rank-3 view
//                   with extents (num_matrices, ldb, ndb).
//   M, N, NRHS    - problem dimensions forwarded unchanged to the functor;
//                   presumably rows, columns and number of right-hand sides
//                   of each system -- TODO confirm against the functor.
//   num_matrices  - leading (batch) extent of both views.
//   implicit_RHS  - flag forwarded unchanged to the functor.
//
// NOTE(review): where the solution is written is not visible in this
// function; confirm against the functor before relying on it.
195 template <
typename A_layout,
typename B_layout,
typename X_layout>
196 void batchQRPivotingSolve(
ParallelManager pm,
double *A,
int lda,
int nda,
double *B,
int ldb,
int ndb,
int M,
int N,
int NRHS,
const int num_matrices,
const bool implicit_RHS) {
198 typedef Algo::UTV::Unblocked algo_tag_type;
199 typedef Kokkos::View<double***, A_layout, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
201 typedef Kokkos::View<double***, B_layout, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
203 typedef Kokkos::View<double***, X_layout, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
// Wrap the caller-owned buffers; the Unmanaged trait means these views neither
// allocate nor free the memory behind A and B.
206 MatrixViewType_A mat_A(A, num_matrices, lda, nda);
207 MatrixViewType_B mat_B(B, num_matrices, ldb, ndb);
// Build the solver functor with the problem sizes and views, then launch it
// through the ParallelManager.
210 <
device_execution_space, algo_tag_type, MatrixViewType_A, MatrixViewType_B, MatrixViewType_X>(M,N,NRHS,mat_A,mat_B,implicit_RHS).run(pm);
// Explicit instantiations of batchQRPivotingSolve for all eight combinations
// of layout_right / layout_left over its three layout template parameters
// (A_layout, B_layout, X_layout, in that order). Every instantiation has the
// same function parameter list; only the layout triple differs.
214 template void batchQRPivotingSolve<layout_right, layout_right, layout_right>(
ParallelManager,
double*,
int,
int,
double*,
int,
int,
int,
int,
int,
const int,
const bool);
215 template void batchQRPivotingSolve<layout_right, layout_right, layout_left >(
ParallelManager,
double*,
int,
int,
double*,
int,
int,
int,
int,
int,
const int,
const bool);
216 template void batchQRPivotingSolve<layout_right, layout_left , layout_right>(
ParallelManager,
double*,
int,
int,
double*,
int,
int,
int,
int,
int,
const int,
const bool);
217 template void batchQRPivotingSolve<layout_right, layout_left , layout_left >(
ParallelManager,
double*,
int,
int,
double*,
int,
int,
int,
int,
int,
const int,
const bool);
218 template void batchQRPivotingSolve<layout_left , layout_right, layout_right>(
ParallelManager,
double*,
int,
int,
double*,
int,
int,
int,
int,
int,
const int,
const bool);
219 template void batchQRPivotingSolve<layout_left , layout_right, layout_left >(
ParallelManager,
double*,
int,
int,
double*,
int,
int,
int,
int,
int,
const int,
const bool);
220 template void batchQRPivotingSolve<layout_left , layout_left , layout_right>(
ParallelManager,
double*,
int,
int,
double*,
int,
int,
int,
int,
int,
const int,
const bool);
221 template void batchQRPivotingSolve<layout_left , layout_left , layout_left >(
ParallelManager,
double*,
int,
int,
double*,
int,
int,
int,
int,
int,
const int,
const bool);
Kokkos::DefaultExecutionSpace device_execution_space
Kokkos::View< double **, layout_left, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_left_type
Kokkos::View< double *, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_vector_type
#define TO_GLOBAL(variable)
Kokkos::View< int *, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_local_index_type
Kokkos::View< double **, layout_right, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_right_type
void setTeamScratchSize(const int level, const int value)
void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1) const
Calls a parallel_for; the parallel_for will break out over loops over teams, with each vector lane executin...
KOKKOS_INLINE_FUNCTION int getTeamScratchLevel(const int level) const
template void batchQRPivotingSolve< layout_left, layout_right, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_left, layout_left, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
void batchQRPivotingSolve(ParallelManager pm, double *A, int lda, int nda, double *B, int ldb, int ndb, int M, int N, int NRHS, const int num_matrices, const bool implicit_RHS)
Solves a batch of problems with QR+Pivoting.
template void batchQRPivotingSolve< layout_left, layout_right, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_left, layout_left, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_right, layout_left, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_right, layout_right, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_right, layout_left, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_right, layout_right, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
void run(ParallelManager pm)
int _pm_getTeamScratchLevel_1
int _pm_getTeamScratchLevel_0
KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const
KOKKOS_INLINE_FUNCTION Functor_TestBatchedTeamVectorSolveUTV(const int M, const int N, const int NRHS, const MatrixViewType_A &a, const MatrixViewType_B &b, const bool implicit_RHS)