This repository has been archived by the owner on Jul 26, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
kgemm_nt_batched.hpp
76 lines (59 loc) · 2.12 KB
/
kgemm_nt_batched.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#ifndef KGEMM_NT_BATCHED_H
#define KGEMM_NT_BATCHED_H 1
#include "kroncommon.hpp"
#include "kgemm_nt.hpp"
/**
 * Batched driver around kgemm_nt.
 *
 * For every batch entry b in [0, batchCount) this forwards the b-th
 * A/B/C pointers and the b-th leading dimensions to kgemm_nt, together
 * with the shared sizes (mm, nn, kk) and scalars (alpha, beta).
 * NOTE(review): the "nt" suffix presumably means C = alpha*A*B^T + beta*C;
 * the exact operation is defined by kgemm_nt -- confirm there.
 *
 * @param mm,nn,kk    problem sizes, passed unchanged to kgemm_nt
 * @param alpha       scalar passed unchanged to kgemm_nt
 * @param Aarray_     per-batch pointers to A (read through T const *)
 * @param ldAarray_   per-batch leading dimension of A
 * @param Barray_     per-batch pointers to B (read through T const *)
 * @param ldBarray_   per-batch leading dimension of B
 * @param beta        scalar passed unchanged to kgemm_nt
 * @param Carray_     per-batch pointers to C
 * @param ldCarray_   per-batch leading dimension of C
 * @param batchCount  number of entries in each of the arrays above;
 *                    nothing is done when it is <= 0
 *
 * When USE_GPU is defined, the block with index blockIdx.x handles batch
 * entries blockIdx.x, blockIdx.x + gridDim.x, ... and the launch grid is
 * expected to be one-dimensional.  Otherwise all entries are processed
 * sequentially in increasing order.
 */
template<typename T>
GLOBAL_FUNCTION
void kgemm_nt_batched( int const mm, int const nn, int const kk,
                       T const alpha,
                       T* const Aarray_[],
                       int const ldAarray_[],
                       T* const Barray_[],
                       int const ldBarray_[],
                       T const beta,
                       T* const Carray_[],
                       int const ldCarray_[],
                       int const batchCount)
{
#ifdef USE_GPU
        // ------------------------------------------------
        // batch entries are dealt out across blocks along x
        // ------------------------------------------------
        int const first_batch  = blockIdx.x;
        int const batch_stride = gridDim.x;

        expect( gridDim.y == 1);
        expect( gridDim.z == 1);
#else
        // ----------------------------------
        // host build: walk every batch entry
        // ----------------------------------
        int const first_batch  = 0;
        int const batch_stride = 1;
#endif

        // ----------------------------------------
        // plain 0-based indexing into batch arrays
        // ----------------------------------------
        for (int ib = first_batch; ib < batchCount; ib += batch_stride) {
                T const * const Amat = Aarray_[ib];
                T const * const Bmat = Barray_[ib];
                T*        const Cmat = Carray_[ib];

                int const lda = ldAarray_[ib];
                int const ldb = ldBarray_[ib];
                int const ldc = ldCarray_[ib];

                kgemm_nt( mm, nn, kk,
                          alpha, Amat, lda,
                                 Bmat, ldb,
                          beta,  Cmat, ldc );
        }
}
#endif