-
Notifications
You must be signed in to change notification settings - Fork 37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Unify par_dispatch, par_for_outer & par_for_inner overloads
#1142
base: develop
Are you sure you want to change the base?
Changes from 8 commits
6995d11
2e61847
054ca0e
4c83c4c
0730429
adb15dd
5db0d34
ba335d7
2fa8ad1
b9a95a1
9033aa3
2d450c8
503ae0c
e01dabf
259115d
ad53f40
442c04f
a865f00
dd46b7e
7c7ecc0
53f0f85
e9b440d
2452b48
188d413
5bb7764
1102470
049bf52
9a39c02
08e788f
8d1a5ca
3a32e84
c2ac94f
d7477c7
69cda38
12297d2
0905832
afc86c1
ceaac8f
de6df61
5954079
1d7719c
7b25d89
8ab0985
c16fc6e
a25ffef
7d49d4e
72aa437
ed0be07
fb5ccb2
aae696c
8fb8f5c
066b3d7
54a3c01
b2a6e49
5e3e8b2
d626fed
565717f
c53ec4f
14c098a
f6d9c21
be6ed04
b091860
b532cc7
8a74cba
64cf179
001108f
4662b1b
520811a
bd9df9c
ebae3cd
48664ac
4887cfc
afc381c
842b94d
beb6847
f68ec09
9aa1560
f2cfd91
f255f71
c6f9de1
f1403b1
b31b1dc
4fbef8f
0ee6005
6e37753
1976164
7c20e6d
8758aef
4fa3932
ce39ccc
a450c7c
85568d6
052e39f
ae7a3b9
562b396
034efed
ccf3e2d
f4b2141
4cb73d1
fbd2674
b175f41
11616d8
1661218
e6c7b93
28c3bee
3dfc5cf
2de2eab
206cd31
7bc731e
c448372
6152149
10f3bc2
279b126
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -105,26 +105,26 @@ constexpr InnerLoopPatternSimdFor inner_loop_pattern_simdfor_tag; | |
|
||
// trait to track if pattern requests any type of hierarchical parallelism | ||
template <typename Pattern, typename T = void> | ||
struct UsesHierarchialPar : std::false_type { | ||
struct UsesHierarchicalPar : std::false_type { | ||
static constexpr std::size_t Nvector = 0; | ||
static constexpr std::size_t Nthread = 0; | ||
}; | ||
|
||
template <std::size_t num_thread, std::size_t num_vector> | ||
struct UsesHierarchialPar<LoopPatternTeamThreadVec<num_thread, num_vector>> | ||
struct UsesHierarchicalPar<LoopPatternTeamThreadVec<num_thread, num_vector>> | ||
: std::true_type { | ||
static constexpr std::size_t Nthread = num_thread; | ||
static constexpr std::size_t Nvector = num_vector; | ||
}; | ||
|
||
template <> | ||
struct UsesHierarchialPar<OuterLoopPatternTeams> : std::true_type { | ||
struct UsesHierarchicalPar<OuterLoopPatternTeams> : std::true_type { | ||
static constexpr std::size_t Nvector = 0; | ||
static constexpr std::size_t Nthread = 0; | ||
}; | ||
|
||
template <std::size_t num_vector> | ||
struct UsesHierarchialPar<InnerLoopThreadVec<num_vector>> : std::true_type { | ||
struct UsesHierarchicalPar<InnerLoopThreadVec<num_vector>> : std::true_type { | ||
static constexpr std::size_t Nvector = num_vector; | ||
}; | ||
|
||
|
@@ -191,10 +191,12 @@ struct DispatchType { | |
using Translator = LoopBoundTranslator<Bounds...>; | ||
static constexpr std::size_t Rank = Translator::Rank; | ||
|
||
using HierarchialPar = UsesHierarchialPar<Pattern>; | ||
using HierarchicalPar = UsesHierarchicalPar<Pattern>; | ||
|
||
static constexpr bool is_ParFor = | ||
std::is_same<Tag, dispatch_impl::ParallelForDispatch>::value; | ||
static constexpr bool is_ParRed = | ||
std::is_same<Tag, dispatch_impl::ParallelReduceDispatch>::value; | ||
static constexpr bool is_ParScan = | ||
std::is_same<Tag, dispatch_impl::ParallelScanDispatch>::value; | ||
|
||
|
@@ -219,11 +221,12 @@ struct DispatchType { | |
// for now this is guaranteed to be par_for_inner, when par_reduce_inner is | ||
// supported need to check | ||
return PT::simd; | ||
} else if constexpr (IsMDRange) { | ||
} else if constexpr (IsMDRange || is_ParRed) { | ||
// par_reduce does not currently work with either of the team-based patterns | ||
return PT::md; | ||
} else if constexpr (std::is_same_v<Pattern, OuterLoopPatternTeams>) { | ||
return PT::outer; | ||
} else if constexpr (HierarchialPar::value) { | ||
} else if constexpr (HierarchicalPar::value) { | ||
return PT::collapse; | ||
} | ||
|
||
|
@@ -332,7 +335,7 @@ struct dispatch_collapse { | |
}; | ||
|
||
// builds a functor that uses inner hierarchical parallelism used by both par_disp_inner & | ||
// par_dipsatch for LoopPatternCollapse | ||
// par_dispatch for LoopPatternCollapse | ||
template <std::size_t Rank, std::size_t Nteam, std::size_t Nthread, std::size_t Nvector, | ||
typename IdxTeam, typename Function, typename... ExtraFuncArgs> | ||
KOKKOS_FORCEINLINE_FUNCTION auto | ||
|
@@ -360,7 +363,7 @@ struct par_disp_inner_impl<Pattern, Function, TypeList<Bounds...>, TypeList<Args | |
Function function, Args &&...args) { | ||
auto bound_arr = bound_translator().GetIndexRanges(std::forward<Bounds>(bounds)...); | ||
constexpr bool isSimdFor = std::is_same_v<InnerLoopPatternSimdFor, Pattern>; | ||
constexpr std::size_t Nvector = dispatch_type::HierarchialPar::Nvector; | ||
constexpr std::size_t Nvector = dispatch_type::HierarchicalPar::Nvector; | ||
constexpr std::size_t Nthread = Rank - Nvector; | ||
constexpr auto pattern_tag = LoopPatternTag<dispatch_type::GetPatternTag()>(); | ||
|
||
|
@@ -409,7 +412,7 @@ struct par_dispatch_impl<Tag, Pattern, Function, TypeList<Bounds...>, TypeList<A | |
Function function, Args &&...args, const int scratch_level = 0, | ||
const std::size_t scratch_size_in_bytes = 0) { | ||
constexpr std::size_t Ninner = | ||
dispatch_type::HierarchialPar::Nvector + dispatch_type::HierarchialPar::Nthread; | ||
dispatch_type::HierarchicalPar::Nvector + dispatch_type::HierarchicalPar::Nthread; | ||
|
||
constexpr auto pattern_tag = LoopPatternTag<dispatch_type::GetPatternTag()>(); | ||
static_assert( | ||
|
@@ -486,11 +489,16 @@ struct par_dispatch_impl<Tag, Pattern, Function, TypeList<Bounds...>, TypeList<A | |
Args &&...args, const int scratch_level, | ||
const std::size_t scratch_size_in_bytes) { | ||
static_assert(sizeof...(InnerIs) == 0); | ||
kokkos_dispatch( | ||
Tag(), name, | ||
Kokkos::MDRangePolicy<Kokkos::Rank<Rank>>(exec_space, {bound_arr[OuterIs].s...}, | ||
{(1 + bound_arr[OuterIs].e)...}), | ||
function, std::forward<Args>(args)...); | ||
constexpr std::size_t Nouter = sizeof...(OuterIs); | ||
Kokkos::Array<int, Nouter> tiling{(OuterIs, 1)...}; | ||
tiling[Nouter - 1] = bound_arr[Nouter - 1].e + 1 - bound_arr[Nouter - 1].s; | ||
kokkos_dispatch(Tag(), name, | ||
Kokkos::Experimental::require( | ||
Kokkos::MDRangePolicy<Kokkos::Rank<Rank>>( | ||
exec_space, {bound_arr[OuterIs].s...}, | ||
{(1 + bound_arr[OuterIs].e)...}, tiling), | ||
Kokkos::Experimental::WorkItemProperty::HintLightWeight), | ||
function, std::forward<Args>(args)...); | ||
} | ||
|
||
// Flatten loop bounds into a single outer team_policy | ||
|
@@ -500,13 +508,14 @@ struct par_dispatch_impl<Tag, Pattern, Function, TypeList<Bounds...>, TypeList<A | |
Kokkos::Array<IndexRange, Rank> bound_arr, Function function, | ||
Args &&...args, const int scratch_level, | ||
const std::size_t scratch_size_in_bytes) { | ||
const auto idxer = | ||
MakeIndexer(Kokkos::Array<IndexRange, sizeof...(OuterIs)>{bound_arr[OuterIs]...}); | ||
const std::size_t size = ((bound_arr[OuterIs].e - bound_arr[OuterIs].s + 1) * ...); | ||
kokkos_dispatch( | ||
Tag(), name, | ||
team_policy(exec_space, idxer.size(), Kokkos::AUTO) | ||
team_policy(exec_space, size, Kokkos::AUTO) | ||
.set_scratch_size(scratch_level, Kokkos::PerTeam(scratch_size_in_bytes)), | ||
KOKKOS_LAMBDA(team_mbr_t team_member, ExtraFuncArgs... fargs) { | ||
const auto idxer = MakeIndexer( | ||
Kokkos::Array<IndexRange, sizeof...(OuterIs)>{bound_arr[OuterIs]...}); | ||
const auto idx_arr = idxer.GetIdxArray(team_member.league_rank()); | ||
function(team_member, idx_arr[OuterIs]..., | ||
std::forward<ExtraFuncArgs>(fargs)...); | ||
|
@@ -524,9 +533,9 @@ struct par_dispatch_impl<Tag, Pattern, Function, TypeList<Bounds...>, TypeList<A | |
const std::size_t scratch_size_in_bytes) { | ||
const auto idxer = | ||
MakeIndexer(Kokkos::Array<IndexRange, sizeof...(OuterIs)>{bound_arr[OuterIs]...}); | ||
using HierarchialPar = typename dispatch_type::HierarchialPar; | ||
constexpr std::size_t Nvector = HierarchialPar::Nvector; | ||
constexpr std::size_t Nthread = HierarchialPar::Nthread; | ||
using HierarchicalPar = typename dispatch_type::HierarchicalPar; | ||
constexpr std::size_t Nvector = HierarchicalPar::Nvector; | ||
constexpr std::size_t Nthread = HierarchicalPar::Nthread; | ||
constexpr std::size_t Nouter = Rank - Nvector - Nthread; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What exactly is There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This one covers all the various |
||
kokkos_dispatch( | ||
Tag(), name, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this working as expected?
If I infer the intent correctly, this should create an array initialized to `1` everywhere. My compiler complains with a warning.
AFAIK default init doesn't work for arrays, so we might need something like
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was working for me, but better to avoid the warning.
The warning makes sense since `(OuterIs, 1)` will always just evaluate to 1.