Skip to content

Commit

Permalink
Revert "refactor: change encoder for thread group over dispatch 2/n"
Browse files Browse the repository at this point in the history
This reverts commit 0466317.

Signed-off-by: Compute-Runtime-Validation <[email protected]>
  • Loading branch information
Compute-Runtime-Validation authored and Compute-Runtime-Automation committed Oct 29, 2024
1 parent 7f81179 commit 022f9e6
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 15 deletions.
32 changes: 18 additions & 14 deletions shared/source/command_container/command_encoder_xehp_and_later.inl
Original file line number Diff line number Diff line change
Expand Up @@ -1143,23 +1143,26 @@ void EncodeDispatchKernel<GfxFamily>::encodeThreadGroupDispatch(InterfaceDescrip
threadsPerXeCore /= 2;
}
auto tgDispatchSizeSelected = 8;
uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup();

if (threadGroupDimensions[0] > 1 && (threadGroupDimensions[1] > 1 || threadGroupDimensions[2] > 1)) {
while (threadGroupDimensions[0] % tgDispatchSizeSelected != 0) {
if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) {
while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) {
tgDispatchSizeSelected /= 2;
}
} else if (threadGroupDimensions[1] > 1 && threadGroupDimensions[2] > 1) {
while (threadGroupDimensions[1] % tgDispatchSizeSelected != 0) {
} else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) {
while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) {
tgDispatchSizeSelected /= 2;
}
}

auto workgroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension();

// make sure we fit all xe cores
while (threadGroupCount / tgDispatchSizeSelected < hwInfo.gtSystemInfo.MaxSubSlicesSupported * tileCount && tgDispatchSizeSelected > 1) {
while (workgroupCount / tgDispatchSizeSelected < hwInfo.gtSystemInfo.MaxSubSlicesSupported * tileCount && tgDispatchSizeSelected > 1) {
tgDispatchSizeSelected /= 2;
}

auto threadCountPerGrouping = tgDispatchSizeSelected * threadsPerThreadGroup;
auto threadCountPerGrouping = tgDispatchSizeSelected * numberOfThreadsInThreadGroup;
// make sure we do not use more threads than are present on each xe core
while (threadCountPerGrouping > threadsPerXeCore && tgDispatchSizeSelected > 1) {
tgDispatchSizeSelected /= 2;
Expand All @@ -1184,25 +1187,26 @@ void EncodeDispatchKernel<GfxFamily>::encodeThreadGroupDispatch(InterfaceDescrip
uint32_t availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, grfCount);
availableThreadCount *= tileCount;

uint32_t dispatchedTotalThreadCount = threadsPerThreadGroup * threadGroupCount;
UNRECOVERABLE_IF(threadsPerThreadGroup == 0u);
uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup();
uint32_t dispatchedTotalThreadCount = numberOfThreadsInThreadGroup * threadGroupCount;
UNRECOVERABLE_IF(numberOfThreadsInThreadGroup == 0u);
auto tgDispatchSizeSelected = 1u;

if (dispatchedTotalThreadCount <= availableThreadCount) {
tgDispatchSizeSelected = 1;
} else if (threadsPerThreadGroup <= maxThreadsInTGForTGDispatchSize8) {
} else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize8) {
tgDispatchSizeSelected = 8;
} else if (threadsPerThreadGroup <= maxThreadsInTGForTGDispatchSize4) {
} else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize4) {
tgDispatchSizeSelected = 4;
} else {
tgDispatchSizeSelected = 2;
}
if (threadGroupDimensions[0] > 1 && (threadGroupDimensions[1] > 1 || threadGroupDimensions[2] > 1)) {
while (threadGroupDimensions[0] % tgDispatchSizeSelected != 0) {
if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) {
while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) {
tgDispatchSizeSelected /= 2;
}
} else if (threadGroupDimensions[1] > 1 && threadGroupDimensions[2] > 1) {
while (threadGroupDimensions[1] % tgDispatchSizeSelected != 0) {
} else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) {
while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) {
tgDispatchSizeSelected /= 2;
}
}
Expand Down
2 changes: 1 addition & 1 deletion shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ void EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(InterfaceDescriptor
const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd) {
const auto &productHelper = device.getProductHelper();
if (productHelper.isDisableOverdispatchAvailable(hwInfo)) {
if (threadsPerThreadGroup == 1) {
if (interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup() == 1) {
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(2u));
} else {
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(3u));
Expand Down

0 comments on commit 022f9e6

Please sign in to comment.