Skip to content
This repository has been archived by the owner on Jan 26, 2024. It is now read-only.

Commit

Permalink
Revert "Reduce the number of allocated signals"
Browse files Browse the repository at this point in the history
This reverts commit 9e354e2.

Reason for revert: <INSERT REASONING HERE>

Change-Id: I627774837d1dc19c50f879719e40932195448e9f
  • Loading branch information
amd-aakash committed Dec 12, 2020
1 parent 9e354e2 commit 90af834
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 39 deletions.
43 changes: 5 additions & 38 deletions device/rocm/rocvirtual.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,21 +455,8 @@ bool VirtualGPU::dispatchGenericAqlPacket(

// TODO: placeholder to set up the kernel to populate the start and end timestamps.
if (timestamp_ != nullptr) {
// Pool size must grow to the size of pending AQL packets
const uint32_t pool_size = index - read;
if (pool_size >= signal_pool_.size()) {
ProfilingSignal profiling_signal = {};
if (HSA_STATUS_SUCCESS != hsa_signal_create(0, 0, nullptr, &profiling_signal.signal_)) {
LogPrintfError("Failed signal allocation id = %d", pool_size);
return false;
}
signal_pool_.push_back(profiling_signal);
assert(queueSize >= signal_pool_.size() && "Pool will be reallocated!");
}
// Move index inside the valid pool
++current_signal_ %= signal_pool_.size();
// Find signal slot
ProfilingSignal* profilingSignal = &signal_pool_[current_signal_];
ProfilingSignal* profilingSignal = &signal_pool_[index & queueMask];
// Make sure we save the old results in the TS structure
if (profilingSignal->ts_ != nullptr) {
profilingSignal->ts_->checkGpuTime();
Expand Down Expand Up @@ -549,8 +536,6 @@ bool VirtualGPU::dispatchGenericAqlPacket(
LogPrintfError("Failed signal [0x%lx] wait", signal.handle);
return false;
}
// Reset the pool of signals
current_signal_ = 0;
}

return true;
Expand Down Expand Up @@ -635,14 +620,8 @@ void VirtualGPU::ResetQueueStates() {
// Release all memory dependencies
memoryDependency().clear();

if (dev().settings().barrier_sync_) {
// Release the pool, since runtime just completed a barrier
// @note: Runtime can reset kernel arg pool only if the barrier with L2 invalidation was issued
resetKernArgPool();
} else {
// Reset the pool of signals
current_signal_ = 0;
}
// Release the pool, since runtime just completed a barrier
resetKernArgPool();
}

// ================================================================================================
Expand Down Expand Up @@ -854,17 +833,8 @@ bool VirtualGPU::initPool(size_t kernarg_pool_size, uint signal_pool_count) {
}

if (signal_pool_count != 0) {
// Reserve signal pool for all entries in the queue, since profiling logic will save the
// pointer in timestamp info for the future references
signal_pool_.reserve(signal_pool_count);
// If the barrier is disabled, then allocate a small portion of all signals and grow the array later.
// @note: the optimization requires a wait for signal on reuse, which is only available when
// the barrier is disabled
constexpr uint32_t kDefaultSignalPoolSize = 32;
const uint32_t default_signal_pool_size = (dev().settings().barrier_sync_) ?
signal_pool_count : kDefaultSignalPoolSize;
signal_pool_.resize(default_signal_pool_size);
for (uint i = 0; i < default_signal_pool_size; ++i) {
signal_pool_.resize(signal_pool_count);
for (uint i = 0; i < signal_pool_count; ++i) {
ProfilingSignal profilingSignal;
if (HSA_STATUS_SUCCESS != hsa_signal_create(0, 0, nullptr, &profilingSignal.signal_)) {
return false;
Expand Down Expand Up @@ -911,9 +881,6 @@ void* VirtualGPU::allocKernArg(size_t size, size_t alignment) {
}

resetKernArgPool();

// Reset the pool of signals
current_signal_ = 0;
}
} while (true);

Expand Down
1 change: 0 additions & 1 deletion device/rocm/rocvirtual.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,6 @@ class VirtualGPU : public device::VirtualDevice {
uint kernarg_pool_cur_offset_;

std::vector<ProfilingSignal> signal_pool_; //!< Pool of signals for profiling
uint32_t current_signal_ = 0; //!< Current available signal in the pool
friend class Timestamp;

// PM4 packet for gfx8 performance counter
Expand Down

0 comments on commit 90af834

Please sign in to comment.