Skip to content

Commit

Permalink
PaRSEC: more adjustments to the changes in the gpu task structure
Browse files (browse the repository at this point in the history)
Signed-off-by: Joseph Schuchart <[email protected]>
  • Loading branch information
devreal committed Jan 10, 2025
1 parent 4bf379e commit a55f24a
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 29 deletions.
25 changes: 9 additions & 16 deletions ttg/ttg/parsec/devicefunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ namespace ttg_parsec {
uint8_t i; // only limited number of flows
detail::parsec_ttg_task_base_t *caller = detail::parsec_ttg_caller;
assert(nullptr != caller->dev_ptr);
caller->dev_ptr->gpu_task->allocate_flows(span.size());
parsec_gpu_task_t *gpu_task = caller->dev_ptr->gpu_task;
parsec_flow_t *flows = caller->dev_ptr->flows;

bool is_current = false;
for (i = 0; i < span.size(); ++i) {
Expand All @@ -140,14 +140,15 @@ namespace ttg_parsec {

/* build the flow */
/* TODO: reuse the flows of the task class? How can we control the sync direction then? */
flows[i] = parsec_flow_t{.name = nullptr,
*((parsec_flow_t*)gpu_task->flow_info[i].flow) =
parsec_flow_t{.name = nullptr,
.sym_type = PARSEC_SYM_INOUT,
.flow_flags = static_cast<uint8_t>(access),
.flow_index = i,
.flow_datatype_mask = ~0 };

gpu_task->flow_nb_elts[i] = data->nb_elts; // size in bytes
gpu_task->flow[i] = &flows[i];
gpu_task->flow_info[i].flow_span = data->span; // size in bytes
gpu_task->flow_info[i].flow_dc = nullptr;

/* set the input data copy, parsec will take care of the transfer
* and the buffer will look at the parsec_data_t for the current pointer */
Expand All @@ -158,25 +159,17 @@ namespace ttg_parsec {

} else {
/* ignore the flow */
flows[i] = parsec_flow_t{.name = nullptr,
*((parsec_flow_t*)gpu_task->flow_info[i].flow) =
parsec_flow_t{.name = nullptr,
.sym_type = PARSEC_FLOW_ACCESS_NONE,
.flow_flags = 0,
.flow_index = i,
.flow_datatype_mask = ~0 };
gpu_task->flow[i] = &flows[i];
gpu_task->flow_nb_elts[i] = 0; // size in bytes
gpu_task->flow_info[i].flow_span = 0; // size in bytes
caller->parsec_task.data[i].data_in = nullptr;
}
}

/* reset all remaining entries in the current task */
for (; i < MAX_PARAM_COUNT; ++i) {
detail::parsec_ttg_caller->parsec_task.data[i].data_in = nullptr;
detail::parsec_ttg_caller->dev_ptr->flows[i].flow_flags = PARSEC_FLOW_ACCESS_NONE;
detail::parsec_ttg_caller->dev_ptr->flows[i].flow_index = i;
detail::parsec_ttg_caller->dev_ptr->gpu_task->flow[i] = &detail::parsec_ttg_caller->dev_ptr->flows[i];
detail::parsec_ttg_caller->dev_ptr->gpu_task->flow_nb_elts[i] = 0;
}
// we cannot allow the calling thread to submit kernels so say we're not ready
return is_current;
}
Expand All @@ -198,7 +191,7 @@ namespace ttg_parsec {
int ret = device_module->memcpy_async(device_module, stream,
data->device_copies[0]->device_private,
data->device_copies[data->owner_device]->device_private,
data->nb_elts, parsec_device_gpu_transfer_direction_d2h);
data->span, parsec_device_gpu_transfer_direction_d2h);
assert(ret == PARSEC_SUCCESS);
}
if constexpr (sizeof...(Is) > 0) {
Expand Down
18 changes: 8 additions & 10 deletions ttg/ttg/parsec/task.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,15 @@ namespace ttg_parsec {
if (this->memory != nullptr) free_flows();
constexpr const auto align = std::align_val_t(std::max(alignof(parsec_flow_t), alignof(parsec_gpu_flow_info_t)));
this->memory = new(align) std::byte[size * (sizeof(parsec_flow_t) + sizeof(parsec_gpu_flow_info_s))];
if (this->flow_info != nullptr) {
parsec_flow_t *flows = (parsec_flow_t*)this->memory;
this->flow_info = (parsec_gpu_flow_info_t*)(this->memory + size * sizeof(parsec_flow_t));
for (std::size_t i = 0; i < size; ++i) {
this->flow_info[i].flow = &flows[i];
flows[i].flow_index = i;
flows[i].flow_flags = 0;
flows[i].flow_datatype_mask = ~0;
}
this->nb_flows = size;
parsec_flow_t *flows = (parsec_flow_t*)this->memory;
this->flow_info = (parsec_gpu_flow_info_t*)(this->memory + size * sizeof(parsec_flow_t));
for (std::size_t i = 0; i < size; ++i) {
this->flow_info[i].flow = &flows[i];
flows[i].flow_index = i;
flows[i].flow_flags = 0;
flows[i].flow_datatype_mask = ~0;
}
this->nb_flows = size;
}

void free_flows() {
Expand Down
6 changes: 3 additions & 3 deletions ttg/ttg/parsec/ttg.h
Original file line number Diff line number Diff line change
Expand Up @@ -1466,10 +1466,10 @@ namespace ttg_parsec {
ttg::device::detail::reset_current();

auto discard_tmp_flows = [&](){
for (int i = 0; i < MAX_PARAM_COUNT; ++i) {
if (gpu_task->flow[i]->flow_flags & TTG_PARSEC_FLOW_ACCESS_TMP) {
for (int i = 0; i < gpu_task->nb_flows; ++i) {
if (gpu_task->flow_info[i].flow->flow_flags & TTG_PARSEC_FLOW_ACCESS_TMP) {
/* temporary flow, discard by setting it to read-only to avoid evictions */
const_cast<parsec_flow_t*>(gpu_task->flow[i])->flow_flags = PARSEC_FLOW_ACCESS_READ;
const_cast<parsec_flow_t*>(gpu_task->flow_info[i].flow)->flow_flags = PARSEC_FLOW_ACCESS_READ;
task->parsec_task.data[i].data_out->readers = 1;
}
}
Expand Down

0 comments on commit a55f24a

Please sign in to comment.