diff --git a/ttg/ttg/parsec/devicefunc.h b/ttg/ttg/parsec/devicefunc.h
index a44572018..f1ef7c1e6 100644
--- a/ttg/ttg/parsec/devicefunc.h
+++ b/ttg/ttg/parsec/devicefunc.h
@@ -114,8 +114,8 @@ namespace ttg_parsec {
     uint8_t i; // only limited number of flows
     detail::parsec_ttg_task_base_t *caller = detail::parsec_ttg_caller;
     assert(nullptr != caller->dev_ptr);
+    caller->dev_ptr->gpu_task->allocate_flows(span.size());
     parsec_gpu_task_t *gpu_task = caller->dev_ptr->gpu_task;
-    parsec_flow_t *flows = caller->dev_ptr->flows;
 
     bool is_current = false;
     for (i = 0; i < span.size(); ++i) {
@@ -140,14 +140,15 @@ namespace ttg_parsec {
 
         /* build the flow */
         /* TODO: reuse the flows of the task class? How can we control the sync direction then? */
-        flows[i] = parsec_flow_t{.name = nullptr,
+        *((parsec_flow_t*)gpu_task->flow_info[i].flow) =
+                  parsec_flow_t{.name = nullptr,
                                 .sym_type = PARSEC_SYM_INOUT,
                                 .flow_flags = static_cast<uint8_t>(access),
                                 .flow_index = i,
                                 .flow_datatype_mask = ~0 };
 
-        gpu_task->flow_nb_elts[i] = data->nb_elts; // size in bytes
-        gpu_task->flow[i] = &flows[i];
+        gpu_task->flow_info[i].flow_span = data->span; // size in bytes
+        gpu_task->flow_info[i].flow_dc = nullptr;
 
         /* set the input data copy, parsec will take care of the transfer
         * and the buffer will look at the parsec_data_t for the current pointer */
@@ -158,25 +159,17 @@ namespace ttg_parsec {
 
       } else {
         /* ignore the flow */
-        flows[i] = parsec_flow_t{.name = nullptr,
+        *((parsec_flow_t*)gpu_task->flow_info[i].flow) =
+                   parsec_flow_t{.name = nullptr,
                                  .sym_type = PARSEC_FLOW_ACCESS_NONE,
                                  .flow_flags = 0,
                                  .flow_index = i,
                                  .flow_datatype_mask = ~0 };
-        gpu_task->flow[i] = &flows[i];
-        gpu_task->flow_nb_elts[i] = 0; // size in bytes
+        gpu_task->flow_info[i].flow_span = 0; // size in bytes
         caller->parsec_task.data[i].data_in = nullptr;
       }
     }
 
-    /* reset all remaining entries in the current task */
-    for (; i < MAX_PARAM_COUNT; ++i) {
-      detail::parsec_ttg_caller->parsec_task.data[i].data_in = nullptr;
-      detail::parsec_ttg_caller->dev_ptr->flows[i].flow_flags = PARSEC_FLOW_ACCESS_NONE;
-      detail::parsec_ttg_caller->dev_ptr->flows[i].flow_index = i;
-      detail::parsec_ttg_caller->dev_ptr->gpu_task->flow[i] = &detail::parsec_ttg_caller->dev_ptr->flows[i];
-      detail::parsec_ttg_caller->dev_ptr->gpu_task->flow_nb_elts[i] = 0;
-    }
     // we cannot allow the calling thread to submit kernels so say we're not ready
     return is_current;
   }
@@ -198,7 +191,7 @@ namespace ttg_parsec {
         int ret = device_module->memcpy_async(device_module, stream,
                                               data->device_copies[0]->device_private,
                                               data->device_copies[data->owner_device]->device_private,
-                                              data->nb_elts, parsec_device_gpu_transfer_direction_d2h);
+                                              data->span, parsec_device_gpu_transfer_direction_d2h);
         assert(ret == PARSEC_SUCCESS);
       }
       if constexpr (sizeof...(Is) > 0) {
diff --git a/ttg/ttg/parsec/task.h b/ttg/ttg/parsec/task.h
index 5df3aca0f..2080aaf29 100644
--- a/ttg/ttg/parsec/task.h
+++ b/ttg/ttg/parsec/task.h
@@ -16,17 +16,15 @@ namespace ttg_parsec {
         if (this->memory != nullptr) free_flows();
         constexpr const auto align = std::align_val_t(std::max(alignof(parsec_flow_t), alignof(parsec_gpu_flow_info_t)));
         this->memory = new(align) std::byte[size * (sizeof(parsec_flow_t) + sizeof(parsec_gpu_flow_info_s))];
-        if (this->flow_info != nullptr) {
-          parsec_flow_t *flows = (parsec_flow_t*)this->memory;
-          this->flow_info = (parsec_gpu_flow_info_t*)(this->memory + size * sizeof(parsec_flow_t));
-          for (std::size_t i = 0; i < size; ++i) {
-            this->flow_info[i].flow = &flows[i];
-            flows[i].flow_index = i;
-            flows[i].flow_flags = 0;
-            flows[i].flow_datatype_mask = ~0;
-          }
-          this->nb_flows  = size;
+        parsec_flow_t *flows = (parsec_flow_t*)this->memory;
+        this->flow_info = (parsec_gpu_flow_info_t*)(this->memory + size * sizeof(parsec_flow_t));
+        for (std::size_t i = 0; i < size; ++i) {
+          this->flow_info[i].flow = &flows[i];
+          flows[i].flow_index = i;
+          flows[i].flow_flags = 0;
+          flows[i].flow_datatype_mask = ~0;
         }
+        this->nb_flows  = size;
       }
 
       void free_flows() {
diff --git a/ttg/ttg/parsec/ttg.h b/ttg/ttg/parsec/ttg.h
index 1c439ce4c..597d29800 100644
--- a/ttg/ttg/parsec/ttg.h
+++ b/ttg/ttg/parsec/ttg.h
@@ -1466,10 +1466,10 @@ namespace ttg_parsec {
       ttg::device::detail::reset_current();
 
       auto discard_tmp_flows = [&](){
-        for (int i = 0; i < MAX_PARAM_COUNT; ++i) {
-          if (gpu_task->flow[i]->flow_flags & TTG_PARSEC_FLOW_ACCESS_TMP) {
+        for (int i = 0; i < gpu_task->nb_flows; ++i) {
+          if (gpu_task->flow_info[i].flow->flow_flags & TTG_PARSEC_FLOW_ACCESS_TMP) {
             /* temporary flow, discard by setting it to read-only to avoid evictions */
-            const_cast<parsec_flow_t*>(gpu_task->flow[i])->flow_flags = PARSEC_FLOW_ACCESS_READ;
+            const_cast<parsec_flow_t*>(gpu_task->flow_info[i].flow)->flow_flags = PARSEC_FLOW_ACCESS_READ;
             task->parsec_task.data[i].data_out->readers = 1;
           }
         }