Skip to content

Commit

Permalink
Mostly improvements to the debugging output.
Browse files Browse the repository at this point in the history
Name the data_t allocated for temporaries allowing developers to track
them through the execution. Add the keys to all outputs (tasks and
copies).

Signed-off-by: George Bosilca <[email protected]>
  • Loading branch information
bosilca committed Aug 8, 2024
1 parent 7c2c1a3 commit 9042b6a
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 25 deletions.
9 changes: 9 additions & 0 deletions parsec/arena.c
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,10 @@ int parsec_arena_allocate_device_private(parsec_data_copy_t *copy,
#include "parsec/utils/zone_malloc.h"
#include "mca/device/device_gpu.h"

#if defined(PARSEC_DEBUG)
/* Debug-only counter providing default keys for the data created in
 * parsec_arena_internal_copy_new: it is atomically incremented for each new
 * data and its value is stored in the data key (the device index is then
 * OR-ed into bits 56 and up).
 * The starting value has the bit pattern 0xF000000000000000. It is spelled as
 * -(2^60) so the initializer is representable in an int64_t instead of
 * depending on the implementation-defined conversion of an out-of-range
 * unsigned constant to a signed type. */
static int64_t parsec_countable_incoming_message = -INT64_C(0x1000000000000000);
#endif /* defined(PARSEC_DEBUG) */

static inline parsec_data_copy_t *
parsec_arena_internal_copy_new(parsec_arena_t *arena,
parsec_data_t *data,
Expand All @@ -251,6 +255,11 @@ parsec_arena_internal_copy_new(parsec_arena_t *arena,
if( NULL == ldata ) {
return NULL;
}
#if defined(PARSEC_DEBUG)
/* Name the data with a default key to facilitate debugging */
ldata->key = (uint64_t)parsec_atomic_fetch_inc_int64(&parsec_countable_incoming_message);
ldata->key |= ((uint64_t)device) << 56;
#endif /* defined(PARSEC_DEBUG) */
}
if( 0 == device ) {
copy = parsec_data_copy_new(ldata, device, dtt,
Expand Down
2 changes: 2 additions & 0 deletions parsec/class/info.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,8 @@ void *parsec_info_get(parsec_info_object_array_t *oa, parsec_info_id_t iid)
if(NULL == ie->constructor)
return ret;
nio = ie->constructor(oa->cons_obj, ie->cons_data);
if( NULL == nio )
return ret;
ret = parsec_info_test_and_set(oa, iid, nio, NULL);
if(ret != nio && NULL != ie->destructor) {
ie->destructor(nio, ie->des_data);
Expand Down
5 changes: 3 additions & 2 deletions parsec/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -466,10 +466,11 @@ void parsec_data_copy_dump(parsec_data_copy_t* copy)
if( PARSEC_DATA_COHERENCY_SHARED == copy->coherency_state ) coherency = "shared";

parsec_debug_verbose(0, 0, "%s [%d]: copy %p [ref %d] coherency %s readers %d version %u transit %s flags %s\n"
" older %p orig %p arena %p dev_priv %p\n",
" older %p orig %p [%llx] arena %p dev_priv %p\n",
((NULL != copy->original) && (copy->original->owner_device == copy->device_index)) ? "*" : " ",
(int)copy->device_index, copy, copy->super.super.obj_reference_count, coherency, copy->readers, copy->version, tranfer, flags,
(void *)copy->older, (void *)copy->original, (void *)copy->arena_chunk, copy->device_private);
(void *)copy->older, (void *)copy->original,
(NULL != copy->original) ? (unsigned long)copy->original->key : (unsigned long)-1, (void *)copy->arena_chunk, copy->device_private);
}

void parsec_data_dump(parsec_data_t* data)
Expand Down
26 changes: 14 additions & 12 deletions parsec/mca/device/device_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ void parsec_device_dump_exec_stream(parsec_gpu_exec_stream_t* exec_stream)
int i;

parsec_debug_verbose(0, parsec_gpu_output_stream,
"Dev: GPU stream %d{%p} [events = %d, start = %d, end = %d, executed = %d]",
"Dev: GPU stream %s{%p} [events = %d, start = %d, end = %d, executed = %d]",
exec_stream->name, exec_stream, exec_stream->max_events, exec_stream->start, exec_stream->end,
exec_stream->executed);
for( i = 0; i < exec_stream->max_events; i++ ) {
Expand All @@ -321,12 +321,12 @@ void parsec_device_dump_gpu_state(parsec_device_gpu_module_t* gpu_device)
data_in_dev += gpu_device->super.data_in_from_device[i];
}

parsec_output(parsec_gpu_output_stream, "\n\n");
parsec_output(parsec_gpu_output_stream, "Device %d:%d (%p) epoch\n", gpu_device->super.device_index,
gpu_device->super.device_index, gpu_device, gpu_device->data_avail_epoch);
parsec_output(parsec_gpu_output_stream, "\tpeer mask %x executed tasks with %llu streams %d\n",
gpu_device->peer_access_mask, (unsigned long long)gpu_device->super.executed_tasks, gpu_device->num_exec_streams);
parsec_output(parsec_gpu_output_stream, "\tstats transferred [in: %llu from host %llu from other device out: %llu] required [in: %llu out: %llu]\n",
parsec_output(parsec_gpu_output_stream,
"\n\nDevice %s:%d (%p) epoch %zu\n"
"\tpeer mask %x executed tasks %llu streams %d\n"
"\tstats transferred [in: %llu from host %llu from other device out: %llu] required [in: %llu out: %llu]\n",
gpu_device->super.name, gpu_device->super.device_index, gpu_device, gpu_device->data_avail_epoch,
gpu_device->peer_access_mask, (unsigned long long)gpu_device->super.executed_tasks, gpu_device->num_exec_streams,
(unsigned long long)data_in_host, (unsigned long long)data_in_dev,
(unsigned long long)gpu_device->super.data_out_to_host,
(unsigned long long)gpu_device->super.required_data_in, (unsigned long long)gpu_device->super.required_data_out);
Expand Down Expand Up @@ -1029,7 +1029,7 @@ parsec_device_data_reserve_space( parsec_device_gpu_module_t* gpu_device,
for( j = 0; j <= i; j++ ) {
/* This flow could be a control flow */
if( NULL == temp_loc[j] ) continue;
this_task->data[j].data_out = gpu_elem; /* reset the data out */
this_task->data[j].data_out = NULL; /* reset the data out */
/* This flow could be non-parsec-owned, in which case we can't reclaim it */
if( 0 == (temp_loc[j]->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ) continue;
PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream,
Expand All @@ -1043,6 +1043,9 @@ parsec_device_data_reserve_space( parsec_device_gpu_module_t* gpu_device,
PARSEC_DATA_COPY_RELEASE(gpu_elem);
#endif
parsec_atomic_unlock(&master->lock);
if( data_avail_epoch ) { /* update the memory epoch */
gpu_device->data_avail_epoch++;
}
return PARSEC_HOOK_RETURN_AGAIN;
}

Expand Down Expand Up @@ -1380,7 +1383,7 @@ parsec_device_data_stage_in( parsec_device_gpu_module_t* gpu_device,
"GPU[%d:%s]: Prefetch task %p is staging in",
gpu_device->super.device_index, gpu_device->super.name, gpu_task);
}
if( NULL == gpu_elem ) {
if( gpu_elem == candidate ) { /* data already located in the right place */
if( candidate->device_index == gpu_device->super.device_index ) {
/* the candidate is already located on the GPU, no transfer should be necessary but let's do the bookkeeping */
if( (PARSEC_FLOW_ACCESS_WRITE & type) && (gpu_task->task_type != PARSEC_GPU_TASK_TYPE_PREFETCH) ) {
Expand Down Expand Up @@ -2115,7 +2118,7 @@ parsec_device_kernel_push( parsec_device_gpu_module_t *gpu_device,
gpu_task->last_data_check_epoch = gpu_device->data_avail_epoch;
return ret;
}

gpu_task->last_status = 0; /* mark the task as clean */
for( i = 0; i < this_task->task_class->nb_flows; i++ ) {

flow = gpu_task->flow[i];
Expand Down Expand Up @@ -2143,11 +2146,10 @@ parsec_device_kernel_push( parsec_device_gpu_module_t *gpu_device,
return ret;
}
}

PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream,
"GPU[%d:%s]: Push task %s DONE",
gpu_device->super.device_index, gpu_device->super.name,
parsec_task_snprintf(tmp, MAX_TASK_STRLEN, this_task) );
parsec_task_snprintf(tmp, MAX_TASK_STRLEN, this_task));
gpu_task->complete_stage = parsec_device_callback_complete_push;
#if defined(PARSEC_PROF_TRACE)
gpu_task->prof_key_end = -1; /* We do not log that event as the completion of this task */
Expand Down
14 changes: 12 additions & 2 deletions parsec/parsec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1942,10 +1942,20 @@ parsec_task_snprintf( char* str, size_t size,
task->locals[i].value );
if( index >= size ) return str;
}
index += snprintf(str + index, size - index, "]<%d>", task->priority );
index += snprintf(str + index, size - index, "]<%d> keys = {", task->priority );
if( index >= size ) return str;
for( i = 0; i < tc->nb_flows; i++ ) {
char *prefix = (i == 0) ? "" : ", ";
if ((NULL == task->data[i].data_in) || (NULL == task->data[i].data_in->original))
index += snprintf(str + index, size - index, "%s*", prefix);
else
index += snprintf(str + index, size - index, "%s%lx", prefix, task->data[i].data_in->original->key);
if( index >= size ) return str;
}
index += snprintf(str + index, size - index, "}" );
if( index >= size ) return str;
if( NULL != task->taskpool ) {
index += snprintf(str + index, size - index, "{%u}", task->taskpool->taskpool_id );
index += snprintf(str + index, size - index, " {tp: %u}", task->taskpool->taskpool_id );
if( index >= size ) return str;
}
return str;
Expand Down
1 change: 1 addition & 0 deletions parsec/remote_dep_mpi.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ remote_dep_cmd_to_string(remote_dep_wire_activate_t* origin,
if( NULL == task.task_class ) return snprintf(str, len, "UNKNOWN_of_TASKCLASS_%d", origin->task_class_id), str;
memcpy(&task.locals, origin->locals, sizeof(parsec_assignment_t) * task.task_class->nb_locals);
task.priority = 0xFFFFFFFF;
for(int i = 0; i < task.task_class->nb_flows; task.data[i++].data_in = NULL);
return parsec_task_snprintf(str, len, &task);
}

Expand Down
2 changes: 1 addition & 1 deletion parsec/utils/debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ extern void (*parsec_weaksym_exit)(int status);
#else /* defined(PARSEC_DEBUG_NOISIER) */

#define PARSEC_DEBUG_VERBOSE(...) do{} while(0)
#endif /* defined(PARSEC_DEBUG_VERBOSE) */
#endif /* defined(PARSEC_DEBUG_NOISIER) */

/** @brief Check whether any parsec function returned an error.
*/
Expand Down
12 changes: 4 additions & 8 deletions parsec/utils/zone_malloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,10 @@ void *zone_malloc(zone_malloc_t *gdata, size_t size)
current_segment = SEGMENT_AT_TID(gdata, current_tid);
if( NULL == current_segment ) {
/* Maybe there is a free slot in the beginning. Let's cycle at least once before we bail out */
if( cycled_through == 0 ) {
current_tid = 0;
cycled_through = 1;
current_segment = SEGMENT_AT_TID(gdata, current_tid);
} else {
parsec_atomic_unlock(&gdata->lock);
return NULL;
}
if( 0 != cycled_through ) break;
current_tid = 0;
cycled_through = 1;
current_segment = SEGMENT_AT_TID(gdata, current_tid);
}

if( current_segment->status == SEGMENT_EMPTY && current_segment->nb_units >= nb_units ) {
Expand Down

0 comments on commit 9042b6a

Please sign in to comment.