Skip to content

Commit

Permalink
Merge pull request ICLDisco#644 from abouteiller/bugfix/no-gpu-found
Browse files Browse the repository at this point in the history
Consolidated error handling when GPU only tests execute on CPU systems
  • Loading branch information
abouteiller authored May 24, 2024
2 parents 1fdfded + 5ff246a commit a5f49ab
Show file tree
Hide file tree
Showing 10 changed files with 42 additions and 41 deletions.
2 changes: 2 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ function(parsec_addtest_cmd target)
# restrict memory use for oversubscribed runners
set_tests_properties(${target} PROPERTIES ENVIRONMENT
"PARSEC_MCA_device_cuda_enabled=0;PARSEC_MCA_device_hip_enabled=0;PARSEC_MCA_device_level_zero_enabled=0;PARSEC_MCA_device_cuda_memory_use=10;PARSEC_MCA_device_hip_memory_use=10;PARSEC_MCA_device_level_zero_memory_use=10")
# skip tests that fail because the device is not available
set_tests_properties(${target} PROPERTIES SKIP_RETURN_CODE 10) # 10 is -PARSEC_ERR_DEVICE, positive 7bit return codes are more portable
endfunction(parsec_addtest_cmd)

check_function_exists(erand48 PARSEC_HAVE_ERAND48)
Expand Down
3 changes: 2 additions & 1 deletion tests/dsl/dtd/dtd_test_task_insertion.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,12 @@ int main(int argc, char ** argv)
#endif

int m, n;
int no_of_tasks = 500000;
int no_of_tasks = 50000;
int amount_of_work[3] = {100, 1000, 10000};
parsec_taskpool_t *dtd_tp;

parsec = parsec_init( cores, &argc, &argv );
cores = parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_CORES);

dtd_tp = parsec_dtd_taskpool_new();

Expand Down
10 changes: 9 additions & 1 deletion tests/runtime/cuda/nvlink_main.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2020 The University of Tennessee and The University
* Copyright (c) 2019-2024 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
*/
Expand Down Expand Up @@ -34,6 +34,14 @@ int main(int argc, char *argv[])

parsec = parsec_init(-1, &argc, &argv);

/* can the test run? */
int nb_gpus = parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_DEVICES, PARSEC_DEV_CUDA);
assert(nb_gpus >= 0);
if(nb_gpus == 0) {
parsec_warning("This test can only run if at least one GPU device is present");
exit(-PARSEC_ERR_DEVICE);
}

tp = testing_nvlink_New(parsec, 10, 512);
if( NULL != tp ) {
parsec_context_add_taskpool(parsec, tp);
Expand Down
25 changes: 6 additions & 19 deletions tests/runtime/cuda/nvlink_wrapper.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@

/**
* Copyright (c) 2019-2021 The University of Tennessee and The University
* Copyright (c) 2019-2024 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
Expand Down Expand Up @@ -103,20 +102,8 @@ parsec_taskpool_t* testing_nvlink_New( parsec_context_t *ctx, int depth, int mb
parsec_matrix_block_cyclic_t *userM;

/** Find all CUDA devices */
nb = 0;
for(dev = 0; dev < (int)parsec_nb_devices; dev++) {
parsec_device_module_t *device = parsec_mca_device_get(dev);
if( PARSEC_DEV_CUDA == device->type ) {
nb++;
}
}
if(nb == 0) {
char hostname[256];
gethostname(hostname, 256);
fprintf(stderr, "This test requires at least one CUDA device per node -- no CUDA device found on rank %d on %s\n",
ctx->my_rank, hostname);
return NULL;
}
nb = parsec_context_query(ctx, PARSEC_CONTEXT_QUERY_DEVICES, PARSEC_DEV_CUDA);
assert(nb >= 0);
dev_index = (int*)malloc(nb * sizeof(int));
nb = 0;
for(dev = 0; dev < (int)parsec_nb_devices; dev++) {
Expand Down Expand Up @@ -156,7 +143,7 @@ parsec_taskpool_t* testing_nvlink_New( parsec_context_t *ctx, int depth, int mb

/* GEMM1 tasks will create one data copy per GPU, and work on those.
* see nvlink.jdf:MAKE_C tasks */

/* userM is a user-managed matrix: the user creates the data copies
* only on the GPU they want the GEMM2 to run. To simplify the code,
* we use parsec_matrix_block_cyclic that requires to also have a CPU data
Expand Down Expand Up @@ -208,14 +195,14 @@ parsec_taskpool_t* testing_nvlink_New( parsec_context_t *ctx, int depth, int mb
g++;
}
}

testing_handle = parsec_nvlink_new(dcA, userM, ctx->nb_nodes, CuHI, nb, dev_index);

parsec_add2arena( &testing_handle->arenas_datatypes[PARSEC_nvlink_DEFAULT_ADT_IDX],
parsec_datatype_double_complex_t,
PARSEC_MATRIX_FULL, 1, mb, mb, mb,
PARSEC_ARENA_ALIGNMENT_SSE, -1 );

return &testing_handle->super;
}

6 changes: 3 additions & 3 deletions tests/runtime/cuda/stage_custom.jdf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
extern "C" %{
/*
* Copyright (c) 2019-2023 The University of Tennessee and The University
* Copyright (c) 2019-2024 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
Expand Down Expand Up @@ -162,7 +162,7 @@ BODY [type=CUDA
lbeta, (double*)A, ldam );
status = cublasGetError();
PARSEC_CUDA_CHECK_ERROR( "cublasDgemm", status,
{return -1;} );
{return PARSEC_HOOK_RETURN_ERROR;} );
}
END

Expand Down Expand Up @@ -203,7 +203,7 @@ BODY [type=CUDA
lbeta, (double*)B, ldbm );
status = cublasGetError();
PARSEC_CUDA_CHECK_ERROR( "cublasDgemm", status,
{return -1;} );
{return PARSEC_HOOK_RETURN_ERROR;} );

}
END
Expand Down
17 changes: 12 additions & 5 deletions tests/runtime/cuda/stage_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ int main(int argc, char *argv[])
{
parsec_context_t *parsec = NULL;
parsec_taskpool_t *tp;
int i;
int size = 1;
int rank = 0;
int M;
Expand All @@ -41,7 +40,15 @@ int main(int argc, char *argv[])
exit(-1);
}

/* can the test run? */
assert(size == 1);
int nb_gpus = parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_DEVICES, PARSEC_DEV_CUDA);
assert(nb_gpus >= 0);
if(nb_gpus == 0) {
parsec_warning("This test can only run if at least one GPU device is present");
printf("TEST SKIPPED\n");
exit(-PARSEC_ERR_DEVICE);
}

/* Test: comparing results when:
- tile matrix transferred to GPU with default stage_in/stage_out
Expand Down Expand Up @@ -87,9 +94,9 @@ int main(int argc, char *argv[])
parsec_taskpool_free(tp);
}

if(ret!= 0){
printf("TEST FAILED\n");
}else{
if( ret != 0) {
printf("TEST FAILED (%d errors)\n", ret);
} else {
printf("TEST PASSED\n");
}

Expand All @@ -98,5 +105,5 @@ int main(int argc, char *argv[])
MPI_Finalize();
#endif /* DISTRIBUTED */

return ret;
return (0 == ret)? EXIT_SUCCESS: EXIT_FAILURE;
}
2 changes: 1 addition & 1 deletion tests/runtime/cuda/stress.jdf
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ r = 0 .. NP-1
// Parameters
READ A <- (g == 0) ? A READ_A(m, r) : A GEMM(m, g-1, r)
-> ((g + 1) < NGPUs) ? A GEMM(m, g+1, r)
READ B <- A READ_A( (m+g) % descA->super.mt, r)
READ B <- A READ_A(m, r)
RW C <- (m == 0) ? C MAKE_C(g, r) : C GEMM(m-1, g, r)
-> ((m + 1) < (descA->super.mt)) ? C GEMM(m+1, g, r)
: C DISCARD_C(g, r)
Expand Down
2 changes: 1 addition & 1 deletion tests/runtime/cuda/stress_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ int main(int argc, char *argv[])

parsec = parsec_init(-1, &argc, &argv);

tp = testing_stress_New(parsec, 4000, 1024);
tp = testing_stress_New(parsec, 80, 1024);
if( NULL != tp ) {
parsec_context_add_taskpool(parsec, tp);
parsec_context_start(parsec);
Expand Down
12 changes: 4 additions & 8 deletions tests/runtime/cuda/stress_wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ static void __parsec_stress_destructor( parsec_taskpool_t *tp )
dcA = stress_taskpool->_g_descA;
parsec_tiled_matrix_destroy( (parsec_tiled_matrix_t*)stress_taskpool->_g_descA );
free(dcA);
free(stress_taskpool->_g_cuda_device_index);
}

PARSEC_OBJ_CLASS_INSTANCE(parsec_stress_taskpool_t, parsec_taskpool_t,
Expand All @@ -27,19 +28,14 @@ parsec_taskpool_t* testing_stress_New( parsec_context_t *ctx, int depth, int mb
parsec_matrix_block_cyclic_t *dcA;

/** Find all CUDA devices */
nb = 0;
for(dev = 0; dev < (int)parsec_nb_devices; dev++) {
parsec_device_module_t *device = parsec_mca_device_get(dev);
if( PARSEC_DEV_CUDA == device->type ) {
nb++;
}
}
nb = parsec_context_query(ctx, PARSEC_CONTEXT_QUERY_DEVICES, PARSEC_DEV_CUDA);
assert(nb >= 0);
if(nb == 0) {
/* We just simulate a run on CPUs, with an arbitrary number of pseudo-GPUs */
nb = 8;
dev_index = (int*)malloc(nb * sizeof(int));
memset(dev_index, -1, nb*sizeof(int));
fprintf(stderr, "Simulating %d GPUs for sanity checking in stress test\n", nb);
parsec_warning("Simulating %d GPUs for sanity checking in stress test\n", nb);
} else {
dev_index = (int*)malloc(nb * sizeof(int));
nb = 0;
Expand Down
4 changes: 2 additions & 2 deletions tests/runtime/cuda/testing_get_best_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ int main(int argc, char *argv[])

/* Check result */
if( 0 == rank && info != 0 ) {
fprintf(stderr, "Result is Wrong !!!\n");
fprintf(stderr, "Result is Wrong (info %d) !!!\n", info);
}

parsec_data_free(dcA.mat);
Expand All @@ -172,5 +172,5 @@ int main(int argc, char *argv[])
MPI_Finalize();
#endif

return info;
return (0 == info)? EXIT_SUCCESS: EXIT_FAILURE;
}

0 comments on commit a5f49ab

Please sign in to comment.