Skip to content

Commit

Permalink
Merge pull request #18065 from jenshannoschwalm/opencl_50_fixes
Browse files Browse the repository at this point in the history
A few OpenCL fixes
  • Loading branch information
TurboGit authored Jan 21, 2025
2 parents 1a976ea + dd57645 commit b952d8c
Show file tree
Hide file tree
Showing 10 changed files with 82 additions and 84 deletions.
4 changes: 2 additions & 2 deletions src/common/nlmeans_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -709,7 +709,7 @@ int nlmeans_denoise_cl(
dt_opencl_finish_sync_pipe(devid, params->pipetype);

// indirectly give gpu some air to breathe (and to do display related stuff)
dt_iop_nap(dt_opencl_micro_nap(devid));
dt_opencl_micro_nap(devid);
}

error:
Expand Down Expand Up @@ -796,7 +796,7 @@ int nlmeans_denoiseprofile_cl(
dt_opencl_finish_sync_pipe(devid, params->pipetype);

// indirectly give gpu some air to breathe (and to do display related stuff)
dt_iop_nap(dt_opencl_micro_nap(devid));
dt_opencl_micro_nap(devid);
}

error:
Expand Down
130 changes: 65 additions & 65 deletions src/common/opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -311,10 +311,11 @@ gboolean dt_opencl_avoid_atomics(const int devid)
return (!_cldev_running(devid)) ? FALSE : cl->dev[devid].avoid_atomics;
}

int dt_opencl_micro_nap(const int devid)
void dt_opencl_micro_nap(const int devid)
{
dt_opencl_t *cl = darktable.opencl;
return (!_cldev_running(devid)) ? 0 : cl->dev[devid].micro_nap;
if(_cldev_running(devid))
dt_iop_nap(cl->dev[devid].micro_nap);
}

gboolean dt_opencl_use_pinned_memory(const int devid)
Expand Down Expand Up @@ -1483,22 +1484,20 @@ void dt_opencl_init(
_opencl_apply_scheduling_profile(profile);

// let's keep track on unified memory devices
size_t unified_sysmem = 0;
dt_sys_resources_t *res = &darktable.dtresources;
for(int i = 0; i < cl->num_devs; i++)
{
if(cl->dev[i].unified_memory)
{
const size_t reserved = MIN(cl->dev[i].max_global_mem,
darktable.dtresources.total_memory * cl->dev[i].unified_fraction);
const size_t reserved = MIN(cl->dev[i].max_global_mem, res->total_memory * cl->dev[i].unified_fraction);
cl->dev[i].max_global_mem = reserved;
cl->dev[i].max_mem_alloc = MIN(cl->dev[i].max_mem_alloc, reserved);
dt_print_nts(DT_DEBUG_OPENCL,
" UNIFIED MEM SIZE: %.0f MB reserved for '%s'",
(double)reserved / 1024.0 / 1024.0,
cl->dev[i].cname);
unified_sysmem = MAX(unified_sysmem, reserved);
" UNIFIED MEM SIZE: %.0f MB reserved for '%s' id=%d",
(double)reserved / 1024.0 / 1024.0, cl->dev[i].cname, i);
res->total_memory -= reserved;
}
}
darktable.dtresources.total_memory -= unified_sysmem;
}
else // initialization failed
{
Expand Down Expand Up @@ -1569,9 +1568,9 @@ void dt_opencl_cleanup(dt_opencl_t *cl)
if(cl->print_statistics && (darktable.unmuted & DT_DEBUG_MEMORY))
{
dt_print_nts(DT_DEBUG_OPENCL,
" [opencl_summary_statistics] device '%s' (%d):"
" peak memory usage %zu bytes (%.1f MB)%s\n",
cl->dev[i].fullname, i, cl->dev[i].peak_memory,
" [opencl_summary_statistics] device '%s' id=%d:"
" peak memory usage %.1f MB%s\n",
cl->dev[i].fullname, i,
(float)cl->dev[i].peak_memory/(1024*1024),
cl->dev[i].clmem_error
? ", clmem runtime problem"
Expand All @@ -1583,7 +1582,7 @@ void dt_opencl_cleanup(dt_opencl_t *cl)
if(cl->dev[i].totalevents)
{
dt_print_nts(DT_DEBUG_OPENCL,
" [opencl_summary_statistics] device '%s' (%d): %d"
" [opencl_summary_statistics] device '%s' id=%d: %d"
" out of %d events were "
"successful and %d events lost. max event=%d%s%s\n",
cl->dev[i].fullname, i, cl->dev[i].totalsuccess,
Expand All @@ -1599,7 +1598,7 @@ void dt_opencl_cleanup(dt_opencl_t *cl)
else
{
dt_print_nts(DT_DEBUG_OPENCL,
" [opencl_summary_statistics] device '%s' (%d): NOT utilized\n",
" [opencl_summary_statistics] device '%s' id=%d: NOT utilized\n",
cl->dev[i].fullname, i);
}
}
Expand Down Expand Up @@ -2562,8 +2561,8 @@ static gboolean _check_kernel(const int dev,
if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL,
"[opencl_create_kernel] could not create kernel `%s'! (%s)",
cl->name_saved[kernel], cl_errstr(err));
"[opencl_create_kernel] could not create kernel `%s' for '%s' id=%d: (%s)",
cl->name_saved[kernel], cl->dev[dev].fullname, dev, cl_errstr(err));
cl->dev[dev].kernel_used[kernel] = 0;
cl->name_saved[kernel] = NULL; // don't try again
dt_pthread_mutex_unlock(&cl->lock);
Expand Down Expand Up @@ -2732,9 +2731,9 @@ int dt_opencl_enqueue_kernel_ndim_with_local(const int dev,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[dt_opencl_enqueue_kernel_%id%s] kernel `%s' (%i) on device %d: %s",
"[dt_opencl_enqueue_kernel_%id%s] kernel `%s' (%i) on device '%s' id=%d: %s",
dimensions, local ? "_with_local" : "",
darktable.opencl->name_saved[kernel], kernel, dev, cl_errstr(err));
cl->name_saved[kernel], kernel, cl->dev[dev].fullname, dev, cl_errstr(err));
_check_clmem_err(dev, err);
return err;
}
Expand All @@ -2758,9 +2757,10 @@ int dt_opencl_enqueue_kernel_2d_args_internal(const int dev,
va_end(ap);
if(err != CL_SUCCESS)
{
dt_opencl_t *cl = darktable.opencl;
dt_print(DT_DEBUG_OPENCL,
"[dt_opencl_enqueue_kernel_2d_args_internal] kernel `%s' (%i) on device %d: %s",
darktable.opencl->name_saved[kernel], kernel, dev, cl_errstr(err));
"[dt_opencl_enqueue_kernel_2d_args_internal] kernel `%s' (%i) on device '%s' id=%d: %s",
cl->name_saved[kernel], kernel, cl->dev[dev].fullname, dev, cl_errstr(err));
return err;
}
const size_t sizes[] = { ROUNDUPDWD(w, dev), ROUNDUPDHT(h, dev), 1 };
Expand All @@ -2779,9 +2779,10 @@ int dt_opencl_enqueue_kernel_1d_args_internal(const int dev,
va_end(ap);
if(err != CL_SUCCESS)
{
dt_opencl_t *cl = darktable.opencl;
dt_print(DT_DEBUG_OPENCL,
"[dt_opencl_enqueue_kernel_1d_args_internal] kernel `%s' (%i) on device %d: %s",
darktable.opencl->name_saved[kernel], kernel, dev, cl_errstr(err));
"[dt_opencl_enqueue_kernel_1d_args_internal] kernel `%s' (%i) on device '%s' id=%d: %s",
cl->name_saved[kernel], kernel, cl->dev[dev].fullname, dev, cl_errstr(err));
return err;
}
const size_t sizes[] = { ROUNDUPDWD(x, dev), 1, 1 };
Expand Down Expand Up @@ -2974,8 +2975,8 @@ int dt_opencl_enqueue_copy_image(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl copy_image] could not copy on device %d: %s",
devid, cl_errstr(err));
"[opencl copy_image] could not copy on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
_check_clmem_err(devid, err);
return err;
}
Expand All @@ -2997,8 +2998,8 @@ int dt_opencl_enqueue_copy_image_to_buffer(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl copy_image_to_buffer] could not copy on device %d: %s",
devid, cl_errstr(err));
"[opencl copy_image_to_buffer] could not copy on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
_check_clmem_err(devid, err);
return err;
}
Expand All @@ -3020,8 +3021,8 @@ int dt_opencl_enqueue_copy_buffer_to_image(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl copy_buffer_to_image] could not copy on device %d: %s",
devid, cl_errstr(err));
"[opencl copy_buffer_to_image] could not copy on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
_check_clmem_err(devid, err);
return err;
}
Expand All @@ -3043,8 +3044,8 @@ int dt_opencl_enqueue_copy_buffer_to_buffer(const int devid,
dstoffset, size, 0, NULL, eventp);
if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl copy_buffer_to_buffer] could not copy on device %d: %s",
devid, cl_errstr(err));
"[opencl copy_buffer_to_buffer] could not copy on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
_check_clmem_err(devid, err);
return err;
}
Expand All @@ -3068,8 +3069,8 @@ int dt_opencl_read_buffer_from_device(const int devid,
offset, size, host, 0, NULL, eventp);
if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl read_buffer_from_device] could not read from device %d: %s",
devid, cl_errstr(err));
"[opencl read_buffer_from_device] could not read from device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
return err;
}

Expand All @@ -3093,8 +3094,8 @@ int dt_opencl_write_buffer_to_device(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl write_buffer_to_device] could not write to device %d: %s",
devid, cl_errstr(err));
"[opencl write_buffer_to_device] could not write to device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
return err;
}

Expand All @@ -3114,8 +3115,8 @@ void *dt_opencl_copy_host_to_device_constant(const int devid,
if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl copy_host_to_device_constant]"
" could not alloc buffer on device %d: %s",
devid, cl_errstr(err));
" could not alloc buffer on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));

dt_opencl_memory_statistics(devid, dev, OPENCL_MEMORY_ADD);

Expand Down Expand Up @@ -3163,8 +3164,8 @@ void *dt_opencl_copy_host_to_device_rowpitch(const int devid,
if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl copy_host_to_device]"
" could not alloc/copy img buffer on device %d: %s",
devid, cl_errstr(err));
" could not alloc/copy img buffer on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));

_check_clmem_err(devid, err);
dt_opencl_memory_statistics(devid, dev, OPENCL_MEMORY_ADD);
Expand Down Expand Up @@ -3209,8 +3210,8 @@ void *dt_opencl_map_buffer(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl map buffer] could not map buffer on device %d: %s",
devid, cl_errstr(err));
"[opencl map buffer] could not map buffer on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
_check_clmem_err(devid, err);
return ptr;
}
Expand All @@ -3228,8 +3229,8 @@ int dt_opencl_unmap_mem_object(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl unmap mem object] could not unmap mem object on device %d: %s",
devid, cl_errstr(err));
"[opencl unmap mem object] could not unmap mem object on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
return err;
}

Expand Down Expand Up @@ -3269,8 +3270,8 @@ void *dt_opencl_alloc_device(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl alloc_device] could not alloc img buffer on device %d: %s",
devid, cl_errstr(err));
"[opencl alloc_device] could not alloc img buffer on device '%s' id=%d: %s",
cl->dev[devid].fullname, devid, cl_errstr(err));

_check_clmem_err(devid, err);
dt_opencl_memory_statistics(devid, dev, OPENCL_MEMORY_ADD);
Expand Down Expand Up @@ -3318,8 +3319,8 @@ void *dt_opencl_alloc_device_use_host_pointer(const int devid,
if(err != CL_SUCCESS || dev == NULL)
dt_print(DT_DEBUG_OPENCL,
"[opencl alloc_device_use_host_pointer]"
" could not allocate cl image on device %d: %s",
devid, cl_errstr(err));
" could not allocate cl image on device '%s' id=%d: %s",
cl->dev[devid].fullname, devid, cl_errstr(err));

_check_clmem_err(devid, err);
dt_opencl_memory_statistics(devid, dev, OPENCL_MEMORY_ADD);
Expand All @@ -3343,8 +3344,8 @@ void *dt_opencl_alloc_device_buffer(const int devid,
CL_MEM_READ_WRITE, size, NULL, &err);
if(err != CL_SUCCESS || buf == NULL)
dt_print(DT_DEBUG_OPENCL,
"[opencl alloc_device_buffer] could not allocate cl buffer on device %d: %s",
devid, cl_errstr(err));
"[opencl alloc_device_buffer] could not allocate cl buffer on device '%s' id=%d: %s",
cl->dev[devid].fullname, devid, cl_errstr(err));

_check_clmem_err(devid, err);
dt_opencl_memory_statistics(devid, buf, OPENCL_MEMORY_ADD);
Expand All @@ -3369,8 +3370,8 @@ void *dt_opencl_alloc_device_buffer_with_flags(const int devid,
flags, size, NULL, &err);
if(err != CL_SUCCESS || buf == NULL)
dt_print(DT_DEBUG_OPENCL,
"[opencl alloc_device_buffer_with_flags] could not allocate cl buffer on device %d: %s",
devid, cl_errstr(err));
"[opencl alloc_device_buffer_with_flags] could not allocate cl buffer on device '%s' id=%d: %s",
cl->dev[devid].fullname, devid, cl_errstr(err));

_check_clmem_err(devid, err);
dt_opencl_memory_statistics(devid, buf, OPENCL_MEMORY_ADD);
Expand Down Expand Up @@ -3504,28 +3505,27 @@ void dt_opencl_memory_statistics(int devid,
if(devid < 0)
return;

dt_opencl_t *cl = darktable.opencl;

if(action == OPENCL_MEMORY_ADD)
darktable.opencl->dev[devid].memory_in_use += dt_opencl_get_mem_object_size(mem);
cl->dev[devid].memory_in_use += dt_opencl_get_mem_object_size(mem);
else
darktable.opencl->dev[devid].memory_in_use -= dt_opencl_get_mem_object_size(mem);
cl->dev[devid].memory_in_use -= dt_opencl_get_mem_object_size(mem);

darktable.opencl->dev[devid].peak_memory =
MAX(darktable.opencl->dev[devid].peak_memory,
darktable.opencl->dev[devid].memory_in_use);
cl->dev[devid].peak_memory = MAX(cl->dev[devid].peak_memory, cl->dev[devid].memory_in_use);

if(darktable.unmuted & DT_DEBUG_MEMORY)
{
dt_print(DT_DEBUG_OPENCL,"[opencl memory] device %d: %zu bytes (%.1f MB) in use, %.1f MB available GPU memory, %.1f MB global GPU mem size",
devid,
darktable.opencl->dev[devid].memory_in_use,
(float)darktable.opencl->dev[devid].memory_in_use/(1024*1024),
(float)darktable.opencl->dev[devid].used_available/(1024*1024),
(float)darktable.opencl->dev[devid].max_global_mem/(1024*1024));
if(darktable.opencl->dev[devid].memory_in_use > darktable.opencl->dev[devid].used_available)
dt_print(DT_DEBUG_OPENCL,"[opencl memory] device '%s' id=%d: %.1fMB in use, %.1fMB available GPU mem of %.1fMB",
cl->dev[devid].fullname, devid,
(float)cl->dev[devid].memory_in_use/(1024*1024),
(float)cl->dev[devid].used_available/(1024*1024),
(float)cl->dev[devid].max_global_mem/(1024*1024));
if(cl->dev[devid].memory_in_use > darktable.opencl->dev[devid].used_available)
{
dt_print(DT_DEBUG_OPENCL,
"[opencl memory] Warning, device %d used more GPU memory than available",
devid);
"[opencl memory] Warning, device '%s' id=%d used more GPU memory than available",
cl->dev[devid].fullname, devid);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/common/opencl.h
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,7 @@ int dt_opencl_local_buffer_opt(const int devid,
void dt_opencl_write_device_config(const int devid);
gboolean dt_opencl_read_device_config(const int devid);
gboolean dt_opencl_avoid_atomics(const int devid);
int dt_opencl_micro_nap(const int devid);
void dt_opencl_micro_nap(const int devid);
gboolean dt_opencl_use_pinned_memory(const int devid);

G_END_DECLS
Expand Down
4 changes: 2 additions & 2 deletions src/develop/pixelpipe_hb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1798,7 +1798,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
}

// indirectly give gpu some air to breathe (and to do display related stuff)
dt_iop_nap(dt_opencl_micro_nap(pipe->devid));
dt_opencl_micro_nap(pipe->devid);

// transform to input colorspace
if(success_opencl)
Expand Down Expand Up @@ -2092,7 +2092,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
return TRUE;

// indirectly give gpu some air to breathe (and to do display related stuff)
dt_iop_nap(dt_opencl_micro_nap(pipe->devid));
dt_opencl_micro_nap(pipe->devid);

// transform to module input colorspace
if(success_opencl)
Expand Down
Loading

0 comments on commit b952d8c

Please sign in to comment.