Skip to content

Commit

Permalink
pin just part of the buffer allocated for compressed data
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinPulec committed Jan 15, 2025
1 parent c0dc84e commit 7403c40
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 7 deletions.
16 changes: 11 additions & 5 deletions src/gpujpeg_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -916,11 +916,15 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para

// (Re)allocate huffman coder data in host memory
if (coder->data_compressed != NULL) {
cudaFreeHost(coder->data_compressed);
cudaHostUnregister(coder->data_compressed);
free(coder->data_compressed);
coder->data_compressed = NULL;
}
cudaMallocHost((void**)&coder->data_compressed, max_compressed_data_size * sizeof(uint8_t));
gpujpeg_cuda_check_error("Coder data compressed host allocation", return 0);
coder->data_compressed = malloc(max_compressed_data_size);
coder->data_compressed_pinned_sz = max_compressed_data_size / (GPUJPEG_MAX_BLOCK_COMPRESSED_SIZE
/ GPUJPEG_BLOCK_SQUARED_SIZE); // divide by 8 to get the WxHxCH bytes
cudaHostRegister(coder->data_compressed, coder->data_compressed_pinned_sz, cudaHostRegisterDefault);
gpujpeg_cuda_check_error("Coder data compressed host registration", return 0);

// (Re)allocate huffman coder data in device memory
if (coder->d_data_compressed != NULL) {
Expand Down Expand Up @@ -1088,8 +1092,10 @@ gpujpeg_coder_deinit(struct gpujpeg_coder* coder)
cudaFreeHost(coder->data_quantized);
if ( coder->d_data_quantized != NULL )
cudaFree(coder->d_data_quantized);
if ( coder->data_compressed != NULL )
cudaFreeHost(coder->data_compressed);
if ( coder->data_compressed != NULL ) {
cudaHostUnregister(coder->data_compressed);
free(coder->data_compressed);
}
if ( coder->d_data_compressed != NULL )
cudaFree(coder->d_data_compressed);
if ( coder->segment != NULL )
Expand Down
1 change: 1 addition & 0 deletions src/gpujpeg_common_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ struct gpujpeg_coder

/// Huffman coder data in host memory (output/input for encoder/decoder)
uint8_t* data_compressed;
size_t data_compressed_pinned_sz; ///< amount of pinned memory from data_compressed
/// Huffman coder data in device memory (output/input for encoder/decoder)
uint8_t* d_data_compressed;
/// Huffman coder temporary data (in device memory only)
Expand Down
10 changes: 8 additions & 2 deletions src/gpujpeg_encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -537,8 +537,14 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, const struct gpujpeg_par
GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_from, coder->param.perf_stats, encoder->stream, return -1);

// Copy compressed data from device memory to cpu memory
if ( cudaSuccess != cudaMemcpyAsync(coder->data_compressed, coder->d_data_compressed, output_size, cudaMemcpyDeviceToHost, encoder->stream) ) {
return -1;
cudaMemcpyAsync(coder->data_compressed, coder->d_data_compressed,
MIN(output_size, coder->data_compressed_pinned_sz), cudaMemcpyDeviceToHost, encoder->stream);
gpujpeg_cuda_check_error("Encoder copy compressed data to pinned memory", return -1);
if ( output_size > coder->data_compressed_pinned_sz ) {
cudaMemcpyAsync(coder->data_compressed + coder->data_compressed_pinned_sz,
coder->d_data_compressed + coder->data_compressed_pinned_sz,
output_size - coder->data_compressed_pinned_sz, cudaMemcpyDeviceToHost, encoder->stream);
gpujpeg_cuda_check_error("Encoder copy compressed data to pageable emory", return -1);
}
// Copy segments from device memory
if ( cudaSuccess != cudaMemcpyAsync(coder->segment, coder->d_segment, coder->segment_count * sizeof(struct gpujpeg_segment), cudaMemcpyDeviceToHost, encoder->stream) ) {
Expand Down

0 comments on commit 7403c40

Please sign in to comment.