Skip to content

Commit

Permalink
Merge branch 'cmake-lang-cuda' into dev/add-cuda-managed-memory
Browse files Browse the repository at this point in the history
  • Loading branch information
Carsten Griwodz committed Jan 14, 2025
2 parents 29f8f35 + 5e85a1d commit 83317b8
Show file tree
Hide file tree
Showing 13 changed files with 5 additions and 103 deletions.
13 changes: 0 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-$

option(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON)
option(PopSift_BUILD_DOCS "Build PopSift documentation." OFF)
option(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF)
option(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF)
option(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON)
option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON)
Expand Down Expand Up @@ -99,10 +98,6 @@ find_package(CUDAToolkit)
message(STATUS "CUDA Version is ${CUDAToolkit_VERSION}")
set(CUDA_VERSION ${CUDAToolkit_VERSION})

if(PopSift_USE_NVTX_PROFILING)
message(STATUS "PROFILING CPU CODE: NVTX is in use")
endif()

if(PopSift_ERRCHK_AFTER_KERNEL)
message(STATUS "Synchronizing and checking errors after every kernel call")
list(APPEND CUDA_NVCC_FLAGS "-DERRCHK_AFTER_KERNEL")
Expand Down Expand Up @@ -153,13 +148,6 @@ else()
set(DISABLE_GRID_FILTER 0)
endif()

if(PopSift_USE_NVTX_PROFILING)
# library required for NVTX profiling of the CPU
set(PopSift_USE_NVTX 1)
else()
set(PopSift_USE_NVTX 0)
endif()

add_subdirectory(src)

if(PopSift_BUILD_DOCS)
Expand Down Expand Up @@ -197,7 +185,6 @@ message(STATUS "Build Shared libs: " ${BUILD_SHARED_LIBS})
message(STATUS "Build examples: " ${PopSift_BUILD_EXAMPLES})
message(STATUS "Build documentation: " ${PopSift_BUILD_DOCS})
message(STATUS "Generate position independent code: " ${CMAKE_POSITION_INDEPENDENT_CODE})
message(STATUS "Use CUDA NVTX for profiling: " ${PopSift_USE_NVTX_PROFILING})
message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL})
message(STATUS "Grid filtering: " ${PopSift_USE_GRID_FILTER})
message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS})
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ install:
before_build:
- md build
- cd build
- cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_NVTX_PROFILING:BOOL=OFF -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake ..
- cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake ..
- ls -l

build:
Expand Down
1 change: 0 additions & 1 deletion cmake/sift_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,4 @@

#define POPSIFT_HAVE_SHFL_DOWN_SYNC() @PopSift_HAVE_SHFL_DOWN_SYNC@
#define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@
#define POPSIFT_USE_NVTX() @PopSift_USE_NVTX@

3 changes: 0 additions & 3 deletions cudaInstallAppveyor.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@ echo Downloading CUDA toolkit 12 for Windows 10

appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvcc.zip
appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.5.82-archive.zip -Filename cuda_cudart.zip
appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvtx.zip
appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.5.82-archive.zip -Filename vs_integration.zip
dir

echo Unzipping CUDA toolkit 12
tar -xf cuda_nvcc.zip
tar -xf cuda_cudart.zip
tar -xf cuda_nvtx.zip
tar -xf vs_integration.zip
dir

Expand All @@ -22,7 +20,6 @@ mkdir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras"
echo Copying toolkit files to install dir(s)
xcopy cuda_cudart-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
xcopy cuda_nvcc-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
xcopy cuda_nvtx-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
xcopy visual_studio_integration-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras" /s /e /i /y


Expand Down
6 changes: 0 additions & 6 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,6 @@ target_link_libraries(popsift
CUDA::cudart
Threads::Threads)

if(PopSift_USE_NVTX_PROFILING)
target_link_libraries(popsift
PUBLIC
CUDA::nvtx3)
endif()

set_target_properties(popsift PROPERTIES VERSION ${PROJECT_VERSION})
set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d")
set_target_properties(popsift PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
Expand Down
4 changes: 4 additions & 0 deletions src/application/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ endif()

find_package(DevIL COMPONENTS IL ILU) # yields IL_FOUND, IL_LIBRARIES, IL_INCLUDE_DIR

# for newer CMake versions and Boost 1.70 pr newer must use Boost's make file
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.30)
cmake_policy(SET CMP0167 NEW)
endif()
if(PopSift_BOOST_USE_STATIC_LIBS)
set(Boost_USE_STATIC_LIBS ON)
endif()
Expand Down
6 changes: 0 additions & 6 deletions src/popsift/popsift.cu
Original file line number Diff line number Diff line change
Expand Up @@ -438,18 +438,12 @@ void SiftJob::setImg( popsift::ImageBase* img )

popsift::ImageBase* SiftJob::getImg()
{
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
_nvtx_id = nvtxRangeStartA( "inserting image" );
#endif
return _img;
}

void SiftJob::setFeatures( popsift::FeaturesBase* f )
{
_p.set_value( f );
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
nvtxRangeEnd( _nvtx_id );
#endif
}

popsift::FeaturesHost* SiftJob::get()
Expand Down
10 changes: 0 additions & 10 deletions src/popsift/popsift.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,6 @@
#include <thread>
#include <vector>

#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
#include <nvtx3/nvToolsExtCuda.h>
#else
#define nvtxRangeStartA(a)
#define nvtxRangeEnd(a)
#endif

/* user parameters */
namespace popsift
{
Expand All @@ -50,9 +43,6 @@ class SiftJob
unsigned char* _imageData;
popsift::ImageBase* _img;
std::exception_ptr _err;
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
nvtxRangeId_t _nvtx_id;
#endif

public:

Expand Down
9 changes: 0 additions & 9 deletions src/popsift/s_filtergrid.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,6 @@
#include "sift_extremum.h"
#include "sift_pyramid.h"

#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
#include <nvtx3/nvToolsExtCuda.h>
#else
#define nvtxRangePushA(a)
#define nvtxRangePop()
#endif

#if ! POPSIFT_IS_DEFINED(POPSIFT_DISABLE_GRID_FILTER)

#include <thrust/copy.h>
Expand Down Expand Up @@ -317,9 +310,7 @@ int Pyramid::extrema_filter_grid( const Config& conf, int ext_total )
}
}

nvtxRangePushA( "writing back count" );
writeDescCountersToDevice( );
nvtxRangePop( );

return ret_ext_total;
}
Expand Down
23 changes: 0 additions & 23 deletions src/popsift/s_image.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,6 @@
#include <fstream>
#include <iostream>

#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
#include <nvtx3/nvToolsExtCuda.h>
#else
#define nvtxRangePushA(a)
#define nvtxRangePop()
#endif

using namespace std;

namespace popsift {
Expand Down Expand Up @@ -98,8 +91,6 @@ void Image::resetDimensions( int w, int h )
destroyTexture( );
createTexture( );
} else {
nvtxRangePushA( "reallocating host-side image memory" );

_max_w = max( w, _max_w );
_max_h = max( h, _max_h );
_input_image_h.freeHost( popsift::CudaAllocated );
Expand All @@ -111,21 +102,15 @@ void Image::resetDimensions( int w, int h )

destroyTexture( );
createTexture( );

nvtxRangePop(); // "reallocating host-side image memory"
}
}

void Image::allocate( int w, int h )
{
nvtxRangePushA( "allocating host-side image memory" );

_input_image_h.allocHost( w, h, popsift::CudaAllocated );
_input_image_d.allocDev( w, h );

createTexture( );

nvtxRangePop(); // "allocating host-side image memory"
}

void Image::destroyTexture( )
Expand Down Expand Up @@ -222,8 +207,6 @@ void ImageFloat::resetDimensions( int w, int h )
destroyTexture( );
createTexture( );
} else {
nvtxRangePushA( "reallocating host-side image memory" );

_max_w = max( w, _max_w );
_max_h = max( h, _max_h );
_input_image_h.freeHost( popsift::CudaAllocated );
Expand All @@ -235,21 +218,15 @@ void ImageFloat::resetDimensions( int w, int h )

destroyTexture( );
createTexture( );

nvtxRangePop(); // "reallocating host-side image memory"
}
}

void ImageFloat::allocate( int w, int h )
{
nvtxRangePushA( "allocating host-side image memory" );

_input_image_h.allocHost( w, h, popsift::CudaAllocated );
_input_image_d.allocDev( w, h );

createTexture( );

nvtxRangePop(); // "allocating host-side image memory"
}

void ImageFloat::destroyTexture( )
Expand Down
7 changes: 0 additions & 7 deletions src/popsift/s_orientation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,6 @@
#include <cmath>
#include <cstdio>

#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
#include <nvtx3/nvToolsExtCuda.h>
#else
#define nvtxRangePushA(a)
#define nvtxRangePop()
#endif

using namespace popsift;
using namespace std;

Expand Down
10 changes: 0 additions & 10 deletions src/popsift/sift_desc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,6 @@
#include <cstdio>
#include <iostream>

#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
#include <nvtx3/nvToolsExtCuda.h>
#else
#define nvtxRangePushA(a)
#define nvtxRangePop()
#endif

using namespace popsift;
using namespace std;

Expand Down Expand Up @@ -55,11 +48,8 @@ using namespace std;
__host__
void Pyramid::descriptors( const Config& conf )
{
nvtxRangePushA("Reading orientation count");

readDescCountersFromDevice( _octaves[0].getStream() );
cudaStreamSynchronize( _octaves[0].getStream() );
nvtxRangePop( );

for( int octave=_num_octaves-1; octave>=0; octave-- )
// for( int octave=0; octave<_num_octaves; octave++ )
Expand Down
14 changes: 0 additions & 14 deletions src/popsift/sift_pyramid.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,6 @@
#define mkdir(path, perm) _mkdir(path)
#endif

#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
#include <nvtx3/nvToolsExtCuda.h>
#else
#define nvtxRangePushA(a)
#define nvtxRangePop()
#endif

#define PYRAMID_PRINT_DEBUG 0

using namespace std;
Expand Down Expand Up @@ -285,22 +278,18 @@ FeaturesHost* Pyramid::get_descriptors( const Config& conf )

readDescCountersFromDevice();

nvtxRangePushA( "download descriptors" );
FeaturesHost* features = new FeaturesHost( hct.ext_total, hct.ori_total );

if( hct.ext_total == 0 || hct.ori_total == 0 )
{
nvtxRangePop();
return features;
}

dim3 grid( grid_divide( hct.ext_total, 32 ) );
prep_features<<<grid,32,0,_download_stream>>>( features->getDescriptors(), up_fac );
POP_SYNC_CHK;

nvtxRangePushA( "register host memory" );
features->pin( );
nvtxRangePop();
popcuda_memcpy_async( features->getFeatures(),
dobuf_shadow.features,
hct.ext_total * sizeof(Feature),
Expand All @@ -313,10 +302,7 @@ FeaturesHost* Pyramid::get_descriptors( const Config& conf )
cudaMemcpyDeviceToHost,
_download_stream );
cudaStreamSynchronize( _download_stream );
nvtxRangePushA( "unregister host memory" );
features->unpin( );
nvtxRangePop();
nvtxRangePop();

return features;
}
Expand Down

0 comments on commit 83317b8

Please sign in to comment.