Update TensorrtAPI to TensorRT 10 #219

Merged · 2 commits · Jan 21, 2025
19 changes: 14 additions & 5 deletions engine/CMakeLists.txt
@@ -4,7 +4,8 @@ set (CMAKE_CXX_STANDARD 17)
 
 option(USE_PROFILING "Build with profiling" OFF)
 option(USE_RL "Build with reinforcement learning support" OFF)
-option(BACKEND_TENSORRT "Build with TensorRT support" ON)
+option(BACKEND_TENSORRT_10 "Build with TensorRT 10 support" OFF)
+option(BACKEND_TENSORRT_8 "Build with TensorRT 8 support" ON)
 option(BACKEND_TENSORRT_7 "Build with deprecated TensorRT 7 support" OFF)
 option(BACKEND_MXNET "Build with MXNet backend (Blas/IntelMKL/CUDA/TensorRT) support" OFF)
 option(BACKEND_TORCH "Build with Torch backend (CPU/GPU) support" OFF)
@@ -402,7 +403,7 @@ include_directories("src/domain/crazyhouse")
 include_directories("src/agents")
 include_directories("src/agents/config")
 include_directories("src/nn")
-
+link_libraries(stdc++fs)
 
 if (BACKEND_MXNET)
     IF(DEFINED ENV{MXNET_PATH})
@@ -459,11 +460,18 @@ if (USE_RL)
 endif()
 
 if(BACKEND_TENSORRT_7)
-    set(BACKEND_TENSORRT ON)
     add_definitions(-DTENSORRT7)
 endif()
 
-if (BACKEND_TENSORRT)
+if (BACKEND_TENSORRT_8)
+    add_definitions(-DTENSORRT8)
+endif()
+
+if (BACKEND_TENSORRT_10)
+    add_definitions(-DTENSORRT10)
+endif()
+
+if (BACKEND_TENSORRT_7 OR BACKEND_TENSORRT_8 OR BACKEND_TENSORRT_10)
     # build CrazyAra with TensorRT support, requires a working TensorRT-MXNet library package
     message(STATUS "Enabled TensorRT support")
     message(STATUS "TensorRT path: $ENV{TENSORRT_PATH}")
@@ -487,12 +495,13 @@ if (BACKEND_TENSORRT)
     endif()
     include_directories("$ENV{TENSORRT_PATH}/include")
     include_directories("$ENV{TENSORRT_PATH}/samples/common/")
+    include_directories("$ENV{TENSORRT_PATH}/samples/")
     add_definitions(-DTENSORRT)
 endif()
 
 add_executable(${PROJECT_NAME} ${source_files})
 
-if (BACKEND_TENSORRT)
+if (BACKEND_TENSORRT_7 OR BACKEND_TENSORRT_8 OR BACKEND_TENSORRT_10)
     target_link_libraries(${PROJECT_NAME} nvonnxparser nvinfer cudart ${CUDART_LIB} ${CUBLAS_LIB} ${CUDNN_LIB})
     if(BACKEND_TENSORRT_7)
         target_link_libraries(${PROJECT_NAME} myelin)
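Note on the options above: each backend switch now maps to its own compile definition (TENSORRT7, TENSORRT8, TENSORRT10), and the shared TensorRT code is gated on the union of the three. A minimal sketch of how these macros are meant to be consumed on the C++ side (illustrative only; the real call sites follow in tensorrtapi.cpp):

    // Illustrative gating; TENSORRT7/8/10 come from add_definitions(...) above.
    #if defined(TENSORRT10)
        // TensorRT 10 path: name-based tensor I/O, enqueueV3()
    #elif defined(TENSORRT8)
        // TensorRT 8 path: binding-index I/O, enqueueV2()
    #elif defined(TENSORRT7)
        // deprecated TensorRT 7 path: buildEngineWithConfig(), links myelin
    #endif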
2 changes: 1 addition & 1 deletion engine/src/environments/chess_related/chessbatchstream.cpp
@@ -152,7 +152,7 @@ int ChessBatchStream::getBatchSize() const
 
 nvinfer1::Dims ChessBatchStream::getDims() const
 {
-    Dims dims;
+    nvinfer1::Dims dims;
     dims.nbDims = 4;
     dims.d[0] = mBatchSize;
     dims.d[1] = mDims.d[0];
2 changes: 1 addition & 1 deletion engine/src/environments/chess_related/chessbatchstream.h
@@ -68,7 +68,7 @@ class ChessBatchStream : public IBatchStream
     int mBatchSize{0};
     int mBatchCount{0};
     int mMaxBatches{0};
-    Dims mDims{};
+    nvinfer1::Dims mDims{};
     std::vector<float> mData;
     std::vector<float> mLabels{};
 };
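Both chessbatchstream changes spell out nvinfer1::Dims instead of relying on an unqualified Dims. The explicit qualification compiles under every supported TensorRT version, so it is the safer spelling now that the headers pulled in for the TensorRT 10 build no longer make the short name visible at these points (tensorrtapi.h adds its own using namespace nvinfer1 below).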
54 changes: 46 additions & 8 deletions engine/src/nn/tensorrtapi.cpp
@@ -88,6 +88,7 @@ void TensorrtAPI::load_parameters()
     // do nothing
 }
 
+#ifndef TENSORRT10
 bool TensorrtAPI::retrieve_indices_by_name(bool verbose)
 {
     idxInput = engine->getBindingIndex(nnDesign.inputLayerName.c_str());
@@ -122,9 +123,11 @@ bool TensorrtAPI::retrieve_indices_by_name(bool verbose)
     }
     return true;
 }
+#endif
 
 void TensorrtAPI::init_nn_design()
 {
+#ifndef TENSORRT10
     nnDesign.hasAuxiliaryOutputs = engine->getNbBindings() > 3;
     if (!retrieve_indices_by_name(generatedTrtFromONNX)) {
         info_string_important("Fallback to default indices.");
@@ -133,15 +136,24 @@
         idxPolicyOutput = nnDesign.policyOutputIdx + nnDesign.nbInputs;
         idxAuxiliaryOutput = nnDesign.auxiliaryOutputIdx + nnDesign.nbInputs;
     }
 
     set_shape(nnDesign.inputShape, engine->getBindingDimensions(idxInput));
-    // make sure that the first dimension is the batch size, otherwise '-1' could cause problems
-    nnDesign.inputShape.v[0] = batchSize;
+
     set_shape(nnDesign.valueOutputShape, engine->getBindingDimensions(idxValueOutput));
     set_shape(nnDesign.policyOutputShape, engine->getBindingDimensions(idxPolicyOutput));
 
     if (nnDesign.hasAuxiliaryOutputs) {
         set_shape(nnDesign.auxiliaryOutputShape, engine->getBindingDimensions(idxAuxiliaryOutput));
     }
+#else
+    set_shape(nnDesign.inputShape, engine->getTensorShape(nnDesign.inputLayerName.c_str()));
+    set_shape(nnDesign.valueOutputShape, engine->getTensorShape(nnDesign.valueOutputName.c_str()));
+    set_shape(nnDesign.policyOutputShape, engine->getTensorShape(nnDesign.policySoftmaxOutputName.c_str()));
+    if (nnDesign.hasAuxiliaryOutputs) {
+        set_shape(nnDesign.auxiliaryOutputShape, engine->getTensorShape(nnDesign.auxiliaryOutputName.c_str()));
+    }
+#endif
+    // make sure that the first dimension is the batch size, otherwise '-1' could cause problems
+    nnDesign.inputShape.v[0] = batchSize;
     nnDesign.isPolicyMap = unsigned(nnDesign.policyOutputShape.v[1]) != StateConstants::NB_LABELS();
 }
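This hunk is the heart of the migration: TensorRT 10 removed the binding-index API (getNbBindings, getBindingIndex, getBindingDimensions), so shapes are now queried by tensor name. A self-contained sketch of the two styles, not the project's code; "data" stands in for the real input tensor name:

    #include <NvInfer.h>

    // Query an input shape under both API generations (sketch).
    nvinfer1::Dims inputShape(const nvinfer1::ICudaEngine& engine)
    {
    #ifdef TENSORRT10
        return engine.getTensorShape("data");            // TensorRT 10: lookup by name
    #else
        const int idx = engine.getBindingIndex("data");  // pre-10: lookup by binding index
        return engine.getBindingDimensions(idx);
    #endif
    }

Note also that hasAuxiliaryOutputs can no longer be derived from getNbBindings(); in the TensorRT 10 path it is instead set from network->getNbOutputs() while the engine is built (see create_cuda_engine_from_onnx below).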

@@ -151,7 +163,11 @@ void TensorrtAPI::bind_executor()
     context = SampleUniquePtr<nvinfer1::IExecutionContext>(engine->createExecutionContext());
     Dims inputDims;
     set_dims(inputDims, nnDesign.inputShape);
+#ifdef TENSORRT10
+    context->setInputShape(nnDesign.inputLayerName.c_str(), inputDims);
+#else
     context->setBindingDimensions(0, inputDims);
+#endif
 
     // create buffers object with respect to the engine and batch size
     CHECK(cudaStreamCreate(&stream));
@@ -184,8 +200,25 @@ void TensorrtAPI::predict(float* inputPlanes, float* valueOutput, float* probOutputs, float* auxiliaryOutputs)
     CHECK(cudaMemcpyAsync(deviceMemory[idxInput], inputPlanes, memorySizes[idxInput],
                           cudaMemcpyHostToDevice, stream));
 
+#ifdef TENSORRT10
+    context->setTensorAddress(nnDesign.inputLayerName.c_str(), deviceMemory[idxInput]);
+    context->setTensorAddress(nnDesign.valueOutputName.c_str(), deviceMemory[idxValueOutput]);
+    context->setTensorAddress(nnDesign.policySoftmaxOutputName.c_str(), deviceMemory[idxPolicyOutput]);
+#ifdef DYNAMIC_NN_ARCH
+    if (has_auxiliary_outputs()) {
+#else
+    if (StateConstants::NB_AUXILIARY_OUTPUTS()) {
+#endif
+        context->setTensorAddress(nnDesign.auxiliaryOutputName.c_str(), deviceMemory[idxAuxiliaryOutput]);
+    }
+#endif
+
     // run inference for given data
+#ifdef TENSORRT10
+    context->enqueueV3(stream);
+#else
     context->enqueueV2(deviceMemory, stream, nullptr);
+#endif
 
     // copy output from device back to host
     CHECK(cudaMemcpyAsync(valueOutput, deviceMemory[idxValueOutput],
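Under TensorRT 10, every I/O tensor address must be registered on the execution context before enqueueV3(); the bindings array that enqueueV2() consumed no longer exists. A minimal sketch of the new dispatch, with placeholder tensor names and device buffers assumed to be allocated and sized already:

    #include <NvInfer.h>
    #include <cuda_runtime_api.h>

    // TensorRT 10 inference dispatch (sketch; names and pointers are illustrative).
    void run(nvinfer1::IExecutionContext& ctx, cudaStream_t stream,
             void* dInput, void* dValue, void* dPolicy)
    {
        ctx.setTensorAddress("data", dInput);        // one call per I/O tensor
        ctx.setTensorAddress("value_out", dValue);
        ctx.setTensorAddress("policy_out", dPolicy);
        ctx.enqueueV3(stream);                       // replaces enqueueV2(bindings, stream, nullptr)
    }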
@@ -209,7 +242,9 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
     info_string("This may take a few minutes...");
     // create an engine builder
     SampleUniquePtr<IBuilder> builder = SampleUniquePtr<IBuilder>(createInferBuilder(gLogger.getTRTLogger()));
+#ifndef TENSORRT10
     builder->setMaxBatchSize(int(batchSize));
+#endif
 
     // create an ONNX network object
     const uint32_t explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
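setMaxBatchSize() belongs to the implicit-batch API that TensorRT 10 removed outright, which is why the call is compiled out rather than replaced: with the explicit-batch flag created just below, the batch dimension is part of the network itself and is bounded by the optimization profile instead. A sketch of how such a profile carries the batch range; the tensor name and dimensions are placeholders, not the project's actual values:

    // With explicit batch, the optimization profile replaces setMaxBatchSize() (sketch).
    nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile();
    profile->setDimensions("data", nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4{1,  34, 8, 8});
    profile->setDimensions("data", nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4{16, 34, 8, 8});
    profile->setDimensions("data", nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4{64, 34, 8, 8});
    config->addOptimizationProfile(profile);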
@@ -232,7 +267,7 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
     SampleUniquePtr<nvinfer1::IBuilderConfig> config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
     unique_ptr<IInt8Calibrator> calibrator;
     unique_ptr<IBatchStream> calibrationStream;
-    set_config_settings(config, 1_GiB, calibrator, calibrationStream);
+    set_config_settings(config, calibrator, calibrationStream);
 
     IOptimizationProfile* profile = builder->createOptimizationProfile();
 
@@ -243,12 +278,16 @@ ICudaEngine* TensorrtAPI::create_cuda_engine_from_onnx()
     profile->setDimensions(nnDesign.inputLayerName.c_str(), OptProfileSelector::kMAX, inputDims);
     config->addOptimizationProfile(profile);
 
+#ifdef TENSORRT10
+    nnDesign.hasAuxiliaryOutputs = network->getNbOutputs() > 2;
+#endif
+
     // build an engine from the TensorRT network with a given configuration struct
 #ifdef TENSORRT7
     return builder->buildEngineWithConfig(*network, *config);
 #else
     SampleUniquePtr<IHostMemory> serializedModel{builder->buildSerializedNetwork(*network, *config)};
-    SampleUniquePtr<IRuntime> runtime{createInferRuntime(sample::gLogger.getTRTLogger())};
+    runtime = SampleUniquePtr<IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
 
     // build an engine from the serialized model
     return runtime->deserializeCudaEngine(serializedModel->data(), serializedModel->size());
@@ -263,7 +302,7 @@ ICudaEngine* TensorrtAPI::get_cuda_engine() {
     const char* buffer = read_buffer(trtFilePath, bufferSize);
     if (buffer) {
         info_string("deserialize engine:", trtFilePath);
-        unique_ptr<IRuntime, samplesCommon::InferDeleter> runtime{createInferRuntime(gLogger)};
+        runtime = unique_ptr<IRuntime, samplesCommon::InferDeleter>{createInferRuntime(gLogger)};
 #ifdef TENSORRT7
         engine = runtime->deserializeCudaEngine(buffer, bufferSize, nullptr);
 #else
@@ -293,10 +332,9 @@ ICudaEngine* TensorrtAPI::get_cuda_engine() {
 }
 
 void TensorrtAPI::set_config_settings(SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
-                                      size_t maxWorkspace, unique_ptr<IInt8Calibrator>& calibrator,
+                                      unique_ptr<IInt8Calibrator>& calibrator,
                                       unique_ptr<IBatchStream>& calibrationStream)
 {
-    config->setMaxWorkspaceSize(maxWorkspace);
     switch (precision) {
     case float32:
         // default: do nothing
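The dropped maxWorkspace parameter follows from another removal: IBuilderConfig::setMaxWorkspaceSize() was deprecated in TensorRT 8.4 and is gone in 10, with TensorRT sizing the workspace on its own by default. If a cap were still wanted, the replacement, which this PR does not use, would be the memory-pool API available since 8.4:

    // Sketch, not part of this PR: cap the tactic workspace at 1 GiB.
    config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 1ULL << 30);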
7 changes: 5 additions & 2 deletions engine/src/nn/tensorrtapi.h
@@ -44,6 +44,7 @@
 #include "BatchStream.h"
 
 using namespace std;
+using namespace nvinfer1;
 
 enum Precision {
     float32,
@@ -77,6 +78,7 @@ class TensorrtAPI : public NeuralNetAPI
     string trtFilePath;
     std::shared_ptr<nvinfer1::ICudaEngine> engine;
     SampleUniquePtr<nvinfer1::IExecutionContext> context;
+    SampleUniquePtr<IRuntime> runtime;
     cudaStream_t stream;
     bool generatedTrtFromONNX;
 public:
@@ -93,12 +95,14 @@
 
     void predict(float* inputPlanes, float* valueOutput, float* probOutputs, float* auxiliaryOutputs) override;
 
+#ifndef TENSORRT10
     /**
     * @brief retrieve_indices_by_name Sets the layer name indices by names.
     * @param verbose If true debug info will be shown
     * @return True if all layer names were found, else false
     */
    bool retrieve_indices_by_name(bool verbose);
+#endif
 
 private:
     void load_model() override;
@@ -123,12 +127,11 @@
     /**
     * @brief set_config_settings Sets the configuration object which will be later used to build the engine
     * @param config Configuration object
-    * @param maxWorkspace Maximum allowable GPU work space for TensorRT tactic selection (e.g. 16_MiB, 1_GiB)
     * @param calibrator INT8 calibration object
     * @param calibrationStream Calibration stream used for INT8 calibration
     */
    void set_config_settings(SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
-                            size_t maxWorkspace, unique_ptr<IInt8Calibrator>& calibrator,
+                            unique_ptr<IInt8Calibrator>& calibrator,
                             unique_ptr<IBatchStream>& calibrationStream);
 
 
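The new runtime member (paired with the runtime = ... assignments in tensorrtapi.cpp above) looks like a lifetime fix: TensorRT 10 requires that a deserialized ICudaEngine not outlive the IRuntime that created it, so the runtime can no longer be a local variable inside get_cuda_engine(). A simplified sketch of the ownership relationship, using standard smart pointers instead of the sample helpers:

    #include <memory>
    #include <NvInfer.h>

    // Keep the runtime alive at least as long as the engine it produced (sketch).
    struct EngineHolder {
        std::unique_ptr<nvinfer1::IRuntime> runtime;    // declared first, destroyed last
        std::shared_ptr<nvinfer1::ICudaEngine> engine;  // destroyed before 'runtime'
    };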