Skip to content

Commit

Permalink
[cudacompat] Run everything on CPU via cudacompat
Browse files Browse the repository at this point in the history
  • Loading branch information
makortel committed Dec 29, 2020
1 parent 8ba8566 commit c22f848
Show file tree
Hide file tree
Showing 25 changed files with 508 additions and 442 deletions.
16 changes: 16 additions & 0 deletions src/cudacompat/CUDADataFormats/SiPixelClustersSoA.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#include "CUDADataFormats/SiPixelClustersSoA.h"

SiPixelClustersSoA::SiPixelClustersSoA(size_t maxClusters) {
moduleStart_d = std::make_unique<uint32_t[]>(maxClusters + 1);
clusInModule_d = std::make_unique<uint32_t[]>(maxClusters);
moduleId_d = std::make_unique<uint32_t[]>(maxClusters);
clusModuleStart_d = std::make_unique<uint32_t[]>(maxClusters + 1);

auto view = std::make_unique<DeviceConstView>();
view->moduleStart_ = moduleStart_d.get();
view->clusInModule_ = clusInModule_d.get();
view->moduleId_ = moduleId_d.get();
view->clusModuleStart_ = clusModuleStart_d.get();

view_d = std::move(view);
}
56 changes: 56 additions & 0 deletions src/cudacompat/CUDADataFormats/SiPixelClustersSoA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#ifndef CUDADataFormats_SiPixelCluster_interface_SiPixelClustersSoA_h
#define CUDADataFormats_SiPixelCluster_interface_SiPixelClustersSoA_h

#include "CUDACore/cudaCompat.h"
#include "CUDADataFormats/SiPixelClustersCUDA.h"

#include <memory>

class SiPixelClustersSoA {
public:
SiPixelClustersSoA() = default;
explicit SiPixelClustersSoA(size_t maxClusters);
~SiPixelClustersSoA() = default;

SiPixelClustersSoA(const SiPixelClustersSoA &) = delete;
SiPixelClustersSoA &operator=(const SiPixelClustersSoA &) = delete;
SiPixelClustersSoA(SiPixelClustersSoA &&) = default;
SiPixelClustersSoA &operator=(SiPixelClustersSoA &&) = default;

void setNClusters(uint32_t nClusters) { nClusters_h = nClusters; }

uint32_t nClusters() const { return nClusters_h; }

uint32_t *moduleStart() { return moduleStart_d.get(); }
uint32_t *clusInModule() { return clusInModule_d.get(); }
uint32_t *moduleId() { return moduleId_d.get(); }
uint32_t *clusModuleStart() { return clusModuleStart_d.get(); }

uint32_t const *moduleStart() const { return moduleStart_d.get(); }
uint32_t const *clusInModule() const { return clusInModule_d.get(); }
uint32_t const *moduleId() const { return moduleId_d.get(); }
uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); }

uint32_t const *c_moduleStart() const { return moduleStart_d.get(); }
uint32_t const *c_clusInModule() const { return clusInModule_d.get(); }
uint32_t const *c_moduleId() const { return moduleId_d.get(); }
uint32_t const *c_clusModuleStart() const { return clusModuleStart_d.get(); }

using DeviceConstView = SiPixelClustersCUDA::DeviceConstView;

DeviceConstView *view() const { return view_d.get(); }

private:
std::unique_ptr<uint32_t[]> moduleStart_d; // index of the first pixel of each module
std::unique_ptr<uint32_t[]> clusInModule_d; // number of clusters found in each module
std::unique_ptr<uint32_t[]> moduleId_d; // module id of each module

// originally from rechits
std::unique_ptr<uint32_t[]> clusModuleStart_d; // index of the first cluster of each module

std::unique_ptr<DeviceConstView> view_d; // "me" pointer

uint32_t nClusters_h;
};

#endif
17 changes: 17 additions & 0 deletions src/cudacompat/CUDADataFormats/SiPixelDigiErrorsSoA.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "CUDADataFormats/SiPixelDigiErrorsSoA.h"

#include <cassert>
#include <cstring>

SiPixelDigiErrorsSoA::SiPixelDigiErrorsSoA(size_t maxFedWords, PixelFormatterErrors errors)
: formatterErrors_h(std::move(errors)) {
error_d = std::make_unique<cms::cuda::SimpleVector<PixelErrorCompact>>();
data_d = std::make_unique<PixelErrorCompact[]>(maxFedWords);

std::memset(data_d.get(), 0x00, maxFedWords);

error_d = std::make_unique<cms::cuda::SimpleVector<PixelErrorCompact>>();
cms::cuda::make_SimpleVector(error_d.get(), maxFedWords, data_d.get());
assert(error_d->empty());
assert(error_d->capacity() == static_cast<int>(maxFedWords));
}
32 changes: 32 additions & 0 deletions src/cudacompat/CUDADataFormats/SiPixelDigiErrorsSoA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsSoA_h
#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsSoA_h

#include <memory>

#include "CUDACore/SimpleVector.h"
#include "DataFormats/PixelErrors.h"

class SiPixelDigiErrorsSoA {
public:
SiPixelDigiErrorsSoA() = default;
explicit SiPixelDigiErrorsSoA(size_t maxFedWords, PixelFormatterErrors errors);
~SiPixelDigiErrorsSoA() = default;

SiPixelDigiErrorsSoA(const SiPixelDigiErrorsSoA&) = delete;
SiPixelDigiErrorsSoA& operator=(const SiPixelDigiErrorsSoA&) = delete;
SiPixelDigiErrorsSoA(SiPixelDigiErrorsSoA&&) = default;
SiPixelDigiErrorsSoA& operator=(SiPixelDigiErrorsSoA&&) = default;

const PixelFormatterErrors& formatterErrors() const { return formatterErrors_h; }

cms::cuda::SimpleVector<PixelErrorCompact>* error() { return error_d.get(); }
cms::cuda::SimpleVector<PixelErrorCompact> const* error() const { return error_d.get(); }
cms::cuda::SimpleVector<PixelErrorCompact> const* c_error() const { return error_d.get(); }

private:
std::unique_ptr<PixelErrorCompact[]> data_d;
std::unique_ptr<cms::cuda::SimpleVector<PixelErrorCompact>> error_d;
PixelFormatterErrors formatterErrors_h;
};

#endif
21 changes: 21 additions & 0 deletions src/cudacompat/CUDADataFormats/SiPixelDigisSoA.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include "CUDADataFormats/SiPixelDigisSoA.h"

SiPixelDigisSoA::SiPixelDigisSoA(size_t maxFedWords) {
xx_d = std::make_unique<uint16_t[]>(maxFedWords);
yy_d = std::make_unique<uint16_t[]>(maxFedWords);
adc_d = std::make_unique<uint16_t[]>(maxFedWords);
moduleInd_d = std::make_unique<uint16_t[]>(maxFedWords);
clus_d = std::make_unique<int32_t[]>(maxFedWords);

pdigi_d = std::make_unique<uint32_t[]>(maxFedWords);
rawIdArr_d = std::make_unique<uint32_t[]>(maxFedWords);

auto view = std::make_unique<DeviceConstView>();
view->xx_ = xx_d.get();
view->yy_ = yy_d.get();
view->adc_ = adc_d.get();
view->moduleInd_ = moduleInd_d.get();
view->clus_ = clus_d.get();

view_d = std::move(view);
}
74 changes: 74 additions & 0 deletions src/cudacompat/CUDADataFormats/SiPixelDigisSoA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigisSoA_h
#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigisSoA_h

#include "CUDACore/cudaCompat.h"
#include "CUDADataFormats/SiPixelDigisCUDA.h"

#include <memory>

class SiPixelDigisSoA {
public:
SiPixelDigisSoA() = default;
explicit SiPixelDigisSoA(size_t maxFedWords);
~SiPixelDigisSoA() = default;

SiPixelDigisSoA(const SiPixelDigisSoA &) = delete;
SiPixelDigisSoA &operator=(const SiPixelDigisSoA &) = delete;
SiPixelDigisSoA(SiPixelDigisSoA &&) = default;
SiPixelDigisSoA &operator=(SiPixelDigisSoA &&) = default;

void setNModulesDigis(uint32_t nModules, uint32_t nDigis) {
nModules_h = nModules;
nDigis_h = nDigis;
}

uint32_t nModules() const { return nModules_h; }
uint32_t nDigis() const { return nDigis_h; }

uint16_t *xx() { return xx_d.get(); }
uint16_t *yy() { return yy_d.get(); }
uint16_t *adc() { return adc_d.get(); }
uint16_t *moduleInd() { return moduleInd_d.get(); }
int32_t *clus() { return clus_d.get(); }
uint32_t *pdigi() { return pdigi_d.get(); }
uint32_t *rawIdArr() { return rawIdArr_d.get(); }

uint16_t const *xx() const { return xx_d.get(); }
uint16_t const *yy() const { return yy_d.get(); }
uint16_t const *adc() const { return adc_d.get(); }
uint16_t const *moduleInd() const { return moduleInd_d.get(); }
int32_t const *clus() const { return clus_d.get(); }
uint32_t const *pdigi() const { return pdigi_d.get(); }
uint32_t const *rawIdArr() const { return rawIdArr_d.get(); }

uint16_t const *c_xx() const { return xx_d.get(); }
uint16_t const *c_yy() const { return yy_d.get(); }
uint16_t const *c_adc() const { return adc_d.get(); }
uint16_t const *c_moduleInd() const { return moduleInd_d.get(); }
int32_t const *c_clus() const { return clus_d.get(); }
uint32_t const *c_pdigi() const { return pdigi_d.get(); }
uint32_t const *c_rawIdArr() const { return rawIdArr_d.get(); }

using DeviceConstView = SiPixelDigisCUDA::DeviceConstView;

const DeviceConstView *view() const { return view_d.get(); }

private:
// These are consumed by downstream device code
std::unique_ptr<uint16_t[]> xx_d; // local coordinates of each pixel
std::unique_ptr<uint16_t[]> yy_d; //
std::unique_ptr<uint16_t[]> adc_d; // ADC of each pixel
std::unique_ptr<uint16_t[]> moduleInd_d; // module id of each pixel
std::unique_ptr<int32_t[]> clus_d; // cluster id of each pixel
std::unique_ptr<DeviceConstView> view_d; // "me" pointer

// These are for CPU output; should we (eventually) place them to a
// separate product?
std::unique_ptr<uint32_t[]> pdigi_d;
std::unique_ptr<uint32_t[]> rawIdArr_d;

uint32_t nModules_h = 0;
uint32_t nDigis_h = 0;
};

#endif
2 changes: 2 additions & 0 deletions src/cudacompat/CondFormats/SiPixelFedCablingMapGPUWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ class SiPixelFedCablingMapGPUWrapper {

// returns pointer to GPU memory
const SiPixelFedCablingMapGPU *getGPUProductAsync(cudaStream_t cudaStream) const;
const SiPixelFedCablingMapGPU *getCPUProduct() const { return cablingMapHost; }

// returns pointer to GPU memory
const unsigned char *getModToUnpAllAsync(cudaStream_t cudaStream) const;
const unsigned char *getModToUnpAll() const { return modToUnpDefault.data(); }

private:
std::vector<unsigned char, cms::cuda::HostAllocator<unsigned char>> modToUnpDefault;
Expand Down
11 changes: 10 additions & 1 deletion src/cudacompat/CondFormats/SiPixelGainCalibrationForHLTGPU.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,20 @@
SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(SiPixelGainForHLTonGPU const& gain,
std::vector<char> gainData)
: gainData_(std::move(gainData)) {
/*
cudaCheck(cudaMallocHost(&gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU)));
*gainForHLTonHost_ = gain;
*/
gainForHLTonHost_ = new SiPixelGainForHLTonGPU(gain);
gainForHLTonHost_->v_pedestals = reinterpret_cast<SiPixelGainForHLTonGPU_DecodingStructure*>(gainData_.data());
}

SiPixelGainCalibrationForHLTGPU::~SiPixelGainCalibrationForHLTGPU() { cudaCheck(cudaFreeHost(gainForHLTonHost_)); }
SiPixelGainCalibrationForHLTGPU::~SiPixelGainCalibrationForHLTGPU() {
/*
cudaCheck(cudaFreeHost(gainForHLTonHost_));
*/
delete gainForHLTonHost_;
}

SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() {
cudaCheck(cudaFree(gainForHLTonGPU));
Expand Down
24 changes: 6 additions & 18 deletions src/cudacompat/bin/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,15 @@ namespace {
void print_help(std::string const& name) {
std::cout
<< name
<< ": [--numberOfThreads NT] [--numberOfStreams NS] [--maxEvents ME] [--data PATH] [--transfer] [--validation] "
<< ": [--numberOfThreads NT] [--numberOfStreams NS] [--maxEvents ME] [--data PATH] [--validation] "
"[--histogram] [--empty]\n\n"
<< "Options\n"
<< " --numberOfThreads Number of threads to use (default 1)\n"
<< " --numberOfStreams Number of concurrent events (default 0=numberOfThreads)\n"
<< " --maxEvents Number of events to process (default -1 for all events in the input file)\n"
<< " --data Path to the 'data' directory (default 'data' in the directory of the executable)\n"
<< " --transfer Transfer results from GPU to CPU (default is to leave them on GPU)\n"
<< " --validation Run (rudimentary) validation at the end (implies --transfer)\n"
<< " --histogram Produce histograms at the end (implies --transfer)\n"
<< " --validation Run (rudimentary) validation at the end\n"
<< " --histogram Produce histograms at the end\n"
<< " --empty Ignore all producers (for testing only)\n"
<< std::endl;
}
Expand All @@ -39,7 +38,6 @@ int main(int argc, char** argv) {
int numberOfStreams = 0;
int maxEvents = -1;
std::filesystem::path datadir;
bool transfer = false;
bool validation = false;
bool histogram = false;
bool empty = false;
Expand All @@ -59,13 +57,9 @@ int main(int argc, char** argv) {
} else if (*i == "--data") {
++i;
datadir = *i;
} else if (*i == "--transfer") {
transfer = true;
} else if (*i == "--validation") {
transfer = true;
validation = true;
} else if (*i == "--histogram") {
transfer = true;
histogram = true;
} else if (*i == "--empty") {
empty = true;
Expand All @@ -85,6 +79,8 @@ int main(int argc, char** argv) {
std::cout << "Data directory '" << datadir << "' does not exist" << std::endl;
return EXIT_FAILURE;
}

// TODO: remove when can run without a GPU
int numberOfDevices;
auto status = cudaGetDeviceCount(&numberOfDevices);
if (cudaSuccess != status) {
Expand All @@ -98,19 +94,11 @@ int main(int argc, char** argv) {
std::vector<std::string> esmodules;
if (not empty) {
edmodules = {
"BeamSpotToCUDA", "SiPixelRawToClusterCUDA", "SiPixelRecHitCUDA", "CAHitNtupletCUDA", "PixelVertexProducerCUDA"};
"BeamSpotToPOD", "SiPixelRawToClusterCUDA", "SiPixelRecHitCUDA", "CAHitNtupletCUDA", "PixelVertexProducerCUDA"};
esmodules = {"BeamSpotESProducer",
"SiPixelFedCablingMapGPUWrapperESProducer",
"SiPixelGainCalibrationForHLTGPUESProducer",
"PixelCPEFastESProducer"};
if (transfer) {
auto capos = std::find(edmodules.begin(), edmodules.end(), "CAHitNtupletCUDA");
assert(capos != edmodules.end());
edmodules.insert(capos + 1, "PixelTrackSoAFromCUDA");
auto vertpos = std::find(edmodules.begin(), edmodules.end(), "PixelVertexProducerCUDA");
assert(vertpos != edmodules.end());
edmodules.insert(vertpos + 1, "PixelVertexSoAFromCUDA");
}
if (validation) {
edmodules.emplace_back("CountValidator");
}
Expand Down
24 changes: 24 additions & 0 deletions src/cudacompat/plugin-BeamSpotProducer/BeamSpotToPOD.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include "DataFormats/BeamSpotPOD.h"
#include "Framework/EDProducer.h"
#include "Framework/Event.h"
#include "Framework/EventSetup.h"
#include "Framework/PluginFactory.h"

class BeamSpotToPOD : public edm::EDProducer {
public:
explicit BeamSpotToPOD(edm::ProductRegistry& reg);
~BeamSpotToPOD() override = default;

void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override;

private:
const edm::EDPutTokenT<BeamSpotPOD> bsPutToken_;
};

BeamSpotToPOD::BeamSpotToPOD(edm::ProductRegistry& reg) : bsPutToken_{reg.produces<BeamSpotPOD>()} {}

void BeamSpotToPOD::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
iEvent.emplace(bsPutToken_, iSetup.get<BeamSpotPOD>());
}

DEFINE_FWK_MODULE(BeamSpotToPOD);
Loading

0 comments on commit c22f848

Please sign in to comment.