diff --git a/CUDADataFormats/Common/BuildFile.xml b/CUDADataFormats/Common/BuildFile.xml index b990c1295e31a..f6b68fe69b400 100644 --- a/CUDADataFormats/Common/BuildFile.xml +++ b/CUDADataFormats/Common/BuildFile.xml @@ -1,4 +1,6 @@ + + diff --git a/CUDADataFormats/Common/src/classes.h b/CUDADataFormats/Common/src/classes.h new file mode 100644 index 0000000000000..239e071d513a2 --- /dev/null +++ b/CUDADataFormats/Common/src/classes.h @@ -0,0 +1,7 @@ +#ifndef CUDADataFormats_Common_src_classes_h +#define CUDADataFormats_Common_src_classes_h + +#include "CUDADataFormats/Common/interface/HostProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" + +#endif // CUDADataFormats_Common_src_classes_h diff --git a/CUDADataFormats/Common/src/classes_def.xml b/CUDADataFormats/Common/src/classes_def.xml new file mode 100644 index 0000000000000..d8514251c807a --- /dev/null +++ b/CUDADataFormats/Common/src/classes_def.xml @@ -0,0 +1,4 @@ + + + + diff --git a/CUDADataFormats/SiPixelCluster/BuildFile.xml b/CUDADataFormats/SiPixelCluster/BuildFile.xml new file mode 100644 index 0000000000000..5406d1355533f --- /dev/null +++ b/CUDADataFormats/SiPixelCluster/BuildFile.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h new file mode 100644 index 0000000000000..acdf1b34a6d79 --- /dev/null +++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h @@ -0,0 +1,63 @@ +#ifndef CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h +#define CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h + +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" + +#include + +class SiPixelClustersCUDA { +public: + SiPixelClustersCUDA() = default; + explicit SiPixelClustersCUDA(size_t 
maxModules, cudaStream_t stream); + ~SiPixelClustersCUDA() = default; + + SiPixelClustersCUDA(const SiPixelClustersCUDA &) = delete; + SiPixelClustersCUDA &operator=(const SiPixelClustersCUDA &) = delete; + SiPixelClustersCUDA(SiPixelClustersCUDA &&) = default; + SiPixelClustersCUDA &operator=(SiPixelClustersCUDA &&) = default; + + void setNClusters(uint32_t nClusters) { nClusters_h = nClusters; } + + uint32_t nClusters() const { return nClusters_h; } + + uint32_t *moduleStart() { return moduleStart_d.get(); } + uint32_t *clusInModule() { return clusInModule_d.get(); } + uint32_t *moduleId() { return moduleId_d.get(); } + uint32_t *clusModuleStart() { return clusModuleStart_d.get(); } + + uint32_t const *moduleStart() const { return moduleStart_d.get(); } + uint32_t const *clusInModule() const { return clusInModule_d.get(); } + uint32_t const *moduleId() const { return moduleId_d.get(); } + uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); } + + class DeviceConstView { + public: + __device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_ + i); } + __device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_ + i); } + __device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_ + i); } + __device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_ + i); } + + uint32_t const *moduleStart_; + uint32_t const *clusInModule_; + uint32_t const *moduleId_; + uint32_t const *clusModuleStart_; + }; + + DeviceConstView *view() const { return view_d.get(); } + +private: + cms::cuda::device::unique_ptr moduleStart_d; // index of the first pixel of each module + cms::cuda::device::unique_ptr clusInModule_d; // number of clusters found in each module + cms::cuda::device::unique_ptr moduleId_d; // module id of each module + + // originally from rechits + cms::cuda::device::unique_ptr clusModuleStart_d; // index of the first cluster of each 
module + + cms::cuda::device::unique_ptr view_d; // "me" pointer + + uint32_t nClusters_h = 0; +}; + +#endif // CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h diff --git a/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h b/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h new file mode 100644 index 0000000000000..e9dfed7bca7a6 --- /dev/null +++ b/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h @@ -0,0 +1,37 @@ +#ifndef CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h +#define CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h + +#include +#include + +namespace pixelGPUConstants { +#ifdef GPU_SMALL_EVENTS + // kept for testing and debugging + constexpr uint32_t maxNumberOfHits = 24 * 1024; +#else + // data at pileup 50 has 18300 +/- 3500 hits; 40000 is around 6 sigma away + // tested on MC events with 55-75 pileup events + constexpr uint32_t maxNumberOfHits = 48 * 1024; +#endif +} // namespace pixelGPUConstants + +namespace gpuClustering { +#ifdef GPU_SMALL_EVENTS + // kept for testing and debugging + constexpr uint32_t maxHitsInIter() { return 64; } +#else + // optimized for real data PU 50 + // tested on MC events with 55-75 pileup events + constexpr uint32_t maxHitsInIter() { return 160; } +#endif + constexpr uint32_t maxHitsInModule() { return 1024; } + + constexpr uint16_t maxNumModules = 2000; + constexpr int32_t maxNumClustersPerModules = maxHitsInModule(); + constexpr uint32_t maxNumClusters = pixelGPUConstants::maxNumberOfHits; + constexpr uint16_t invalidModuleId = std::numeric_limits::max() - 1; + static_assert(invalidModuleId > maxNumModules); // invalidModuleId must be > maxNumModules + +} // namespace gpuClustering + +#endif // CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc new file mode 100644 index 
0000000000000..ae4a24dbbf83b --- /dev/null +++ b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc @@ -0,0 +1,19 @@ +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + +SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream) + : moduleStart_d(cms::cuda::make_device_unique(maxModules + 1, stream)), + clusInModule_d(cms::cuda::make_device_unique(maxModules, stream)), + moduleId_d(cms::cuda::make_device_unique(maxModules, stream)), + clusModuleStart_d(cms::cuda::make_device_unique(maxModules + 1, stream)) { + auto view = cms::cuda::make_host_unique(stream); + view->moduleStart_ = moduleStart_d.get(); + view->clusInModule_ = clusInModule_d.get(); + view->moduleId_ = moduleId_d.get(); + view->clusModuleStart_ = clusModuleStart_d.get(); + + view_d = cms::cuda::make_device_unique(stream); + cms::cuda::copyAsync(view_d, view, stream); +} diff --git a/CUDADataFormats/SiPixelCluster/src/classes.h b/CUDADataFormats/SiPixelCluster/src/classes.h new file mode 100644 index 0000000000000..3eee5a1fce009 --- /dev/null +++ b/CUDADataFormats/SiPixelCluster/src/classes.h @@ -0,0 +1,8 @@ +#ifndef CUDADataFormats_SiPixelCluster_src_classes_h +#define CUDADataFormats_SiPixelCluster_src_classes_h + +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "DataFormats/Common/interface/Wrapper.h" + +#endif // CUDADataFormats_SiPixelCluster_src_classes_h diff --git a/CUDADataFormats/SiPixelCluster/src/classes_def.xml b/CUDADataFormats/SiPixelCluster/src/classes_def.xml new file mode 100644 index 0000000000000..70decb9f27df7 --- /dev/null +++ b/CUDADataFormats/SiPixelCluster/src/classes_def.xml @@ -0,0 +1,4 @@ + + + + diff --git 
a/CUDADataFormats/SiPixelDigi/BuildFile.xml b/CUDADataFormats/SiPixelDigi/BuildFile.xml new file mode 100644 index 0000000000000..0806768a9b657 --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/BuildFile.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h new file mode 100644 index 0000000000000..bfb15c4ac9f5c --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h @@ -0,0 +1,42 @@ +#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h +#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h + +#include + +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + +class SiPixelDigiErrorsCUDA { +public: + using SiPixelErrorCompactVector = cms::cuda::SimpleVector; + + SiPixelDigiErrorsCUDA() = default; + explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream); + ~SiPixelDigiErrorsCUDA() = default; + + SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete; + SiPixelDigiErrorsCUDA& operator=(const SiPixelDigiErrorsCUDA&) = delete; + SiPixelDigiErrorsCUDA(SiPixelDigiErrorsCUDA&&) = default; + SiPixelDigiErrorsCUDA& operator=(SiPixelDigiErrorsCUDA&&) = default; + + const SiPixelFormatterErrors& formatterErrors() const { return formatterErrors_h; } + + SiPixelErrorCompactVector* error() { return error_d.get(); } + SiPixelErrorCompactVector const* error() const { return error_d.get(); } + + using HostDataError = std::pair>; + HostDataError dataErrorToHostAsync(cudaStream_t stream) const; + + void copyErrorToHostAsync(cudaStream_t stream); + 
+private: + cms::cuda::device::unique_ptr data_d; + cms::cuda::device::unique_ptr error_d; + cms::cuda::host::unique_ptr error_h; + SiPixelFormatterErrors formatterErrors_h; +}; + +#endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h new file mode 100644 index 0000000000000..950f9651cf83b --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -0,0 +1,85 @@ +#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h +#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h + +#include + +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" + +class SiPixelDigisCUDA { +public: + SiPixelDigisCUDA() = default; + explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream); + ~SiPixelDigisCUDA() = default; + + SiPixelDigisCUDA(const SiPixelDigisCUDA &) = delete; + SiPixelDigisCUDA &operator=(const SiPixelDigisCUDA &) = delete; + SiPixelDigisCUDA(SiPixelDigisCUDA &&) = default; + SiPixelDigisCUDA &operator=(SiPixelDigisCUDA &&) = default; + + void setNModulesDigis(uint32_t nModules, uint32_t nDigis) { + nModules_h = nModules; + nDigis_h = nDigis; + } + + uint32_t nModules() const { return nModules_h; } + uint32_t nDigis() const { return nDigis_h; } + + uint16_t *xx() { return xx_d.get(); } + uint16_t *yy() { return yy_d.get(); } + uint16_t *adc() { return adc_d.get(); } + uint16_t *moduleInd() { return moduleInd_d.get(); } + int32_t *clus() { return clus_d.get(); } + uint32_t *pdigi() { return pdigi_d.get(); } + uint32_t *rawIdArr() { return rawIdArr_d.get(); } + + uint16_t const *xx() const { return xx_d.get(); } + uint16_t const *yy() const { return yy_d.get(); } + uint16_t const *adc() const { return adc_d.get(); } + 
uint16_t const *moduleInd() const { return moduleInd_d.get(); } + int32_t const *clus() const { return clus_d.get(); } + uint32_t const *pdigi() const { return pdigi_d.get(); } + uint32_t const *rawIdArr() const { return rawIdArr_d.get(); } + + cms::cuda::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr clusToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr pdigiToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr rawIdArrToHostAsync(cudaStream_t stream) const; + + class DeviceConstView { + public: + __device__ __forceinline__ uint16_t xx(int i) const { return __ldg(xx_ + i); } + __device__ __forceinline__ uint16_t yy(int i) const { return __ldg(yy_ + i); } + __device__ __forceinline__ uint16_t adc(int i) const { return __ldg(adc_ + i); } + __device__ __forceinline__ uint16_t moduleInd(int i) const { return __ldg(moduleInd_ + i); } + __device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_ + i); } + + uint16_t const *xx_; + uint16_t const *yy_; + uint16_t const *adc_; + uint16_t const *moduleInd_; + int32_t const *clus_; + }; + + const DeviceConstView *view() const { return view_d.get(); } + +private: + // These are consumed by downstream device code + cms::cuda::device::unique_ptr xx_d; // local coordinates of each pixel + cms::cuda::device::unique_ptr yy_d; // + cms::cuda::device::unique_ptr adc_d; // ADC of each pixel + cms::cuda::device::unique_ptr moduleInd_d; // module id of each pixel + cms::cuda::device::unique_ptr clus_d; // cluster id of each pixel + cms::cuda::device::unique_ptr view_d; // "me" pointer + + // These are for CPU output; should we (eventually) place them to a + // separate product? 
+ cms::cuda::device::unique_ptr pdigi_d; // packed digi (row, col, adc) of each pixel + cms::cuda::device::unique_ptr rawIdArr_d; // DetId of each pixel + + uint32_t nModules_h = 0; + uint32_t nDigis_h = 0; +}; + +#endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc new file mode 100644 index 0000000000000..eecea35ddd622 --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -0,0 +1,40 @@ +#include + +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/memsetAsync.h" + +SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream) + : data_d(cms::cuda::make_device_unique(maxFedWords, stream)), + error_d(cms::cuda::make_device_unique(stream)), + error_h(cms::cuda::make_host_unique(stream)), + formatterErrors_h(std::move(errors)) { + cms::cuda::memsetAsync(data_d, 0x00, maxFedWords, stream); + + cms::cuda::make_SimpleVector(error_h.get(), maxFedWords, data_d.get()); + assert(error_h->empty()); + assert(error_h->capacity() == static_cast(maxFedWords)); + + cms::cuda::copyAsync(error_d, error_h, stream); +} + +void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) { + cms::cuda::copyAsync(error_h, error_d, stream); +} + +SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const { + // On one hand size() could be sufficient. On the other hand, if + // someone copies the SimpleVector<>, (s)he might expect the data + // buffer to actually have space for capacity() elements. 
+ auto data = cms::cuda::make_host_unique(error_h->capacity(), stream); + + // but transfer only the required amount + if (not error_h->empty()) { + cms::cuda::copyAsync(data, data_d, error_h->size(), stream); + } + auto err = *error_h; + err.set_data(data.get()); + return HostDataError(err, std::move(data)); +} diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc new file mode 100644 index 0000000000000..4e6a3fc2593fd --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc @@ -0,0 +1,46 @@ +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + +SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) + : xx_d(cms::cuda::make_device_unique(maxFedWords, stream)), + yy_d(cms::cuda::make_device_unique(maxFedWords, stream)), + adc_d(cms::cuda::make_device_unique(maxFedWords, stream)), + moduleInd_d(cms::cuda::make_device_unique(maxFedWords, stream)), + clus_d(cms::cuda::make_device_unique(maxFedWords, stream)), + view_d(cms::cuda::make_device_unique(stream)), + pdigi_d(cms::cuda::make_device_unique(maxFedWords, stream)), + rawIdArr_d(cms::cuda::make_device_unique(maxFedWords, stream)) { + auto view = cms::cuda::make_host_unique(stream); + view->xx_ = xx_d.get(); + view->yy_ = yy_d.get(); + view->adc_ = adc_d.get(); + view->moduleInd_ = moduleInd_d.get(); + view->clus_ = clus_d.get(); + cms::cuda::copyAsync(view_d, view, stream); +} + +cms::cuda::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(nDigis(), stream); + cms::cuda::copyAsync(ret, adc_d, nDigis(), stream); + return ret; +} + +cms::cuda::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const { + 
auto ret = cms::cuda::make_host_unique(nDigis(), stream); + cms::cuda::copyAsync(ret, clus_d, nDigis(), stream); + return ret; +} + +cms::cuda::host::unique_ptr SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(nDigis(), stream); + cms::cuda::copyAsync(ret, pdigi_d, nDigis(), stream); + return ret; +} + +cms::cuda::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(nDigis(), stream); + cms::cuda::copyAsync(ret, rawIdArr_d, nDigis(), stream); + return ret; +} diff --git a/CUDADataFormats/SiPixelDigi/src/classes.h b/CUDADataFormats/SiPixelDigi/src/classes.h new file mode 100644 index 0000000000000..fc5d318fad688 --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/src/classes.h @@ -0,0 +1,9 @@ +#ifndef CUDADataFormats_SiPixelDigi_src_classes_h +#define CUDADataFormats_SiPixelDigi_src_classes_h + +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "DataFormats/Common/interface/Wrapper.h" + +#endif // CUDADataFormats_SiPixelDigi_src_classes_h diff --git a/CUDADataFormats/SiPixelDigi/src/classes_def.xml b/CUDADataFormats/SiPixelDigi/src/classes_def.xml new file mode 100644 index 0000000000000..ff775afdc2046 --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/src/classes_def.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/CUDADataFormats/TrackingRecHit/BuildFile.xml b/CUDADataFormats/TrackingRecHit/BuildFile.xml new file mode 100644 index 0000000000000..8dc569d40b6c4 --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/BuildFile.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h new file mode 100644 index 0000000000000..967b5c6c8282f --- /dev/null +++ 
b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -0,0 +1,141 @@ +#ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h +#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h + +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h" +#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h" + +template +class TrackingRecHit2DHeterogeneous { +public: + template + using unique_ptr = typename Traits::template unique_ptr; + + using PhiBinner = TrackingRecHit2DSOAView::PhiBinner; + + TrackingRecHit2DHeterogeneous() = default; + + explicit TrackingRecHit2DHeterogeneous(uint32_t nHits, + pixelCPEforGPU::ParamsOnGPU const* cpeParams, + uint32_t const* hitsModuleStart, + cudaStream_t stream); + + ~TrackingRecHit2DHeterogeneous() = default; + + TrackingRecHit2DHeterogeneous(const TrackingRecHit2DHeterogeneous&) = delete; + TrackingRecHit2DHeterogeneous& operator=(const TrackingRecHit2DHeterogeneous&) = delete; + TrackingRecHit2DHeterogeneous(TrackingRecHit2DHeterogeneous&&) = default; + TrackingRecHit2DHeterogeneous& operator=(TrackingRecHit2DHeterogeneous&&) = default; + + TrackingRecHit2DSOAView* view() { return m_view.get(); } + TrackingRecHit2DSOAView const* view() const { return m_view.get(); } + + auto nHits() const { return m_nHits; } + + auto hitsModuleStart() const { return m_hitsModuleStart; } + auto hitsLayerStart() { return m_hitsLayerStart; } + auto phiBinner() { return m_phiBinner; } + auto iphi() { return m_iphi; } + + // only the local coord and detector index + cms::cuda::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; + +private: + static constexpr uint32_t n16 = 4; // number of elements in m_store16 + static constexpr uint32_t n32 = 9; // number of elements in m_store32 + static_assert(sizeof(uint32_t) == sizeof(float)); // just stating the 
obvious + + unique_ptr m_store16; //! + unique_ptr m_store32; //! + + unique_ptr m_PhiBinnerStore; //! + unique_ptr m_AverageGeometryStore; //! + + unique_ptr m_view; //! + + uint32_t m_nHits; + + uint32_t const* m_hitsModuleStart; // needed for legacy, this is on GPU! + + // needed as kernel params... + PhiBinner* m_phiBinner; + uint32_t* m_hitsLayerStart; + int16_t* m_iphi; +}; + +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +template +TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nHits, + pixelCPEforGPU::ParamsOnGPU const* cpeParams, + uint32_t const* hitsModuleStart, + cudaStream_t stream) + : m_nHits(nHits), m_hitsModuleStart(hitsModuleStart) { + auto view = Traits::template make_host_unique(stream); + + view->m_nHits = nHits; + m_view = Traits::template make_device_unique(stream); + m_AverageGeometryStore = Traits::template make_device_unique(stream); + view->m_averageGeometry = m_AverageGeometryStore.get(); + view->m_cpeParams = cpeParams; + view->m_hitsModuleStart = hitsModuleStart; + + // if empy do not bother + if (0 == nHits) { + if constexpr (std::is_same::value) { + cms::cuda::copyAsync(m_view, view, stream); + } else { + m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version + } + return; + } + + // the single arrays are not 128 bit alligned... 
+ // the hits are actually accessed in order only in building + // if ordering is relevant they may have to be stored phi-ordered by layer or so + // this will break 1to1 correspondence with cluster and module locality + // so unless proven VERY inefficient we keep it ordered as generated + m_store16 = Traits::template make_device_unique(nHits * n16, stream); + m_store32 = Traits::template make_device_unique(nHits * n32 + 11, stream); + m_PhiBinnerStore = Traits::template make_device_unique(stream); + + auto get16 = [&](int i) { return m_store16.get() + i * nHits; }; + auto get32 = [&](int i) { return m_store32.get() + i * nHits; }; + + // copy all the pointers + m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get(); + + view->m_xl = get32(0); + view->m_yl = get32(1); + view->m_xerr = get32(2); + view->m_yerr = get32(3); + + view->m_xg = get32(4); + view->m_yg = get32(5); + view->m_zg = get32(6); + view->m_rg = get32(7); + + m_iphi = view->m_iphi = reinterpret_cast(get16(0)); + + view->m_charge = reinterpret_cast(get32(8)); + view->m_xsize = reinterpret_cast(get16(2)); + view->m_ysize = reinterpret_cast(get16(3)); + view->m_detInd = get16(1); + + m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast(get32(n32)); + + // transfer view + if constexpr (std::is_same::value) { + cms::cuda::copyAsync(m_view, view, stream); + } else { + m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version + } +} + +using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous; + +#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h new file mode 100644 index 0000000000000..7f3c59cd70faf --- /dev/null +++ 
b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h @@ -0,0 +1,102 @@ +#ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAView_h +#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAView_h + +#include + +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" +#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" + +namespace pixelCPEforGPU { + struct ParamsOnGPU; +} + +class TrackingRecHit2DSOAView { +public: + static constexpr uint32_t maxHits() { return gpuClustering::maxNumClusters; } + using hindex_type = uint32_t; // if above is <=2^32 + + using PhiBinner = + cms::cuda::HistoContainer; + + using AverageGeometry = phase1PixelTopology::AverageGeometry; + + template + friend class TrackingRecHit2DHeterogeneous; + + __device__ __forceinline__ uint32_t nHits() const { return m_nHits; } + + __device__ __forceinline__ float& xLocal(int i) { return m_xl[i]; } + __device__ __forceinline__ float xLocal(int i) const { return __ldg(m_xl + i); } + __device__ __forceinline__ float& yLocal(int i) { return m_yl[i]; } + __device__ __forceinline__ float yLocal(int i) const { return __ldg(m_yl + i); } + + __device__ __forceinline__ float& xerrLocal(int i) { return m_xerr[i]; } + __device__ __forceinline__ float xerrLocal(int i) const { return __ldg(m_xerr + i); } + __device__ __forceinline__ float& yerrLocal(int i) { return m_yerr[i]; } + __device__ __forceinline__ float yerrLocal(int i) const { return __ldg(m_yerr + i); } + + __device__ __forceinline__ float& xGlobal(int i) { return m_xg[i]; } + __device__ __forceinline__ float xGlobal(int i) const { return __ldg(m_xg + i); } + __device__ __forceinline__ float& yGlobal(int i) { return m_yg[i]; } + __device__ __forceinline__ float yGlobal(int i) const { return __ldg(m_yg + i); } + __device__ 
__forceinline__ float& zGlobal(int i) { return m_zg[i]; } + __device__ __forceinline__ float zGlobal(int i) const { return __ldg(m_zg + i); } + __device__ __forceinline__ float& rGlobal(int i) { return m_rg[i]; } + __device__ __forceinline__ float rGlobal(int i) const { return __ldg(m_rg + i); } + + __device__ __forceinline__ int16_t& iphi(int i) { return m_iphi[i]; } + __device__ __forceinline__ int16_t iphi(int i) const { return __ldg(m_iphi + i); } + + __device__ __forceinline__ int32_t& charge(int i) { return m_charge[i]; } + __device__ __forceinline__ int32_t charge(int i) const { return __ldg(m_charge + i); } + __device__ __forceinline__ int16_t& clusterSizeX(int i) { return m_xsize[i]; } + __device__ __forceinline__ int16_t clusterSizeX(int i) const { return __ldg(m_xsize + i); } + __device__ __forceinline__ int16_t& clusterSizeY(int i) { return m_ysize[i]; } + __device__ __forceinline__ int16_t clusterSizeY(int i) const { return __ldg(m_ysize + i); } + __device__ __forceinline__ uint16_t& detectorIndex(int i) { return m_detInd[i]; } + __device__ __forceinline__ uint16_t detectorIndex(int i) const { return __ldg(m_detInd + i); } + + __device__ __forceinline__ pixelCPEforGPU::ParamsOnGPU const& cpeParams() const { return *m_cpeParams; } + + __device__ __forceinline__ uint32_t hitsModuleStart(int i) const { return __ldg(m_hitsModuleStart + i); } + + __device__ __forceinline__ uint32_t* hitsLayerStart() { return m_hitsLayerStart; } + __device__ __forceinline__ uint32_t const* hitsLayerStart() const { return m_hitsLayerStart; } + + __device__ __forceinline__ PhiBinner& phiBinner() { return *m_phiBinner; } + __device__ __forceinline__ PhiBinner const& phiBinner() const { return *m_phiBinner; } + + __device__ __forceinline__ AverageGeometry& averageGeometry() { return *m_averageGeometry; } + __device__ __forceinline__ AverageGeometry const& averageGeometry() const { return *m_averageGeometry; } + +private: + // local coord + float *m_xl, *m_yl; + float *m_xerr, 
*m_yerr; + + // global coord + float *m_xg, *m_yg, *m_zg, *m_rg; + int16_t* m_iphi; + + // cluster properties + int32_t* m_charge; + int16_t* m_xsize; + int16_t* m_ysize; + uint16_t* m_detInd; + + // supporting objects + // m_averageGeometry is corrected for beam spot, not sure where to host it otherwise + AverageGeometry* m_averageGeometry; // owned by TrackingRecHit2DHeterogeneous + pixelCPEforGPU::ParamsOnGPU const* m_cpeParams; // forwarded from setup, NOT owned + uint32_t const* m_hitsModuleStart; // forwarded from clusters + + uint32_t* m_hitsLayerStart; + + PhiBinner* m_phiBinner; + + uint32_t m_nHits; +}; + +#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAView_h diff --git a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc new file mode 100644 index 0000000000000..dd3cf548e11dd --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc @@ -0,0 +1,20 @@ +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + +template <> +cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::localCoordToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(4 * nHits(), stream); + cms::cuda::copyAsync(ret, m_store32, 4 * nHits(), stream); + return ret; +} + +template <> +cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::hitsModuleStartToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(gpuClustering::maxNumModules + 1, stream); + cudaCheck(cudaMemcpyAsync( + ret.get(), m_hitsModuleStart, sizeof(uint32_t) * (gpuClustering::maxNumModules + 1), cudaMemcpyDefault, stream)); + return ret; 
+} diff --git a/CUDADataFormats/TrackingRecHit/src/classes.h b/CUDADataFormats/TrackingRecHit/src/classes.h new file mode 100644 index 0000000000000..86fef25746efd --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/src/classes.h @@ -0,0 +1,8 @@ +#ifndef CUDADataFormats_SiPixelCluster_src_classes_h +#define CUDADataFormats_SiPixelCluster_src_classes_h + +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "DataFormats/Common/interface/Wrapper.h" + +#endif // CUDADataFormats_SiPixelCluster_src_classes_h diff --git a/CUDADataFormats/TrackingRecHit/src/classes_def.xml b/CUDADataFormats/TrackingRecHit/src/classes_def.xml new file mode 100644 index 0000000000000..7e1919de510b3 --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/src/classes_def.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/CUDADataFormats/TrackingRecHit/test/BuildFile.xml b/CUDADataFormats/TrackingRecHit/test/BuildFile.xml new file mode 100644 index 0000000000000..74f2818790d0f --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/test/BuildFile.xml @@ -0,0 +1,3 @@ + + + diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp new file mode 100644 index 0000000000000..3d8413b36ec96 --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp @@ -0,0 +1,29 @@ +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +namespace testTrackingRecHit2D { + + void runKernels(TrackingRecHit2DSOAView* hits); + +} + +int main() { + cms::cudatest::requireDevices(); + + cudaStream_t stream; + cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); + + // inner scope to 
deallocate memory before destroying the stream + { + auto nHits = 200; + TrackingRecHit2DCUDA tkhit(nHits, nullptr, nullptr, stream); + + testTrackingRecHit2D::runKernels(tkhit.view()); + } + + cudaCheck(cudaStreamDestroy(stream)); + + return 0; +} diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu new file mode 100644 index 0000000000000..06bd599d074f9 --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu @@ -0,0 +1,31 @@ +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" + +namespace testTrackingRecHit2D { + + __global__ void fill(TrackingRecHit2DSOAView* phits) { + assert(phits); + auto& hits = *phits; + assert(hits.nHits() == 200); + + int i = threadIdx.x; + if (i > 200) + return; + } + + __global__ void verify(TrackingRecHit2DSOAView const* phits) { + assert(phits); + auto const& hits = *phits; + assert(hits.nHits() == 200); + + int i = threadIdx.x; + if (i > 200) + return; + } + + void runKernels(TrackingRecHit2DSOAView* hits) { + assert(hits); + fill<<<1, 1024>>>(hits); + verify<<<1, 1024>>>(hits); + } + +} // namespace testTrackingRecHit2D diff --git a/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h new file mode 100644 index 0000000000000..ad8f0a4032588 --- /dev/null +++ b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h @@ -0,0 +1,14 @@ +#ifndef CalibTracker_Records_SiPixelGainCalibrationForHLTGPURcd_h +#define CalibTracker_Records_SiPixelGainCalibrationForHLTGPURcd_h + +#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h" +#include "FWCore/Framework/interface/DependentRecordImplementation.h" +#include "FWCore/Framework/interface/EventSetupRecordImplementation.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" + +class SiPixelGainCalibrationForHLTGPURcd + : 
public edm::eventsetup::DependentRecordImplementation< + SiPixelGainCalibrationForHLTGPURcd, + edm::mpl::Vector> {}; + +#endif // CalibTracker_Records_SiPixelGainCalibrationForHLTGPURcd_h diff --git a/CalibTracker/Records/src/SiPixelGainCalibrationForHLTGPURcd.cc b/CalibTracker/Records/src/SiPixelGainCalibrationForHLTGPURcd.cc new file mode 100644 index 0000000000000..e6020eca80b1f --- /dev/null +++ b/CalibTracker/Records/src/SiPixelGainCalibrationForHLTGPURcd.cc @@ -0,0 +1,5 @@ +#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +#include "FWCore/Utilities/interface/typelookup.h" + +EVENTSETUP_RECORD_REG(SiPixelGainCalibrationForHLTGPURcd); diff --git a/CalibTracker/SiPixelESProducers/BuildFile.xml b/CalibTracker/SiPixelESProducers/BuildFile.xml index 6efeef5ca0d1c..6e64a5b4b94ee 100644 --- a/CalibTracker/SiPixelESProducers/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/BuildFile.xml @@ -1,10 +1,15 @@ - - - + + + + + + + + diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h new file mode 100644 index 0000000000000..aea0f4d8b0c63 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h @@ -0,0 +1,32 @@ +#ifndef CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h +#define CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h + +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" + +class SiPixelGainCalibrationForHLT; +class SiPixelGainForHLTonGPU; +struct SiPixelGainForHLTonGPU_DecodingStructure; +class TrackerGeometry; + +class SiPixelGainCalibrationForHLTGPU { +public: + explicit SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT &gains, const 
TrackerGeometry &geom); + ~SiPixelGainCalibrationForHLTGPU(); + + const SiPixelGainForHLTonGPU *getGPUProductAsync(cudaStream_t cudaStream) const; + const SiPixelGainForHLTonGPU *cpuProduct() const { return gainForHLTonHost_; } + const SiPixelGainCalibrationForHLT *originalProduct() { return gains_; } + +private: + const SiPixelGainCalibrationForHLT *gains_ = nullptr; + SiPixelGainForHLTonGPU *gainForHLTonHost_ = nullptr; + struct GPUData { + ~GPUData(); + SiPixelGainForHLTonGPU *gainForHLTonGPU = nullptr; + SiPixelGainForHLTonGPU_DecodingStructure *gainDataOnGPU = nullptr; + }; + cms::cuda::ESProduct gpuData_; +}; + +#endif // CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h b/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h new file mode 100644 index 0000000000000..f7555a75d9bec --- /dev/null +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h @@ -0,0 +1,55 @@ +#ifndef CalibTracker_SiPixelESProducers_interface_SiPixelROCsStatusAndMappingWrapper_h +#define CalibTracker_SiPixelESProducers_interface_SiPixelROCsStatusAndMappingWrapper_h + +#include + +#include + +#include "CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" + +class SiPixelFedCablingMap; +class TrackerGeometry; +class SiPixelQuality; + +class SiPixelROCsStatusAndMappingWrapper { +public: + SiPixelROCsStatusAndMappingWrapper(SiPixelFedCablingMap const &cablingMap, + TrackerGeometry const &trackerGeom, + SiPixelQuality const *badPixelInfo); + ~SiPixelROCsStatusAndMappingWrapper(); + + bool hasQuality() const { return hasQuality_; } + + // returns pointer to GPU memory + const SiPixelROCsStatusAndMapping 
*getGPUProductAsync(cudaStream_t cudaStream) const; + + // returns pointer to GPU memory + const unsigned char *getModToUnpAllAsync(cudaStream_t cudaStream) const; + cms::cuda::device::unique_ptr getModToUnpRegionalAsync(std::set const &modules, + cudaStream_t cudaStream) const; + +private: + const SiPixelFedCablingMap *cablingMap_; + std::vector> modToUnpDefault; + unsigned int size; + bool hasQuality_; + + SiPixelROCsStatusAndMapping *cablingMapHost = nullptr; // pointer to struct in CPU + + struct GPUData { + ~GPUData(); + SiPixelROCsStatusAndMapping *cablingMapDevice = nullptr; // pointer to struct in GPU + }; + cms::cuda::ESProduct gpuData_; + + struct ModulesToUnpack { + ~ModulesToUnpack(); + unsigned char *modToUnpDefault = nullptr; // pointer to GPU + }; + cms::cuda::ESProduct modToUnp_; +}; + +#endif // CalibTracker_SiPixelESProducers_interface_SiPixelROCsStatusAndMappingWrapper_h diff --git a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml index 5380c9d7d346b..4bef676217b72 100644 --- a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml @@ -1,12 +1,14 @@ - - + + + + + - - + diff --git a/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc new file mode 100644 index 0000000000000..37055ea3e00ca --- /dev/null +++ b/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc @@ -0,0 +1,49 @@ +#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" +#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" +#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h" +#include "FWCore/Framework/interface/ESProducer.h" +#include 
"FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/ModuleFactory.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" + +#include + +class SiPixelGainCalibrationForHLTGPUESProducer : public edm::ESProducer { +public: + explicit SiPixelGainCalibrationForHLTGPUESProducer(const edm::ParameterSet& iConfig); + std::unique_ptr produce(const SiPixelGainCalibrationForHLTGPURcd& iRecord); + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + edm::ESGetToken gainsToken_; + edm::ESGetToken geometryToken_; +}; + +SiPixelGainCalibrationForHLTGPUESProducer::SiPixelGainCalibrationForHLTGPUESProducer(const edm::ParameterSet& iConfig) { + auto cc = setWhatProduced(this); + gainsToken_ = cc.consumes(); + geometryToken_ = cc.consumes(); +} + +void SiPixelGainCalibrationForHLTGPUESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + descriptions.add("siPixelGainCalibrationForHLTGPU", desc); +} + +std::unique_ptr SiPixelGainCalibrationForHLTGPUESProducer::produce( + const SiPixelGainCalibrationForHLTGPURcd& iRecord) { + auto gains = iRecord.getHandle(gainsToken_); + auto geom = iRecord.getHandle(geometryToken_); + return std::make_unique(*gains, *geom); +} + +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Utilities/interface/typelookup.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" + +DEFINE_FWK_EVENTSETUP_MODULE(SiPixelGainCalibrationForHLTGPUESProducer); diff --git a/CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc new file mode 100644 index 0000000000000..9c37860ca9ffe --- 
/dev/null +++ b/CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc @@ -0,0 +1,68 @@ +#include + +#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/DataRecord/interface/SiPixelQualityRcd.h" +#include "FWCore/Framework/interface/ESProducer.h" +#include "FWCore/Framework/interface/ESTransientHandle.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/ModuleFactory.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "RecoTracker/Record/interface/CkfComponentsRecord.h" // TODO: eventually use something more limited + +class SiPixelROCsStatusAndMappingWrapperESProducer : public edm::ESProducer { +public: + explicit SiPixelROCsStatusAndMappingWrapperESProducer(const edm::ParameterSet& iConfig); + std::unique_ptr produce(const CkfComponentsRecord& iRecord); + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + edm::ESGetToken cablingMapToken_; + edm::ESGetToken qualityToken_; + edm::ESGetToken geometryToken_; + bool useQuality_; +}; + +SiPixelROCsStatusAndMappingWrapperESProducer::SiPixelROCsStatusAndMappingWrapperESProducer( + const edm::ParameterSet& iConfig) + : useQuality_(iConfig.getParameter("UseQualityInfo")) { + auto const& component = iConfig.getParameter("ComponentName"); + auto cc = setWhatProduced(this, component); + cablingMapToken_ = cc.consumes(edm::ESInputTag{"", iConfig.getParameter("CablingMapLabel")}); + if (useQuality_) { + qualityToken_ = cc.consumes(); + } + geometryToken_ = cc.consumes(); +} + +void SiPixelROCsStatusAndMappingWrapperESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + 
desc.add("ComponentName", ""); + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); + desc.add("UseQualityInfo", false); + descriptions.addWithDefaultLabel(desc); +} + +std::unique_ptr SiPixelROCsStatusAndMappingWrapperESProducer::produce( + const CkfComponentsRecord& iRecord) { + auto cablingMap = iRecord.getTransientHandle(cablingMapToken_); + + const SiPixelQuality* quality = nullptr; + if (useQuality_) { + auto qualityInfo = iRecord.getTransientHandle(qualityToken_); + quality = qualityInfo.product(); + } + + auto geom = iRecord.getTransientHandle(geometryToken_); + + return std::make_unique(*cablingMap, *geom, quality); +} + +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Utilities/interface/typelookup.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" + +DEFINE_FWK_EVENTSETUP_MODULE(SiPixelROCsStatusAndMappingWrapperESProducer); diff --git a/CalibTracker/SiPixelESProducers/src/ES_SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/ES_SiPixelGainCalibrationForHLTGPU.cc new file mode 100644 index 0000000000000..80932fb468f71 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/src/ES_SiPixelGainCalibrationForHLTGPU.cc @@ -0,0 +1,4 @@ +#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(SiPixelGainCalibrationForHLTGPU); diff --git a/CalibTracker/SiPixelESProducers/src/ES_SiPixelROCsStatusAndMappingWrapper.cc b/CalibTracker/SiPixelESProducers/src/ES_SiPixelROCsStatusAndMappingWrapper.cc new file mode 100644 index 0000000000000..45767102b5958 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/src/ES_SiPixelROCsStatusAndMappingWrapper.cc @@ -0,0 +1,4 @@ +#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(SiPixelROCsStatusAndMappingWrapper); diff --git 
a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc new file mode 100644 index 0000000000000..66b8d9594353b --- /dev/null +++ b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc @@ -0,0 +1,101 @@ +#include + +#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "Geometry/CommonDetUnit/interface/GeomDetType.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains, + const TrackerGeometry& geom) + : gains_(&gains) { + // bizarre logic (looking for first strip-det) don't ask + auto const& dus = geom.detUnits(); + unsigned int n_detectors = dus.size(); + for (unsigned int i = 1; i < 7; ++i) { + const auto offset = geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]); + if (offset != dus.size() && dus[offset]->type().isTrackerStrip()) { + if (n_detectors > offset) + n_detectors = offset; + } + } + + LogDebug("SiPixelGainCalibrationForHLTGPU") + << "caching calibs for " << n_detectors << " pixel detectors of size " << gains.data().size() << '\n' + << "sizes " << sizeof(char) << ' ' << sizeof(uint8_t) << ' ' << sizeof(SiPixelGainForHLTonGPU::DecodingStructure); + + cudaCheck(cudaMallocHost((void**)&gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU))); + gainForHLTonHost_->v_pedestals_ = + (SiPixelGainForHLTonGPU_DecodingStructure*)this->gains_->data().data(); // so it can be used on CPU as well...
+ + // do not read back from the (possibly write-combined) memory buffer + auto minPed = gains.getPedLow(); + auto maxPed = gains.getPedHigh(); + auto minGain = gains.getGainLow(); + auto maxGain = gains.getGainHigh(); + auto nBinsToUseForEncoding = 253; + + // we will simplify later (not everything is needed....) + gainForHLTonHost_->minPed_ = minPed; + gainForHLTonHost_->maxPed_ = maxPed; + gainForHLTonHost_->minGain_ = minGain; + gainForHLTonHost_->maxGain_ = maxGain; + + gainForHLTonHost_->numberOfRowsAveragedOver_ = 80; + gainForHLTonHost_->nBinsToUseForEncoding_ = nBinsToUseForEncoding; + gainForHLTonHost_->deadFlag_ = 255; + gainForHLTonHost_->noisyFlag_ = 254; + + gainForHLTonHost_->pedPrecision_ = static_cast(maxPed - minPed) / nBinsToUseForEncoding; + gainForHLTonHost_->gainPrecision_ = static_cast(maxGain - minGain) / nBinsToUseForEncoding; + + LogDebug("SiPixelGainCalibrationForHLTGPU") + << "precisions g " << gainForHLTonHost_->pedPrecision_ << ' ' << gainForHLTonHost_->gainPrecision_; + + // fill the index map + auto const& ind = gains.getIndexes(); + LogDebug("SiPixelGainCalibrationForHLTGPU") << ind.size() << " " << n_detectors; + + for (auto i = 0U; i < n_detectors; ++i) { + auto p = std::lower_bound( + ind.begin(), ind.end(), dus[i]->geographicalId().rawId(), SiPixelGainCalibrationForHLT::StrictWeakOrdering()); + assert(p != ind.end() && p->detid == dus[i]->geographicalId()); + assert(p->iend <= gains.data().size()); + assert(p->iend >= p->ibegin); + assert(0 == p->ibegin % 2); + assert(0 == p->iend % 2); + assert(p->ibegin != p->iend); + assert(p->ncols > 0); + gainForHLTonHost_->rangeAndCols_[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(p->ibegin, p->iend), p->ncols); + if (ind[i].detid != dus[i]->geographicalId()) + LogDebug("SiPixelGainCalibrationForHLTGPU") << ind[i].detid << "!=" << dus[i]->geographicalId(); + } +} + +SiPixelGainCalibrationForHLTGPU::~SiPixelGainCalibrationForHLTGPU() { cudaCheck(cudaFreeHost(gainForHLTonHost_)); } + 
+SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() { + cudaCheck(cudaFree(gainForHLTonGPU)); + cudaCheck(cudaFree(gainDataOnGPU)); +} + +const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cudaStream_t cudaStream) const { + const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { + cudaCheck(cudaMalloc((void**)&data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU))); + cudaCheck(cudaMalloc((void**)&data.gainDataOnGPU, this->gains_->data().size())); + // gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory + cudaCheck(cudaMemcpyAsync( + data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream)); + + cudaCheck(cudaMemcpyAsync( + data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream)); + cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals_), + &(data.gainDataOnGPU), + sizeof(SiPixelGainForHLTonGPU_DecodingStructure*), + cudaMemcpyDefault, + stream)); + }); + return data.gainForHLTonGPU; +} diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc b/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc new file mode 100644 index 0000000000000..665d31b97ead2 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc @@ -0,0 +1,171 @@ +// C++ includes +#include +#include +#include +#include + +// CUDA includes +#include + +// CMSSW includes +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelQuality.h" +#include 
"FWCore/MessageLogger/interface/MessageLogger.h" +#include "Geometry/CommonDetUnit/interface/GeomDetType.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + +SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelFedCablingMap const& cablingMap, + TrackerGeometry const& trackerGeom, + SiPixelQuality const* badPixelInfo) + : cablingMap_(&cablingMap), modToUnpDefault(pixelgpudetails::MAX_SIZE), hasQuality_(badPixelInfo != nullptr) { + cudaCheck(cudaMallocHost(&cablingMapHost, sizeof(SiPixelROCsStatusAndMapping))); + + std::vector const& fedIds = cablingMap.fedIds(); + std::unique_ptr const& cabling = cablingMap.cablingTree(); + + unsigned int startFed = *(fedIds.begin()); + unsigned int endFed = *(fedIds.end() - 1); + + sipixelobjects::CablingPathToDetUnit path; + int index = 1; + + for (unsigned int fed = startFed; fed <= endFed; fed++) { + for (unsigned int link = 1; link <= pixelgpudetails::MAX_LINK; link++) { + for (unsigned int roc = 1; roc <= pixelgpudetails::MAX_ROC; roc++) { + path = {fed, link, roc}; + const sipixelobjects::PixelROC* pixelRoc = cabling->findItem(path); + cablingMapHost->fed[index] = fed; + cablingMapHost->link[index] = link; + cablingMapHost->roc[index] = roc; + if (pixelRoc != nullptr) { + cablingMapHost->rawId[index] = pixelRoc->rawId(); + cablingMapHost->rocInDet[index] = pixelRoc->idInDetUnit(); + modToUnpDefault[index] = false; + if (badPixelInfo != nullptr) + cablingMapHost->badRocs[index] = badPixelInfo->IsRocBad(pixelRoc->rawId(), pixelRoc->idInDetUnit()); + else + cablingMapHost->badRocs[index] = false; + } else { // store some dummy number + cablingMapHost->rawId[index] = gpuClustering::invalidModuleId; + cablingMapHost->rocInDet[index] = gpuClustering::invalidModuleId; + 
cablingMapHost->badRocs[index] = true; + modToUnpDefault[index] = true; + } + index++; + } + } + } // end of FED loop + + // Given FedId, Link and idinLnk; use the following formula + // to get the rawId and idinDU + // index = (FedID-1200) * MAX_LINK* MAX_ROC + (Link-1)* MAX_ROC + idinLnk; + // where, MAX_LINK = 48, MAX_ROC = 8 for Phase1 as mentioned Danek's email + // FedID varies between 1200 to 1338 (In total 108 FED's) + // Link varies between 1 to 48 + // idinLnk varies between 1 to 8 + + for (int i = 1; i < index; i++) { + if (cablingMapHost->rawId[i] == gpuClustering::invalidModuleId) { + cablingMapHost->moduleId[i] = gpuClustering::invalidModuleId; + } else { + /* + std::cout << cablingMapHost->rawId[i] << std::endl; + */ + auto gdet = trackerGeom.idToDetUnit(cablingMapHost->rawId[i]); + if (!gdet) { + LogDebug("SiPixelROCsStatusAndMapping") << " Not found: " << cablingMapHost->rawId[i] << std::endl; + continue; + } + cablingMapHost->moduleId[i] = gdet->index(); + } + LogDebug("SiPixelROCsStatusAndMapping") + << "----------------------------------------------------------------------------" << std::endl; + LogDebug("SiPixelROCsStatusAndMapping") + << i << std::setw(20) << cablingMapHost->fed[i] << std::setw(20) << cablingMapHost->link[i] << std::setw(20) + << cablingMapHost->roc[i] << std::endl; + LogDebug("SiPixelROCsStatusAndMapping") + << i << std::setw(20) << cablingMapHost->rawId[i] << std::setw(20) << cablingMapHost->rocInDet[i] + << std::setw(20) << cablingMapHost->moduleId[i] << std::endl; + LogDebug("SiPixelROCsStatusAndMapping") + << i << std::setw(20) << (bool)cablingMapHost->badRocs[i] << std::setw(20) << std::endl; + LogDebug("SiPixelROCsStatusAndMapping") + << "----------------------------------------------------------------------------" << std::endl; + } + + cablingMapHost->size = index - 1; +} + +SiPixelROCsStatusAndMappingWrapper::~SiPixelROCsStatusAndMappingWrapper() { cudaCheck(cudaFreeHost(cablingMapHost)); } + +const 
SiPixelROCsStatusAndMapping* SiPixelROCsStatusAndMappingWrapper::getGPUProductAsync( + cudaStream_t cudaStream) const { + const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { + // allocate + cudaCheck(cudaMalloc(&data.cablingMapDevice, sizeof(SiPixelROCsStatusAndMapping))); + + // transfer + cudaCheck(cudaMemcpyAsync( + data.cablingMapDevice, this->cablingMapHost, sizeof(SiPixelROCsStatusAndMapping), cudaMemcpyDefault, stream)); + }); + return data.cablingMapDevice; +} + +const unsigned char* SiPixelROCsStatusAndMappingWrapper::getModToUnpAllAsync(cudaStream_t cudaStream) const { + const auto& data = + modToUnp_.dataForCurrentDeviceAsync(cudaStream, [this](ModulesToUnpack& data, cudaStream_t stream) { + cudaCheck(cudaMalloc((void**)&data.modToUnpDefault, pixelgpudetails::MAX_SIZE_BYTE_BOOL)); + cudaCheck(cudaMemcpyAsync(data.modToUnpDefault, + this->modToUnpDefault.data(), + this->modToUnpDefault.size() * sizeof(unsigned char), + cudaMemcpyDefault, + stream)); + }); + return data.modToUnpDefault; +} + +cms::cuda::device::unique_ptr SiPixelROCsStatusAndMappingWrapper::getModToUnpRegionalAsync( + std::set const& modules, cudaStream_t cudaStream) const { + auto modToUnpDevice = cms::cuda::make_device_unique(pixelgpudetails::MAX_SIZE, cudaStream); + auto modToUnpHost = cms::cuda::make_host_unique(pixelgpudetails::MAX_SIZE, cudaStream); + + std::vector const& fedIds = cablingMap_->fedIds(); + std::unique_ptr const& cabling = cablingMap_->cablingTree(); + + unsigned int startFed = *(fedIds.begin()); + unsigned int endFed = *(fedIds.end() - 1); + + sipixelobjects::CablingPathToDetUnit path; + int index = 1; + + for (unsigned int fed = startFed; fed <= endFed; fed++) { + for (unsigned int link = 1; link <= pixelgpudetails::MAX_LINK; link++) { + for (unsigned int roc = 1; roc <= pixelgpudetails::MAX_ROC; roc++) { + path = {fed, link, roc}; + const sipixelobjects::PixelROC* pixelRoc = cabling->findItem(path); + if 
(pixelRoc != nullptr) { + modToUnpHost[index] = (not modules.empty()) and (modules.find(pixelRoc->rawId()) == modules.end()); + } else { // store some dummy number + modToUnpHost[index] = true; + } + index++; + } + } + } + + cudaCheck(cudaMemcpyAsync(modToUnpDevice.get(), + modToUnpHost.get(), + pixelgpudetails::MAX_SIZE * sizeof(unsigned char), + cudaMemcpyHostToDevice, + cudaStream)); + return modToUnpDevice; +} + +SiPixelROCsStatusAndMappingWrapper::GPUData::~GPUData() { cudaCheck(cudaFree(cablingMapDevice)); } + +SiPixelROCsStatusAndMappingWrapper::ModulesToUnpack::~ModulesToUnpack() { cudaCheck(cudaFree(modToUnpDefault)); } diff --git a/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCRandom_cff.py b/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCRandom_cff.py index 8185c8cfbb089..149eb1d7f3854 100644 --- a/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCRandom_cff.py +++ b/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCRandom_cff.py @@ -10,12 +10,14 @@ ) from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import siPixelDigis -siPixelDigisForLumiR = siPixelDigis.clone() -siPixelDigisForLumiR.InputLabel = cms.InputTag("hltFEDSelectorLumiPixels") +siPixelDigisForLumiR = siPixelDigis.cpu.clone( + InputLabel = "hltFEDSelectorLumiPixels" +) from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizerPreSplitting_cfi import siPixelClustersPreSplitting -siPixelClustersForLumiR = siPixelClustersPreSplitting.clone() -siPixelClustersForLumiR.src = cms.InputTag("siPixelDigisForLumiR") +siPixelClustersForLumiR = siPixelClustersPreSplitting.cpu.clone( + src = "siPixelDigisForLumiR" +) from Calibration.LumiAlCaRecoProducers.alcaPCCProducer_cfi import alcaPCCProducer alcaPCCProducerRandom = alcaPCCProducer.clone() diff --git a/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCZeroBias_cff.py b/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCZeroBias_cff.py index 0ef9e074cc817..cce52734afeb0 100644 --- 
a/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCZeroBias_cff.py +++ b/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCZeroBias_cff.py @@ -10,12 +10,14 @@ ) from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import siPixelDigis -siPixelDigisForLumiZB = siPixelDigis.clone() -siPixelDigisForLumiZB.InputLabel = cms.InputTag("hltFEDSelectorLumiPixels") +siPixelDigisForLumiZB = siPixelDigis.cpu.clone( + InputLabel = "hltFEDSelectorLumiPixels" +) from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizerPreSplitting_cfi import siPixelClustersPreSplitting -siPixelClustersForLumiZB = siPixelClustersPreSplitting.clone() -siPixelClustersForLumiZB.src = cms.InputTag("siPixelDigisForLumiZB") +siPixelClustersForLumiZB = siPixelClustersPreSplitting.cpu.clone( + src = "siPixelDigisForLumiZB" +) from Calibration.LumiAlCaRecoProducers.alcaPCCProducer_cfi import alcaPCCProducer alcaPCCProducerZeroBias = alcaPCCProducer.clone() diff --git a/Calibration/LumiAlCaRecoProducers/python/ALCARECOLumiPixels_cff.py b/Calibration/LumiAlCaRecoProducers/python/ALCARECOLumiPixels_cff.py index d88e1d4c27506..25c7e5e60cb26 100644 --- a/Calibration/LumiAlCaRecoProducers/python/ALCARECOLumiPixels_cff.py +++ b/Calibration/LumiAlCaRecoProducers/python/ALCARECOLumiPixels_cff.py @@ -9,12 +9,14 @@ ) from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import siPixelDigis -siPixelDigisForLumi = siPixelDigis.clone() -siPixelDigisForLumi.InputLabel = cms.InputTag("hltFEDSelectorLumiPixels") +siPixelDigisForLumi = siPixelDigis.cpu.clone( + InputLabel = "hltFEDSelectorLumiPixels" +) from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizerPreSplitting_cfi import siPixelClustersPreSplitting -siPixelClustersForLumi = siPixelClustersPreSplitting.clone() -siPixelClustersForLumi.src = cms.InputTag("siPixelDigisForLumi") +siPixelClustersForLumi = siPixelClustersPreSplitting.cpu.clone( + src = "siPixelDigisForLumi" +) # Sequence # seqALCARECOLumiPixels = cms.Sequence(ALCARECOLumiPixelsHLT + 
siPixelDigisForLumi + siPixelClustersForLumi) diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h new file mode 100644 index 0000000000000..aa5a127927b90 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h @@ -0,0 +1,74 @@ +#ifndef CondFormats_SiPixelObjects_interface_SiPixelGainForHLTonGPU_h +#define CondFormats_SiPixelObjects_interface_SiPixelGainForHLTonGPU_h + +#include +#include +#include + +// including would pull in the dependency on all of CUDA; +// instead, just define away the CUDA specific attributes to keep GCC happy. +#ifndef __CUDACC__ +#ifndef __host__ +#define __host__ +#endif // __host__ +#ifndef __device__ +#define __device__ +#endif // __device__ +#endif // __CUDACC__ + +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" + +struct SiPixelGainForHLTonGPU_DecodingStructure { + uint8_t gain; + uint8_t ped; +}; + +// copy of SiPixelGainCalibrationForHLT +class SiPixelGainForHLTonGPU { +public: + using DecodingStructure = SiPixelGainForHLTonGPU_DecodingStructure; + + using Range = std::pair; + + inline __host__ __device__ std::pair getPedAndGain( + uint32_t moduleInd, int col, int row, bool& isDeadColumn, bool& isNoisyColumn) const { + auto range = rangeAndCols_[moduleInd].first; + auto nCols = rangeAndCols_[moduleInd].second; + + // determine what averaged data block we are in (there should be 1 or 2 of these depending on if plaquette is 1 by X or 2 by X + unsigned int lengthOfColumnData = (range.second - range.first) / nCols; + unsigned int lengthOfAveragedDataInEachColumn = 2; // we always only have two values per column averaged block + unsigned int numberOfDataBlocksToSkip = row / numberOfRowsAveragedOver_; + + auto offset = range.first + col * lengthOfColumnData + lengthOfAveragedDataInEachColumn * numberOfDataBlocksToSkip; + + 
assert(offset < range.second); + assert(offset < 3088384); + assert(0 == offset % 2); + + DecodingStructure const* __restrict__ lp = v_pedestals_; + auto s = lp[offset / 2]; + + isDeadColumn = (s.ped & 0xFF) == deadFlag_; + isNoisyColumn = (s.ped & 0xFF) == noisyFlag_; + + return std::make_pair(decodePed(s.ped & 0xFF), decodeGain(s.gain & 0xFF)); + } + + constexpr float decodeGain(unsigned int gain) const { return gain * gainPrecision_ + minGain_; } + constexpr float decodePed(unsigned int ped) const { return ped * pedPrecision_ + minPed_; } + + DecodingStructure* v_pedestals_; + std::pair rangeAndCols_[gpuClustering::maxNumModules]; + + float minPed_, maxPed_, minGain_, maxGain_; + float pedPrecision_, gainPrecision_; + + unsigned int numberOfRowsAveragedOver_; // this is 80!!!! + unsigned int nBinsToUseForEncoding_; + unsigned int deadFlag_; + unsigned int noisyFlag_; +}; + +#endif // CondFormats_SiPixelObjects_interface_SiPixelGainForHLTonGPU_h diff --git a/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h b/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h new file mode 100644 index 0000000000000..f7cd8dedca941 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h @@ -0,0 +1,25 @@ +#ifndef CondFormats_SiPixelObjects_interface_SiPixelROCsStatusAndMapping_h +#define CondFormats_SiPixelObjects_interface_SiPixelROCsStatusAndMapping_h + +namespace pixelgpudetails { + // Maximum fed for phase1 is 150 but not all of them are filled + // Update the number FED based on maximum fed found in the cabling map + constexpr unsigned int MAX_FED = 150; + constexpr unsigned int MAX_LINK = 48; // maximum links/channels for Phase 1 + constexpr unsigned int MAX_ROC = 8; + constexpr unsigned int MAX_SIZE = MAX_FED * MAX_LINK * MAX_ROC; + constexpr unsigned int MAX_SIZE_BYTE_BOOL = MAX_SIZE * sizeof(unsigned char); +} // namespace pixelgpudetails + +struct SiPixelROCsStatusAndMapping { + alignas(128) unsigned int 
fed[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int link[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int roc[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int rawId[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int rocInDet[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int moduleId[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned char badRocs[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int size = 0; +}; + +#endif // CondFormats_SiPixelObjects_interface_SiPixelROCsStatusAndMapping_h diff --git a/Configuration/StandardSequences/python/RawToDigi_cff.py b/Configuration/StandardSequences/python/RawToDigi_cff.py index d0af52de00a47..dd3bf675faf0d 100644 --- a/Configuration/StandardSequences/python/RawToDigi_cff.py +++ b/Configuration/StandardSequences/python/RawToDigi_cff.py @@ -3,7 +3,7 @@ # This object is used to selectively make changes for different running # scenarios. In this case it makes changes for Run 2. -from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import * +from EventFilter.SiPixelRawToDigi.siPixelDigis_cff import * from EventFilter.SiStripRawToDigi.SiStripDigis_cfi import * @@ -45,7 +45,7 @@ from EventFilter.CTPPSRawToDigi.ctppsRawToDigi_cff import * RawToDigiTask = cms.Task(L1TRawToDigiTask, - siPixelDigis, + siPixelDigisTask, siStripDigis, ecalDigisTask, ecalPreshowerDigis, @@ -60,10 +60,10 @@ ) RawToDigi = cms.Sequence(RawToDigiTask) -RawToDigiTask_noTk = RawToDigiTask.copyAndExclude([siPixelDigis, siStripDigis]) +RawToDigiTask_noTk = RawToDigiTask.copyAndExclude([siPixelDigisTask, siStripDigis]) RawToDigi_noTk = cms.Sequence(RawToDigiTask_noTk) -RawToDigiTask_pixelOnly = cms.Task(siPixelDigis) +RawToDigiTask_pixelOnly = cms.Task(siPixelDigisTask, scalersRawToDigi) RawToDigi_pixelOnly = cms.Sequence(RawToDigiTask_pixelOnly) RawToDigiTask_ecalOnly = cms.Task(ecalDigisTask, ecalPreshowerDigis, scalersRawToDigi) @@ -73,7 +73,7 @@ RawToDigi_hcalOnly = cms.Sequence(RawToDigiTask_hcalOnly) 
scalersRawToDigi.scalersInputTag = 'rawDataCollector' -siPixelDigis.InputLabel = 'rawDataCollector' +siPixelDigis.cpu.InputLabel = 'rawDataCollector' ecalDigis.InputLabel = 'rawDataCollector' ecalPreshowerDigis.sourceTag = 'rawDataCollector' hcalDigis.InputLabel = 'rawDataCollector' diff --git a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py index e0d5d05bc193a..efdde1512fcf7 100644 --- a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py @@ -294,7 +294,7 @@ process.muonDTDigis.inputLabel = rawDataInputTag process.muonRPCDigis.InputLabel = rawDataInputTag process.scalersRawToDigi.scalersInputTag = rawDataInputTag -process.siPixelDigis.InputLabel = rawDataInputTag +process.siPixelDigis.cpu.InputLabel = rawDataInputTag process.siStripDigis.ProductLabel = rawDataInputTag process.load("RecoVertex.BeamSpotProducer.BeamSpot_cfi") diff --git a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py index 75f0545a5c5ba..f909104a39834 100644 --- a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py @@ -122,7 +122,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") @@ -175,7 +175,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = 
cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/csc_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/csc_dqm_sourceclient-live_cfg.py index 77b1e6b88b699..747ec4d01b07a 100644 --- a/DQM/Integration/python/clients/csc_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/csc_dqm_sourceclient-live_cfg.py @@ -189,7 +189,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") process.cscMonitor.FEDRawDataCollectionTag = cms.InputTag("rawDataCollector") process.dqmCSCClient.InputObjects = cms.untracked.InputTag("rawDataCollector") @@ -214,7 +214,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") process.cscMonitor.FEDRawDataCollectionTag = cms.InputTag("rawDataRepacker") process.dqmCSCClient.InputObjects = cms.untracked.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py index 6feff0263b749..b28f92d0f8d4e 100644 --- 
a/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py @@ -48,8 +48,8 @@ # Pixel sequence: process.load('Configuration.StandardSequences.MagneticField_cff') process.load('EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi') -process.siPixelDigis.Timing = False -process.siPixelDigis.IncludeErrors = True +process.siPixelDigis.cpu.Timing = False +process.siPixelDigis.cpu.IncludeErrors = True process.load('DQM.SiPixelMonitorRawData.SiPixelMonitorHLT_cfi') process.SiPixelHLTSource.saveFile = False process.SiPixelHLTSource.slowDown = False @@ -92,7 +92,7 @@ # Setting raw data collection label for all subsytem modules, depending on run type: if (process.runType.getRunType() == process.runType.hi_run): process.l1tStage2Fed.rawTag = cms.InputTag('rawDataRepacker') - process.siPixelDigis.InputLabel = cms.InputTag('rawDataRepacker') + process.siPixelDigis.cpu.InputLabel = cms.InputTag('rawDataRepacker') process.SiPixelHLTSource.RawInput = cms.InputTag('rawDataRepacker') process.siStripFEDCheck.RawDataTag = cms.InputTag('rawDataRepacker') process.esRawToDigi.sourceTag = cms.InputTag('rawDataRepacker') @@ -105,7 +105,7 @@ process.cscDQMEvF.InputObjects = cms.untracked.InputTag('rawDataRepacker') else: process.l1tStage2Fed.rawTag = cms.InputTag('rawDataCollector') - process.siPixelDigis.InputLabel = cms.InputTag('rawDataCollector') + process.siPixelDigis.cpu.InputLabel = cms.InputTag('rawDataCollector') process.SiPixelHLTSource.RawInput = cms.InputTag('rawDataCollector') process.siStripFEDCheck.RawDataTag = cms.InputTag('rawDataCollector') process.esRawToDigi.sourceTag = cms.InputTag('rawDataCollector') @@ -126,7 +126,7 @@ # Modules for the FED process.FEDModulesPath = cms.Path( process.l1tStage2Fed - + process.siPixelDigis + + process.siPixelDigis + process.SiPixelHLTSource + process.siStripFEDCheck + process.esRawToDigi diff --git a/DQM/Integration/python/clients/l1t_dqm_sourceclient-live_cfg.py 
b/DQM/Integration/python/clients/l1t_dqm_sourceclient-live_cfg.py index 301a79f2b2865..b2358e79a1bb6 100644 --- a/DQM/Integration/python/clients/l1t_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1t_dqm_sourceclient-live_cfg.py @@ -182,7 +182,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") process.bxTiming.FedSource = cms.untracked.InputTag("rawDataCollector") process.l1s.fedRawData = cms.InputTag("rawDataCollector") @@ -201,7 +201,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") process.bxTiming.FedSource = cms.untracked.InputTag("rawDataRepacker") process.l1s.fedRawData = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/l1temulator_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1temulator_dqm_sourceclient-live_cfg.py index 9701c71d14a3c..e2c9f057f04b1 100644 --- a/DQM/Integration/python/clients/l1temulator_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1temulator_dqm_sourceclient-live_cfg.py @@ -197,7 +197,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") -process.siPixelDigis.InputLabel = 
cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") #-------------------------------------------------- @@ -219,7 +219,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/l1tstage1_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage1_dqm_sourceclient-live_cfg.py index b8ae0bcf233b4..93df769c2dfe4 100644 --- a/DQM/Integration/python/clients/l1tstage1_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage1_dqm_sourceclient-live_cfg.py @@ -192,7 +192,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") process.bxTiming.FedSource = cms.untracked.InputTag("rawDataCollector") process.l1s.fedRawData = cms.InputTag("rawDataCollector") @@ -211,7 +211,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = 
cms.InputTag("rawDataRepacker") process.bxTiming.FedSource = cms.untracked.InputTag("rawDataRepacker") process.l1s.fedRawData = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/l1tstage1emulator_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage1emulator_dqm_sourceclient-live_cfg.py index 6200618c0fe44..4ed9c5e298890 100644 --- a/DQM/Integration/python/clients/l1tstage1emulator_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage1emulator_dqm_sourceclient-live_cfg.py @@ -206,7 +206,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") #-------------------------------------------------- @@ -228,7 +228,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py index c5788228cac1f..0c7f4707b6d8b 100644 --- a/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py @@ -138,7 +138,7 @@ process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.muonGEMDigis.InputLabel = cms.InputTag("rawDataRepacker") 
process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") process.tcdsDigis.InputLabel = cms.InputTag("rawDataRepacker") process.tcdsRawToDigi.InputLabel = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py index 91f1f564366d0..7b6372229555c 100644 --- a/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py @@ -137,7 +137,7 @@ process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.muonGEMDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") process.tcdsDigis.InputLabel = cms.InputTag("rawDataRepacker") process.tcdsRawToDigi.InputLabel = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/lumi_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/lumi_dqm_sourceclient-live_cfg.py index 47acfcdc471f2..e439454513880 100644 --- a/DQM/Integration/python/clients/lumi_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/lumi_dqm_sourceclient-live_cfg.py @@ -49,7 +49,7 @@ process.load("Configuration.StandardSequences.EndOfProcess_cff") process.load("Configuration.EventContent.EventContent_cff") process.load("Configuration.StandardSequences.Reconstruction_cff") -process.siPixelDigis.InputLabel = cms.InputTag("hltFEDSelectorLumiPixels") 
+process.siPixelDigis.cpu.InputLabel = cms.InputTag("hltFEDSelectorLumiPixels") process.reconstruction_step = cms.Sequence( process.siPixelDigis + diff --git a/DQM/Integration/python/clients/physics_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/physics_dqm_sourceclient-live_cfg.py index 76b6ae553949c..fef661979c427 100644 --- a/DQM/Integration/python/clients/physics_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/physics_dqm_sourceclient-live_cfg.py @@ -56,7 +56,7 @@ process.dqmSaverPB ) -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") ### process customizations included here from DQM.Integration.config.online_customizations_cfi import * @@ -69,4 +69,4 @@ print("Running with run type = ", process.runType.getRunType()) if (process.runType.getRunType() == process.runType.hi_run): - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py index 5c45b5f46ca9c..b849be77b6e63 100644 --- a/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py @@ -104,18 +104,18 @@ # PixelPhase1 Real data raw to digi process.load("EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi") -process.siPixelDigis.IncludeErrors = True +process.siPixelDigis.cpu.IncludeErrors = True if (process.runType.getRunType() == process.runType.hi_run): #-------------------------------- # Heavy Ion Configuration Changes #-------------------------------- - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") 
process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") else : - process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") - process.siStripDigis.InputLabel = cms.InputTag("rawDataCollector") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") + process.siStripDigis.InputLabel = cms.InputTag("rawDataCollector") ## Collision Reconstruction process.load("Configuration.StandardSequences.RawToDigi_Data_cff") diff --git a/DQM/Integration/python/clients/pixellumi_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/pixellumi_dqm_sourceclient-live_cfg.py index bd15690ede85a..50213e303b8de 100644 --- a/DQM/Integration/python/clients/pixellumi_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/pixellumi_dqm_sourceclient-live_cfg.py @@ -79,7 +79,7 @@ #----------------------- # Real data raw to digi process.load("EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi") -process.siPixelDigis.IncludeErrors = True +process.siPixelDigis.cpu.IncludeErrors = True # Local Reconstruction process.load("RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizer_cfi") @@ -92,13 +92,13 @@ # SelectEvents = cms.vstring('HLT_600Tower*','HLT_L1*','HLT_Jet*','HLT_*Cosmic*','HLT_HT*','HLT_MinBias_*','HLT_Physics*', 'HLT_ZeroBias*','HLT_HcalNZS*')) -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") #-------------------------------- # Heavy Ion Configuration Changes #-------------------------------- if (process.runType.getRunType() == process.runType.hi_run): process.load('Configuration.StandardSequences.RawToDigi_Repacked_cff') - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") if not unitTest: process.source.SelectEvents = cms.untracked.vstring('HLT_HIL1MinimumBiasHF2AND*') diff --git a/DQM/Integration/python/clients/scal_dqm_sourceclient-live_cfg.py 
b/DQM/Integration/python/clients/scal_dqm_sourceclient-live_cfg.py index 893443f74f1a2..2decc1b774251 100644 --- a/DQM/Integration/python/clients/scal_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/scal_dqm_sourceclient-live_cfg.py @@ -100,7 +100,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py index 37e219bd456c2..889fc8a978d22 100644 --- a/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py @@ -502,7 +502,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") process.siStripFEDMonitor.RawDataTag = cms.untracked.InputTag("rawDataCollector") #-------------------------------------------------- @@ -523,7 +523,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = 
cms.InputTag("rawDataRepacker") process.siStripFEDMonitor.RawDataTag = cms.untracked.InputTag("rawDataRepacker") diff --git a/DataFormats/SiPixelCluster/interface/SiPixelCluster.h b/DataFormats/SiPixelCluster/interface/SiPixelCluster.h index ab4ae1add2132..453d41555a65d 100644 --- a/DataFormats/SiPixelCluster/interface/SiPixelCluster.h +++ b/DataFormats/SiPixelCluster/interface/SiPixelCluster.h @@ -21,6 +21,7 @@ #include #include #include +#include class PixelDigi; @@ -68,19 +69,22 @@ class SiPixelCluster { static constexpr unsigned int MAXSPAN = 255; static constexpr unsigned int MAXPOS = 2047; + static constexpr uint16_t invalidClusterId = std::numeric_limits::max(); + /** Construct from a range of digis that form a cluster and from * a DetID. The range is assumed to be non-empty. */ - SiPixelCluster() {} + SiPixelCluster() = default; SiPixelCluster(unsigned int isize, uint16_t const* adcs, uint16_t const* xpos, uint16_t const* ypos, - uint16_t const xmin, - uint16_t const ymin) - : thePixelOffset(2 * isize), thePixelADC(adcs, adcs + isize) { + uint16_t xmin, + uint16_t ymin, + uint16_t id = invalidClusterId) + : thePixelOffset(2 * isize), thePixelADC(adcs, adcs + isize), theOriginalClusterId(id) { uint16_t maxCol = 0; uint16_t maxRow = 0; for (unsigned int i = 0; i != isize; ++i) { @@ -189,6 +193,10 @@ class SiPixelCluster { float getSplitClusterErrorX() const { return err_x; } float getSplitClusterErrorY() const { return err_y; } + // the original id (they get sorted) + auto originalId() const { return theOriginalClusterId; } + void setOriginalId(uint16_t id) { theOriginalClusterId = id; } + private: std::vector thePixelOffset; std::vector thePixelADC; @@ -198,6 +206,8 @@ class SiPixelCluster { uint8_t thePixelRowSpan = 0; // Span pixel index in the x direction (low edge). uint8_t thePixelColSpan = 0; // Span pixel index in the y direction (left edge). 
+ uint16_t theOriginalClusterId = invalidClusterId; + float err_x = -99999.9f; float err_y = -99999.9f; }; diff --git a/DataFormats/SiPixelCluster/src/classes_def.xml b/DataFormats/SiPixelCluster/src/classes_def.xml index 55c9fd8538417..d43f062877eb0 100644 --- a/DataFormats/SiPixelCluster/src/classes_def.xml +++ b/DataFormats/SiPixelCluster/src/classes_def.xml @@ -4,6 +4,7 @@ + diff --git a/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h b/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h new file mode 100644 index 0000000000000..6c016155b1cb0 --- /dev/null +++ b/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h @@ -0,0 +1,39 @@ +#ifndef DataFormats_SiPixelDigi_interface_SiPixelDigisSoA_h +#define DataFormats_SiPixelDigi_interface_SiPixelDigisSoA_h + +#include +#include + +// The main purpose of this class is to deliver digi and cluster data +// from an EDProducer that transfers the data from GPU to host to an +// EDProducer that converts the SoA to legacy data products. The class +// is independent of any GPU technology, and in prunciple could be +// produced by host code, and be used for other purposes than +// conversion-to-legacy as well. 
+class SiPixelDigisSoA { +public: + SiPixelDigisSoA() = default; + explicit SiPixelDigisSoA( + size_t nDigis, const uint32_t* pdigi, const uint32_t* rawIdArr, const uint16_t* adc, const int32_t* clus); + ~SiPixelDigisSoA() = default; + + auto size() const { return pdigi_.size(); } + + uint32_t pdigi(size_t i) const { return pdigi_[i]; } + uint32_t rawIdArr(size_t i) const { return rawIdArr_[i]; } + uint16_t adc(size_t i) const { return adc_[i]; } + int32_t clus(size_t i) const { return clus_[i]; } + + const std::vector& pdigiVector() const { return pdigi_; } + const std::vector& rawIdArrVector() const { return rawIdArr_; } + const std::vector& adcVector() const { return adc_; } + const std::vector& clusVector() const { return clus_; } + +private: + std::vector pdigi_; // packed digi (row, col, adc) of each pixel + std::vector rawIdArr_; // DetId of each pixel + std::vector adc_; // ADC of each pixel + std::vector clus_; // cluster id of each pixel +}; + +#endif diff --git a/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc b/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc new file mode 100644 index 0000000000000..b95c004a50a25 --- /dev/null +++ b/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc @@ -0,0 +1,10 @@ +#include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" + +#include + +SiPixelDigisSoA::SiPixelDigisSoA( + size_t nDigis, const uint32_t *pdigi, const uint32_t *rawIdArr, const uint16_t *adc, const int32_t *clus) + : pdigi_(pdigi, pdigi + nDigis), + rawIdArr_(rawIdArr, rawIdArr + nDigis), + adc_(adc, adc + nDigis), + clus_(clus, clus + nDigis) {} diff --git a/DataFormats/SiPixelDigi/src/classes.h b/DataFormats/SiPixelDigi/src/classes.h index 2f36b72ca7df8..1360ee6e469d9 100644 --- a/DataFormats/SiPixelDigi/src/classes.h +++ b/DataFormats/SiPixelDigi/src/classes.h @@ -5,6 +5,7 @@ #include "DataFormats/SiPixelDigi/interface/PixelDigiCollection.h" #include "DataFormats/SiPixelDigi/interface/SiPixelCalibDigi.h" #include 
"DataFormats/SiPixelDigi/interface/SiPixelCalibDigiError.h" +#include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" #include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/Common/interface/DetSetVector.h" #include "DataFormats/Common/interface/DetSetVectorNew.h" diff --git a/DataFormats/SiPixelDigi/src/classes_def.xml b/DataFormats/SiPixelDigi/src/classes_def.xml index de7779a5c00ea..e6bc08de161fa 100755 --- a/DataFormats/SiPixelDigi/src/classes_def.xml +++ b/DataFormats/SiPixelDigi/src/classes_def.xml @@ -49,4 +49,7 @@ + + + diff --git a/DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h b/DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h new file mode 100644 index 0000000000000..0b1a80868594f --- /dev/null +++ b/DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h @@ -0,0 +1,13 @@ +#ifndef DataFormats_SiPixelRawData_interface_SiPixelErrorCompact_h +#define DataFormats_SiPixelRawData_interface_SiPixelErrorCompact_h + +#include + +struct SiPixelErrorCompact { + uint32_t rawId; + uint32_t word; + uint8_t errorType; + uint8_t fedId; +}; + +#endif // DataFormats_SiPixelRawData_interface_SiPixelErrorCompact_h diff --git a/DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h b/DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h new file mode 100644 index 0000000000000..c72c19ce5fda4 --- /dev/null +++ b/DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h @@ -0,0 +1,30 @@ +#ifndef DataFormats_SiPixelDigi_interface_SiPixelErrorsSoA_h +#define DataFormats_SiPixelDigi_interface_SiPixelErrorsSoA_h + +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" + +#include +#include + +class SiPixelErrorsSoA { +public: + SiPixelErrorsSoA() = default; + explicit SiPixelErrorsSoA(size_t nErrors, const SiPixelErrorCompact *error, const SiPixelFormatterErrors *err) + : error_(error, error + nErrors), formatterErrors_(err) {} + 
~SiPixelErrorsSoA() = default; + + auto size() const { return error_.size(); } + + const SiPixelFormatterErrors *formatterErrors() const { return formatterErrors_; } + + const SiPixelErrorCompact &error(size_t i) const { return error_[i]; } + + const std::vector &errorVector() const { return error_; } + +private: + std::vector error_; + const SiPixelFormatterErrors *formatterErrors_ = nullptr; +}; + +#endif diff --git a/DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h b/DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h new file mode 100644 index 0000000000000..9d372737300d4 --- /dev/null +++ b/DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h @@ -0,0 +1,12 @@ +#ifndef DataFormats_SiPixelRawData_interface_SiPixelFormatterErrors_h +#define DataFormats_SiPixelRawData_interface_SiPixelFormatterErrors_h + +#include +#include + +#include "DataFormats/SiPixelRawData/interface/SiPixelRawDataError.h" +#include "FWCore/Utilities/interface/typedefs.h" + +using SiPixelFormatterErrors = std::map>; + +#endif // DataFormats_SiPixelRawData_interface_SiPixelFormatterErrors_h diff --git a/DataFormats/SiPixelRawData/src/classes.h b/DataFormats/SiPixelRawData/src/classes.h index 73768cc373013..7a07e9f35f388 100644 --- a/DataFormats/SiPixelRawData/src/classes.h +++ b/DataFormats/SiPixelRawData/src/classes.h @@ -2,6 +2,7 @@ #define SIPIXELRAWDATA_CLASSES_H #include "DataFormats/SiPixelRawData/interface/SiPixelRawDataError.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" #include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/Common/interface/DetSetVector.h" #include diff --git a/DataFormats/SiPixelRawData/src/classes_def.xml b/DataFormats/SiPixelRawData/src/classes_def.xml index 827d4b1191cf6..fd2b5dcf27965 100644 --- a/DataFormats/SiPixelRawData/src/classes_def.xml +++ b/DataFormats/SiPixelRawData/src/classes_def.xml @@ -14,4 +14,7 @@ + + + diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml 
b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index f92aa68373927..212738e941533 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -1,4 +1,7 @@ + + + diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc new file mode 100644 index 0000000000000..d09e703c36a00 --- /dev/null +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc @@ -0,0 +1,182 @@ +#include + +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/DetId/interface/DetIdCollection.h" +#include "DataFormats/FEDRawData/interface/FEDNumbering.h" +#include "DataFormats/SiPixelDetId/interface/PixelFEDChannel.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" +#include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" +#include "FWCore/Framework/interface/ESTransientHandle.h" +#include "FWCore/Framework/interface/ESWatcher.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" + +class SiPixelDigiErrorsFromSoA : public edm::stream::EDProducer<> { +public: + explicit SiPixelDigiErrorsFromSoA(const edm::ParameterSet& iConfig); + ~SiPixelDigiErrorsFromSoA() override = default; + + static void 
fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + + const edm::ESGetToken cablingToken_; + const edm::EDGetTokenT digiErrorSoAGetToken_; + const edm::EDPutTokenT> errorPutToken_; + const edm::EDPutTokenT tkErrorPutToken_; + const edm::EDPutTokenT userErrorPutToken_; + const edm::EDPutTokenT> disabledChannelPutToken_; + + edm::ESWatcher cablingWatcher_; + std::unique_ptr cabling_; + + const std::vector tkerrorlist_; + const std::vector usererrorlist_; + + const bool usePhase1_; +}; + +SiPixelDigiErrorsFromSoA::SiPixelDigiErrorsFromSoA(const edm::ParameterSet& iConfig) + : cablingToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), + digiErrorSoAGetToken_{consumes(iConfig.getParameter("digiErrorSoASrc"))}, + errorPutToken_{produces>()}, + tkErrorPutToken_{produces()}, + userErrorPutToken_{produces("UserErrorModules")}, + disabledChannelPutToken_{produces>()}, + tkerrorlist_(iConfig.getParameter>("ErrorList")), + usererrorlist_(iConfig.getParameter>("UserErrorList")), + usePhase1_(iConfig.getParameter("UsePhase1")) {} + +void SiPixelDigiErrorsFromSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("digiErrorSoASrc", edm::InputTag("siPixelDigiErrorsSoA")); + // the configuration parameters here are named following those in SiPixelRawToDigi + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); + desc.add("UsePhase1", false)->setComment("## Use phase1"); + desc.add>("ErrorList", std::vector{29}) + ->setComment("## ErrorList: list of error codes used by tracking to invalidate modules"); + desc.add>("UserErrorList", std::vector{40}) + ->setComment("## UserErrorList: list of error codes used by Pixel experts for investigation"); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelDigiErrorsFromSoA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { 
+ // pack errors into collection + + // initialize cabling map or update if necessary + if (cablingWatcher_.check(iSetup)) { + // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) + const SiPixelFedCablingMap* cablingMap = &iSetup.getData(cablingToken_); + cabling_ = cablingMap->cablingTree(); + LogDebug("map version:") << cabling_->version(); + } + + const auto& digiErrors = iEvent.get(digiErrorSoAGetToken_); + + edm::DetSetVector errorcollection{}; + DetIdCollection tkerror_detidcollection{}; + DetIdCollection usererror_detidcollection{}; + edmNew::DetSetVector disabled_channelcollection{}; + + PixelDataFormatter formatter(cabling_.get(), usePhase1_); // for phase 1 & 0 + const PixelDataFormatter::Errors* formatterErrors = digiErrors.formatterErrors(); + assert(formatterErrors != nullptr); + auto errors = *formatterErrors; // make a copy + PixelDataFormatter::DetErrors nodeterrors; + + auto size = digiErrors.size(); + for (auto i = 0U; i < size; i++) { + SiPixelErrorCompact err = digiErrors.error(i); + if (err.errorType != 0) { + SiPixelRawDataError error(err.word, err.errorType, err.fedId + FEDNumbering::MINSiPixeluTCAFEDID); + errors[err.rawId].push_back(error); + } + } + + constexpr uint32_t dummydetid = 0xffffffff; + typedef PixelDataFormatter::Errors::iterator IE; + for (auto& error : errors) { + uint32_t errordetid = error.first; + if (errordetid == dummydetid) { // errors given dummy detId must be sorted by Fed + nodeterrors.insert(nodeterrors.end(), errors[errordetid].begin(), errors[errordetid].end()); + } else { + edm::DetSet& errorDetSet = errorcollection.find_or_insert(errordetid); + errorDetSet.data.insert(errorDetSet.data.end(), error.second.begin(), error.second.end()); + // Fill detid of the detectors where there is error AND the error number is listed + // in the configurable error list in the job option cfi. 
+ // Code needs to be here, because there can be a set of errors for each + // entry in the for loop over PixelDataFormatter::Errors + + std::vector disabledChannelsDetSet; + + for (auto const& aPixelError : errorDetSet) { + // For the time being, we extend the error handling functionality with ErrorType 25 + // In the future, we should sort out how the usage of tkerrorlist can be generalized + if (aPixelError.getType() == 25) { + int fedId = aPixelError.getFedId(); + const sipixelobjects::PixelFEDCabling* fed = cabling_->fed(fedId); + if (fed) { + cms_uint32_t linkId = formatter.linkId(aPixelError.getWord32()); + const sipixelobjects::PixelFEDLink* link = fed->link(linkId); + if (link) { + // The "offline" 0..15 numbering is fixed by definition, also, the FrameConversion depends on it + // in contrast, the ROC-in-channel numbering is determined by hardware --> better to use the "offline" scheme + PixelFEDChannel ch = {fed->id(), linkId, 25, 0}; + for (unsigned int iRoc = 1; iRoc <= link->numberOfROCs(); iRoc++) { + const sipixelobjects::PixelROC* roc = link->roc(iRoc); + if (roc->idInDetUnit() < ch.roc_first) + ch.roc_first = roc->idInDetUnit(); + if (roc->idInDetUnit() > ch.roc_last) + ch.roc_last = roc->idInDetUnit(); + } + disabledChannelsDetSet.push_back(ch); + } + } + } else { + // fill list of detIds to be turned off by tracking + if (!tkerrorlist_.empty()) { + auto it_find = std::find(tkerrorlist_.begin(), tkerrorlist_.end(), aPixelError.getType()); + if (it_find != tkerrorlist_.end()) { + tkerror_detidcollection.push_back(errordetid); + } + } + } + + // fill list of detIds with errors to be studied + if (!usererrorlist_.empty()) { + auto it_find = std::find(usererrorlist_.begin(), usererrorlist_.end(), aPixelError.getType()); + if (it_find != usererrorlist_.end()) { + usererror_detidcollection.push_back(errordetid); + } + } + + } // loop on DetSet of errors + + if (!disabledChannelsDetSet.empty()) { + disabled_channelcollection.insert(errordetid, 
disabledChannelsDetSet.data(), disabledChannelsDetSet.size()); + } + + } // if error assigned to a real DetId + } // loop on errors in event for this FED + + edm::DetSet& errorDetSet = errorcollection.find_or_insert(dummydetid); + errorDetSet.data = nodeterrors; + + iEvent.emplace(errorPutToken_, std::move(errorcollection)); + iEvent.emplace(tkErrorPutToken_, std::move(tkerror_detidcollection)); + iEvent.emplace(userErrorPutToken_, std::move(usererror_detidcollection)); + iEvent.emplace(disabledChannelPutToken_, std::move(disabled_channelcollection)); +} + +DEFINE_FWK_MODULE(SiPixelDigiErrorsFromSoA); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc new file mode 100644 index 0000000000000..f2c7d0de5fe24 --- /dev/null +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc @@ -0,0 +1,79 @@ +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + +class SiPixelDigiErrorsSoAFromCUDA : public edm::stream::EDProducer { +public: + explicit SiPixelDigiErrorsSoAFromCUDA(const edm::ParameterSet& iConfig); + ~SiPixelDigiErrorsSoAFromCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void acquire(const edm::Event& 
iEvent, + const edm::EventSetup& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; + void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + + edm::EDGetTokenT> digiErrorGetToken_; + edm::EDPutTokenT digiErrorPutToken_; + + cms::cuda::host::unique_ptr data_; + cms::cuda::SimpleVector error_; + const SiPixelFormatterErrors* formatterErrors_ = nullptr; +}; + +SiPixelDigiErrorsSoAFromCUDA::SiPixelDigiErrorsSoAFromCUDA(const edm::ParameterSet& iConfig) + : digiErrorGetToken_( + consumes>(iConfig.getParameter("src"))), + digiErrorPutToken_(produces()) {} + +void SiPixelDigiErrorsSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src", edm::InputTag("siPixelClustersCUDA")); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelDigiErrorsSoAFromCUDA::acquire(const edm::Event& iEvent, + const edm::EventSetup& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) { + // Do the transfer in a CUDA stream parallel to the computation CUDA stream + cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; + + const auto& gpuDigiErrors = ctx.get(iEvent, digiErrorGetToken_); + + auto tmp = gpuDigiErrors.dataErrorToHostAsync(ctx.stream()); + error_ = tmp.first; + data_ = std::move(tmp.second); + formatterErrors_ = &(gpuDigiErrors.formatterErrors()); +} + +void SiPixelDigiErrorsSoAFromCUDA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { + // The following line copies the data from the pinned host memory to + // regular host memory. In principle that feels unnecessary (why not + // just use the pinned host memory?). 
There are a few arguments for + // doing it though + // - Now can release the pinned host memory back to the (caching) allocator + // * if we'd like to keep the pinned memory, we'd need to also + // keep the CUDA stream around as long as that, or allow pinned + // host memory to be allocated without a CUDA stream + // - What if a CPU algorithm would produce the same SoA? We can't + // use cudaMallocHost without a GPU... + iEvent.emplace(digiErrorPutToken_, error_.size(), error_.data(), formatterErrors_); + + error_ = cms::cuda::make_SimpleVector(0, nullptr); + data_.reset(); + formatterErrors_ = nullptr; +} + +// define as framework plugin +DEFINE_FWK_MODULE(SiPixelDigiErrorsSoAFromCUDA); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc new file mode 100644 index 0000000000000..dbec74585998f --- /dev/null +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc @@ -0,0 +1,83 @@ +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + +class SiPixelDigisSoAFromCUDA : public edm::stream::EDProducer { +public: + explicit SiPixelDigisSoAFromCUDA(const edm::ParameterSet& iConfig); + ~SiPixelDigisSoAFromCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& 
descriptions); + +private: + void acquire(const edm::Event& iEvent, + const edm::EventSetup& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; + void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + + edm::EDGetTokenT> digiGetToken_; + edm::EDPutTokenT digiPutToken_; + + cms::cuda::host::unique_ptr pdigi_; + cms::cuda::host::unique_ptr rawIdArr_; + cms::cuda::host::unique_ptr adc_; + cms::cuda::host::unique_ptr clus_; + + int nDigis_; +}; + +SiPixelDigisSoAFromCUDA::SiPixelDigisSoAFromCUDA(const edm::ParameterSet& iConfig) + : digiGetToken_(consumes>(iConfig.getParameter("src"))), + digiPutToken_(produces()) {} + +void SiPixelDigisSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src", edm::InputTag("siPixelClustersCUDA")); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelDigisSoAFromCUDA::acquire(const edm::Event& iEvent, + const edm::EventSetup& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) { + // Do the transfer in a CUDA stream parallel to the computation CUDA stream + cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; + + const auto& gpuDigis = ctx.get(iEvent, digiGetToken_); + + nDigis_ = gpuDigis.nDigis(); + pdigi_ = gpuDigis.pdigiToHostAsync(ctx.stream()); + rawIdArr_ = gpuDigis.rawIdArrToHostAsync(ctx.stream()); + adc_ = gpuDigis.adcToHostAsync(ctx.stream()); + clus_ = gpuDigis.clusToHostAsync(ctx.stream()); +} + +void SiPixelDigisSoAFromCUDA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { + // The following line copies the data from the pinned host memory to + // regular host memory. In principle that feels unnecessary (why not + // just use the pinned host memory?). 
There are a few arguments for + // doing it though + // - Now can release the pinned host memory back to the (caching) allocator + // * if we'd like to keep the pinned memory, we'd need to also + // keep the CUDA stream around as long as that, or allow pinned + // host memory to be allocated without a CUDA stream + // - What if a CPU algorithm would produce the same SoA? We can't + // use cudaMallocHost without a GPU... + iEvent.emplace(digiPutToken_, nDigis_, pdigi_.get(), rawIdArr_.get(), adc_.get(), clus_.get()); + + pdigi_.reset(); + rawIdArr_.reset(); + adc_.reset(); + clus_.reset(); +} + +// define as framework plugin +DEFINE_FWK_MODULE(SiPixelDigisSoAFromCUDA); diff --git a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py index 12ff657cefd8e..50c8f0fcabd3c 100644 --- a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py +++ b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py @@ -1,7 +1,24 @@ import FWCore.ParameterSet.Config as cms -import EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi +from EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi import siPixelRawToDigi as _siPixelRawToDigi -siPixelDigis = EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi.siPixelRawToDigi.clone() +from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA +siPixelDigis = SwitchProducerCUDA( + cpu = _siPixelRawToDigi.clone() +) from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel -phase1Pixel.toModify(siPixelDigis, UsePhase1=True) +phase1Pixel.toModify(siPixelDigis.cpu, UsePhase1=True) + +from Configuration.ProcessModifiers.gpu_cff import gpu +gpu.toModify(siPixelDigis, + cuda = cms.EDAlias( + siPixelDigiErrors = cms.VPSet( + cms.PSet(type = cms.string("DetIdedmEDCollection")), + cms.PSet(type = cms.string("SiPixelRawDataErroredmDetSetVector")), + cms.PSet(type = cms.string("PixelFEDChanneledmNewDetSetVector")) + ), + siPixelDigisClustersPreSplitting = 
cms.VPSet( + cms.PSet(type = cms.string("PixelDigiedmDetSetVector")) + ) + ) +) diff --git a/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py new file mode 100644 index 0000000000000..5c1ff74be9c69 --- /dev/null +++ b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py @@ -0,0 +1,30 @@ +import FWCore.ParameterSet.Config as cms + +from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import siPixelDigis +from EventFilter.SiPixelRawToDigi.siPixelDigisSoAFromCUDA_cfi import siPixelDigisSoAFromCUDA as _siPixelDigisSoAFromCUDA +from EventFilter.SiPixelRawToDigi.siPixelDigiErrorsSoAFromCUDA_cfi import siPixelDigiErrorsSoAFromCUDA as _siPixelDigiErrorsSoAFromCUDA +from EventFilter.SiPixelRawToDigi.siPixelDigiErrorsFromSoA_cfi import siPixelDigiErrorsFromSoA as _siPixelDigiErrorsFromSoA + +siPixelDigisTask = cms.Task(siPixelDigis) + +siPixelDigisSoA = _siPixelDigisSoAFromCUDA.clone( + src = "siPixelClustersPreSplittingCUDA" +) +siPixelDigiErrorsSoA = _siPixelDigiErrorsSoAFromCUDA.clone( + src = "siPixelClustersPreSplittingCUDA" +) +siPixelDigiErrors = _siPixelDigiErrorsFromSoA.clone() + +from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel +phase1Pixel.toModify(siPixelDigiErrors, UsePhase1=True) + +siPixelDigisTaskCUDA = cms.Task( + siPixelDigisSoA, + siPixelDigiErrorsSoA, + siPixelDigiErrors +) + +from Configuration.ProcessModifiers.gpu_cff import gpu +_siPixelDigisTask_gpu = siPixelDigisTask.copy() +_siPixelDigisTask_gpu.add(siPixelDigisTaskCUDA) +gpu.toReplaceWith(siPixelDigisTask, _siPixelDigisTask_gpu) diff --git a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h index cefdbe4b3296a..c2b5bc9d95f83 100644 --- a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h +++ b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h @@ -2,6 +2,7 @@ #define 
Geometry_TrackerGeometryBuilder_phase1PixelTopology_h #include +#include namespace phase1PixelTopology { @@ -20,6 +21,96 @@ namespace phase1PixelTopology { constexpr uint32_t numPixsInModule = uint32_t(numRowsInModule) * uint32_t(numColsInModule); + constexpr uint32_t numberOfModules = 1856; + constexpr uint32_t numberOfLayers = 10; + constexpr uint32_t layerStart[numberOfLayers + 1] = {0, + 96, + 320, + 672, // barrel + 1184, + 1296, + 1408, // positive endcap + 1520, + 1632, + 1744, // negative endcap + numberOfModules}; + constexpr char const* layerName[numberOfLayers] = { + "BL1", + "BL2", + "BL3", + "BL4", // barrel + "E+1", + "E+2", + "E+3", // positive endcap + "E-1", + "E-2", + "E-3" // negative endcap + }; + + constexpr uint32_t numberOfModulesInBarrel = 1184; + constexpr uint32_t numberOfLaddersInBarrel = numberOfModulesInBarrel / 8; + + template + constexpr auto map_to_array_helper(Function f, std::index_sequence) + -> std::array::type, sizeof...(Indices)> { + return {{f(Indices)...}}; + } + + template + constexpr auto map_to_array(Function f) -> std::array::type, N> { + return map_to_array_helper(f, std::make_index_sequence{}); + } + + constexpr uint32_t findMaxModuleStride() { + bool go = true; + int n = 2; + while (go) { + for (uint8_t i = 1; i < std::size(layerStart); ++i) { + if (layerStart[i] % n != 0) { + go = false; + break; + } + } + if (!go) + break; + n *= 2; + } + return n / 2; + } + + constexpr uint32_t maxModuleStride = findMaxModuleStride(); + + constexpr uint8_t findLayer(uint32_t detId) { + for (uint8_t i = 0; i < std::size(layerStart); ++i) + if (detId < layerStart[i + 1]) + return i; + return std::size(layerStart); + } + + constexpr uint8_t findLayerFromCompact(uint32_t detId) { + detId *= maxModuleStride; + for (uint8_t i = 0; i < std::size(layerStart); ++i) + if (detId < layerStart[i + 1]) + return i; + return std::size(layerStart); + } + + constexpr uint32_t layerIndexSize = numberOfModules / maxModuleStride; + constexpr std::array 
layer = map_to_array(findLayerFromCompact); + + constexpr bool validateLayerIndex() { + bool res = true; + for (auto i = 0U; i < numberOfModules; ++i) { + auto j = i / maxModuleStride; + res &= (layer[j] < 10); + res &= (i >= layerStart[layer[j]]); + res &= (i < layerStart[layer[j] + 1]); + } + return res; + } + + static_assert(validateLayerIndex(), "layer from detIndex algo is buggy"); + // this is for the ROC n<512 (upgrade 1024) constexpr inline uint16_t divu52(uint16_t n) { n = n >> 2; @@ -31,9 +122,11 @@ namespace phase1PixelTopology { } constexpr inline bool isEdgeX(uint16_t px) { return (px == 0) | (px == lastRowInModule); } + constexpr inline bool isEdgeY(uint16_t py) { return (py == 0) | (py == lastColInModule); } constexpr inline uint16_t toRocX(uint16_t px) { return (px < numRowsInRoc) ? px : px - numRowsInRoc; } + constexpr inline uint16_t toRocY(uint16_t py) { auto roc = divu52(py); return py - 52 * roc; @@ -64,6 +157,18 @@ namespace phase1PixelTopology { return py + shift; } + //FIXME move it elsewhere? 
+ struct AverageGeometry { + static constexpr auto numberOfLaddersInBarrel = phase1PixelTopology::numberOfLaddersInBarrel; + float ladderZ[numberOfLaddersInBarrel]; + float ladderX[numberOfLaddersInBarrel]; + float ladderY[numberOfLaddersInBarrel]; + float ladderR[numberOfLaddersInBarrel]; + float ladderMinZ[numberOfLaddersInBarrel]; + float ladderMaxZ[numberOfLaddersInBarrel]; + float endCapZ[2]; // just for pos and neg Layer1 + }; + } // namespace phase1PixelTopology #endif // Geometry_TrackerGeometryBuilder_phase1PixelTopology_h diff --git a/Geometry/TrackerGeometryBuilder/test/phase1PixelTopology_t.cpp b/Geometry/TrackerGeometryBuilder/test/phase1PixelTopology_t.cpp index 9a00efbff9a9a..8dfae57b685b4 100644 --- a/Geometry/TrackerGeometryBuilder/test/phase1PixelTopology_t.cpp +++ b/Geometry/TrackerGeometryBuilder/test/phase1PixelTopology_t.cpp @@ -142,5 +142,19 @@ int main() { assert(std::get<1>(ori) == bp); } + for (auto i = 0U; i < phase1PixelTopology::numberOfLayers; ++i) { + std::cout << "layer " << i << ", \"" << phase1PixelTopology::layerName[i] << "\", [" + << phase1PixelTopology::layerStart[i] << ", " << phase1PixelTopology::layerStart[i + 1] << ")" + << std::endl; + } + + for (auto i = 0U; i < phase1PixelTopology::numberOfModules; ++i) { + int layer = phase1PixelTopology::layer[i / phase1PixelTopology::maxModuleStride]; + //std::cout << "module " << i << ": " << "layer " << layer << ", \"" << phase1PixelTopology::layerName[layer] << "\", [" << phase1PixelTopology::layerStart[layer] << ", " << phase1PixelTopology::layerStart[layer+1] << ")" << std::endl; + assert(layer < 10); + assert(i >= phase1PixelTopology::layerStart[layer]); + assert(i < phase1PixelTopology::layerStart[layer + 1]); + } + return 0; } diff --git a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py index 3cae176059b3b..35a72f0edb08f 100644 --- a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py +++ 
b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py @@ -9,11 +9,11 @@ from RecoLocalTracker.SiStripRecHitConverter.StripCPEfromTrackAngle_cfi import * from RecoLocalTracker.SiStripZeroSuppression.SiStripZeroSuppression_cfi import * from RecoLocalTracker.SiStripClusterizer.SiStripClusterizer_cfi import * -from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizerPreSplitting_cfi import * +from RecoLocalTracker.SiPixelClusterizer.siPixelClustersPreSplitting_cff import * from RecoLocalTracker.SiPixelRecHits.SiPixelRecHits_cfi import * from RecoLocalTracker.SubCollectionProducers.clustersummaryproducer_cfi import * -pixeltrackerlocalrecoTask = cms.Task(siPixelClustersPreSplitting,siPixelRecHitsPreSplitting) +pixeltrackerlocalrecoTask = cms.Task(siPixelClustersPreSplittingTask,siPixelRecHitsPreSplittingTask) striptrackerlocalrecoTask = cms.Task(siStripZeroSuppression,siStripClusters,siStripMatchedRecHits) trackerlocalrecoTask = cms.Task(pixeltrackerlocalrecoTask,striptrackerlocalrecoTask,clusterSummaryProducer) diff --git a/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml new file mode 100644 index 0000000000000..7e71c635c95b8 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index c7b16a6ef4ee2..a4851e4b322be 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -1,8 +1,15 @@ - - - + + + - + + + + + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h b/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h index 9e3aad606851c..eb622cccb051e 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h @@ -1,13 
+1,14 @@ #ifndef RecoLocalTracker_SiPixelClusterizer_PixelClusterizerBase_H #define RecoLocalTracker_SiPixelClusterizer_PixelClusterizerBase_H +#include + +#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationServiceBase.h" #include "DataFormats/Common/interface/DetSetVector.h" #include "DataFormats/Common/interface/DetSetVectorNew.h" #include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" #include "DataFormats/SiPixelDigi/interface/PixelDigi.h" #include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationServiceBase.h" -#include class PixelGeomDetUnit; @@ -20,29 +21,38 @@ class PixelClusterizerBase { typedef edmNew::DetSet::const_iterator ClusterIterator; struct AccretionCluster { - typedef unsigned short UShort; - static constexpr UShort MAXSIZE = 256; - UShort adc[MAXSIZE]; - UShort x[MAXSIZE]; - UShort y[MAXSIZE]; - UShort xmin = 16000; - UShort ymin = 16000; + static constexpr uint16_t MAXSIZE = 256; + uint16_t adc[MAXSIZE]; + uint16_t x[MAXSIZE]; + uint16_t y[MAXSIZE]; + uint16_t xmin = 16000; + uint16_t ymin = 16000; unsigned int isize = 0; - unsigned int curr = 0; + int charge = 0; // stack interface (unsafe ok for use below) - UShort top() const { return curr; } + unsigned int curr = 0; + uint16_t top() const { return curr; } void pop() { ++curr; } bool empty() { return curr == isize; } - bool add(SiPixelCluster::PixelPos const& p, UShort const iadc) { + void clear() { + xmin = 16000; + ymin = 16000; + isize = 0; + charge = 0; + curr = 0; + } + + bool add(SiPixelCluster::PixelPos const& p, uint16_t const iadc) { if (isize == MAXSIZE) return false; - xmin = std::min(xmin, (unsigned short)(p.row())); - ymin = std::min(ymin, (unsigned short)(p.col())); + xmin = std::min(xmin, p.row()); + ymin = std::min(ymin, p.col()); adc[isize] = iadc; x[isize] = p.row(); y[isize++] = p.col(); + charge += iadc; return true; } }; diff --git 
a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc index d35e04ebe16f7..3c0f8b9754089 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc @@ -122,6 +122,14 @@ void SiPixelClusterProducer::produce(edm::Event& e, const edm::EventSetup& es) { // Step D: write output to file output->shrink_to_fit(); + + // set sequential identifier + for (auto& clusters : *output) { + uint16_t id = 0; + for (auto& cluster : clusters) { + cluster.setOriginalId(id++); + } + } e.put(tPutPixelClusters, std::move(output)); } @@ -153,15 +161,14 @@ void SiPixelClusterProducer::run(const T& input, int numberOfClusters = 0; // Iterate on detector units - typename T::const_iterator DSViter = input.begin(); - for (; DSViter != input.end(); DSViter++) { + for (auto const& dsv : input) { ++numberOfDetUnits; // LogDebug takes very long time, get rid off. 
- //LogDebug("SiStripClusterizer") << "[SiPixelClusterProducer::run] DetID" << DSViter->id; + //LogDebug("SiStripClusterizer") << "[SiPixelClusterProducer::run] DetID" << dsv.id; std::vector badChannels; - DetId detIdObject(DSViter->detId()); + DetId detIdObject(dsv.detId()); // Comment: At the moment the clusterizer depends on geometry // to access information as the pixel topology (number of columns @@ -177,8 +184,8 @@ void SiPixelClusterProducer::run(const T& input, { // Produce clusters for this DetUnit and store them in // a DetSet - edmNew::DetSetVector::FastFiller spc(output, DSViter->detId()); - clusterizer_->clusterizeDetUnit(*DSViter, pixDet, tTopo_, badChannels, spc); + edmNew::DetSetVector::FastFiller spc(output, dsv.detId()); + clusterizer_->clusterizeDetUnit(dsv, pixDet, tTopo_, badChannels, spc); if (spc.empty()) { spc.abort(); } else { diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc new file mode 100644 index 0000000000000..0078bae38306a --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -0,0 +1,150 @@ +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/DetId/interface/DetId.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include 
"FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h" + +class SiPixelDigisClustersFromSoA : public edm::global::EDProducer<> { +public: + explicit SiPixelDigisClustersFromSoA(const edm::ParameterSet& iConfig); + ~SiPixelDigisClustersFromSoA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + const edm::ESGetToken topoToken_; + + edm::EDGetTokenT digiGetToken_; + + edm::EDPutTokenT> digiPutToken_; + edm::EDPutTokenT clusterPutToken_; +}; + +SiPixelDigisClustersFromSoA::SiPixelDigisClustersFromSoA(const edm::ParameterSet& iConfig) + : topoToken_(esConsumes()), + digiGetToken_(consumes(iConfig.getParameter("src"))), + digiPutToken_(produces>()), + clusterPutToken_(produces()) {} + +void SiPixelDigisClustersFromSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src", edm::InputTag("siPixelDigisSoA")); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + const auto& digis = iEvent.get(digiGetToken_); + const uint32_t nDigis = digis.size(); + const auto& ttopo = iSetup.getData(topoToken_); + + auto collection = std::make_unique>(); + auto outputClusters = std::make_unique(); + outputClusters->reserve(gpuClustering::maxNumModules, nDigis / 4); + + edm::DetSet* detDigis = nullptr; + for (uint32_t i = 0; i < nDigis; i++) { + if (digis.pdigi(i) == 0) + continue; + detDigis = &collection->find_or_insert(digis.rawIdArr(i)); + if ((*detDigis).empty()) + 
(*detDigis).data.reserve(64); // avoid the first relocations + break; + } + + int32_t nclus = -1; + std::vector aclusters(gpuClustering::maxNumClustersPerModules); +#ifdef EDM_ML_DEBUG + auto totClustersFilled = 0; +#endif + + auto fillClusters = [&](uint32_t detId) { + if (nclus < 0) + return; // this in reality should never happen + edmNew::DetSetVector::FastFiller spc(*outputClusters, detId); + auto layer = (DetId(detId).subdetId() == 1) ? ttopo.pxbLayer(detId) : 0; + auto clusterThreshold = (layer == 1) ? 2000 : 4000; + for (int32_t ic = 0; ic < nclus + 1; ++ic) { + auto const& acluster = aclusters[ic]; + // in any case we cannot go out of sync with gpu... + if (acluster.charge < clusterThreshold) + edm::LogWarning("SiPixelDigisClustersFromSoA") << "cluster below charge Threshold " + << "Layer/DetId/clusId " << layer << '/' << detId << '/' << ic + << " size/charge " << acluster.isize << '/' << acluster.charge; + SiPixelCluster cluster(acluster.isize, acluster.adc, acluster.x, acluster.y, acluster.xmin, acluster.ymin, ic); +#ifdef EDM_ML_DEBUG + ++totClustersFilled; +#endif + LogDebug("SiPixelDigisClustersFromSoA") + << "putting in this cluster " << ic << " " << cluster.charge() << " " << cluster.pixelADC().size(); + // sort by row (x) + spc.push_back(std::move(cluster)); + std::push_heap(spc.begin(), spc.end(), [](SiPixelCluster const& cl1, SiPixelCluster const& cl2) { + return cl1.minPixelRow() < cl2.minPixelRow(); + }); + } + for (int32_t ic = 0; ic < nclus + 1; ++ic) + aclusters[ic].clear(); + nclus = -1; + // sort by row (x) + std::sort_heap(spc.begin(), spc.end(), [](SiPixelCluster const& cl1, SiPixelCluster const& cl2) { + return cl1.minPixelRow() < cl2.minPixelRow(); + }); + if (spc.empty()) + spc.abort(); + }; + + for (uint32_t i = 0; i < nDigis; i++) { + if (digis.pdigi(i) == 0) + continue; + if (digis.clus(i) > 9000) + continue; // not in cluster; TODO add an assert for the size + assert(digis.rawIdArr(i) > 109999); + if ((*detDigis).detId() != 
digis.rawIdArr(i)) { + fillClusters((*detDigis).detId()); + assert(nclus == -1); + detDigis = &collection->find_or_insert(digis.rawIdArr(i)); + if ((*detDigis).empty()) + (*detDigis).data.reserve(64); // avoid the first relocations + else { + edm::LogWarning("SiPixelDigisClustersFromSoA") << "Problem det present twice in input! " << (*detDigis).detId(); + } + } + (*detDigis).data.emplace_back(digis.pdigi(i)); + auto const& dig = (*detDigis).data.back(); + // fill clusters + assert(digis.clus(i) >= 0); + assert(digis.clus(i) < gpuClustering::maxNumClustersPerModules); + nclus = std::max(digis.clus(i), nclus); + auto row = dig.row(); + auto col = dig.column(); + SiPixelCluster::PixelPos pix(row, col); + aclusters[digis.clus(i)].add(pix, digis.adc(i)); + } + + // fill final clusters + if (detDigis) + fillClusters((*detDigis).detId()); +#ifdef EDM_ML_DEBUG + LogDebug("SiPixelDigisClustersFromSoA") << "filled " << totClustersFilled << " clusters"; +#endif + + iEvent.put(digiPutToken_, std::move(collection)); + iEvent.put(clusterPutToken_, std::move(outputClusters)); +} + +DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoA); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc new file mode 100644 index 0000000000000..93b92e145ec5c --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -0,0 +1,259 @@ +// C++ includes +#include +#include +#include + +// CMSSW includes +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h" +#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" +#include 
"CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "DataFormats/FEDRawData/interface/FEDNumbering.h" +#include "DataFormats/FEDRawData/interface/FEDRawData.h" +#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" +#include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" +#include "EventFilter/SiPixelRawToDigi/interface/PixelUnpackingRegions.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/ESTransientHandle.h" +#include "FWCore/Framework/interface/ESWatcher.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "RecoTracker/Record/interface/CkfComponentsRecord.h" + +// local includes +#include "SiPixelRawToClusterGPUKernel.h" + +class SiPixelRawToClusterCUDA : public edm::stream::EDProducer { +public: + explicit SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfig); + ~SiPixelRawToClusterCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void acquire(const edm::Event& iEvent, + const edm::EventSetup& iSetup, + 
edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; + void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + + edm::EDGetTokenT rawGetToken_; + + edm::EDPutTokenT> digiPutToken_; + edm::EDPutTokenT> digiErrorPutToken_; + edm::EDPutTokenT> clusterPutToken_; + + cms::cuda::ContextState ctxState_; + + edm::ESWatcher recordWatcher_; + edm::ESGetToken gpuMapToken_; + edm::ESGetToken gainsToken_; + edm::ESGetToken cablingMapToken_; + + std::unique_ptr cabling_; + std::vector fedIds_; + const SiPixelFedCablingMap* cablingMap_ = nullptr; + std::unique_ptr regions_; + + pixelgpudetails::SiPixelRawToClusterGPUKernel gpuAlgo_; + std::unique_ptr wordFedAppender_; + PixelDataFormatter::Errors errors_; + + const bool isRun2_; + const bool includeErrors_; + const bool useQuality_; +}; + +SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfig) + : rawGetToken_(consumes(iConfig.getParameter("InputLabel"))), + digiPutToken_(produces>()), + clusterPutToken_(produces>()), + gpuMapToken_(esConsumes()), + gainsToken_(esConsumes()), + cablingMapToken_(esConsumes( + edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), + isRun2_(iConfig.getParameter("isRun2")), + includeErrors_(iConfig.getParameter("IncludeErrors")), + useQuality_(iConfig.getParameter("UseQualityInfo")) { + if (includeErrors_) { + digiErrorPutToken_ = produces>(); + } + + // regions + if (!iConfig.getParameter("Regions").getParameterNames().empty()) { + regions_ = std::make_unique(iConfig, consumesCollector()); + } + + edm::Service cs; + if (cs->enabled()) { + wordFedAppender_ = std::make_unique(); + } +} + +void SiPixelRawToClusterCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("isRun2", true); + desc.add("IncludeErrors", true); + desc.add("UseQualityInfo", false); + desc.add("InputLabel", edm::InputTag("rawDataCollector")); + { + edm::ParameterSetDescription psd0; + 
psd0.addOptional>("inputs"); + psd0.addOptional>("deltaPhi"); + psd0.addOptional>("maxZ"); + psd0.addOptional("beamSpot"); + desc.add("Regions", psd0) + ->setComment("## Empty Regions PSet means complete unpacking"); + } + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); //Tav + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, + const edm::EventSetup& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) { + cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder), ctxState_}; + + auto hgpuMap = iSetup.getHandle(gpuMapToken_); + if (hgpuMap->hasQuality() != useQuality_) { + throw cms::Exception("LogicError") + << "UseQuality of the module (" << useQuality_ + << ") differs the one from SiPixelROCsStatusAndMappingWrapper. Please fix your configuration."; + } + // get the GPU product already here so that the async transfer can begin + const auto* gpuMap = hgpuMap->getGPUProductAsync(ctx.stream()); + + auto hgains = iSetup.getHandle(gainsToken_); + // get the GPU product already here so that the async transfer can begin + const auto* gpuGains = hgains->getGPUProductAsync(ctx.stream()); + + cms::cuda::device::unique_ptr modulesToUnpackRegional; + const unsigned char* gpuModulesToUnpack; + + if (regions_) { + regions_->run(iEvent, iSetup); + LogDebug("SiPixelRawToCluster") << "region2unpack #feds: " << regions_->nFEDs(); + LogDebug("SiPixelRawToCluster") << "region2unpack #modules (BPIX,EPIX,total): " << regions_->nBarrelModules() << " " + << regions_->nForwardModules() << " " << regions_->nModules(); + modulesToUnpackRegional = hgpuMap->getModToUnpRegionalAsync(*(regions_->modulesToUnpack()), ctx.stream()); + gpuModulesToUnpack = modulesToUnpackRegional.get(); + } else { + gpuModulesToUnpack = hgpuMap->getModToUnpAllAsync(ctx.stream()); + } + + // initialize cabling map or update if necessary + if (recordWatcher_.check(iSetup)) { + // cabling map, which maps 
online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) + auto cablingMap = iSetup.getTransientHandle(cablingMapToken_); + cablingMap_ = cablingMap.product(); + fedIds_ = cablingMap->fedIds(); + cabling_ = cablingMap->cablingTree(); + LogDebug("map version:") << cabling_->version(); + } + + const auto& buffers = iEvent.get(rawGetToken_); + + errors_.clear(); + + // GPU specific: Data extraction for RawToDigi GPU + unsigned int wordCounterGPU = 0; + unsigned int fedCounter = 0; + bool errorsInEvent = false; + + // In CPU algorithm this loop is part of PixelDataFormatter::interpretRawData() + ErrorChecker errorcheck; + for (int fedId : fedIds_) { + if (regions_ && !regions_->mayUnpackFED(fedId)) + continue; + + // for GPU + // first 150 index stores the fedId and next 150 will store the + // start index of word in that fed + assert(fedId >= FEDNumbering::MINSiPixeluTCAFEDID); + fedCounter++; + + // get event data for this fed + const FEDRawData& rawData = buffers.FEDData(fedId); + + // GPU specific + int nWords = rawData.size() / sizeof(cms_uint64_t); + if (nWords == 0) { + continue; + } + + // check CRC bit + const cms_uint64_t* trailer = reinterpret_cast(rawData.data()) + (nWords - 1); + if (not errorcheck.checkCRC(errorsInEvent, fedId, trailer, errors_)) { + continue; + } + + // check headers + const cms_uint64_t* header = reinterpret_cast(rawData.data()); + header--; + bool moreHeaders = true; + while (moreHeaders) { + header++; + bool headerStatus = errorcheck.checkHeader(errorsInEvent, fedId, header, errors_); + moreHeaders = headerStatus; + } + + // check trailers + bool moreTrailers = true; + trailer++; + while (moreTrailers) { + trailer--; + bool trailerStatus = errorcheck.checkTrailer(errorsInEvent, fedId, nWords, trailer, errors_); + moreTrailers = trailerStatus; + } + + const cms_uint32_t* bw = (const cms_uint32_t*)(header + 1); + const cms_uint32_t* ew = (const cms_uint32_t*)(trailer); + + assert(0 == (ew - bw) % 2); + 
wordFedAppender_->initializeWordFed(fedId, wordCounterGPU, bw, (ew - bw)); + wordCounterGPU += (ew - bw); + + } // end of for loop + + gpuAlgo_.makeClustersAsync(isRun2_, + gpuMap, + gpuModulesToUnpack, + gpuGains, + *wordFedAppender_, + std::move(errors_), + wordCounterGPU, + fedCounter, + useQuality_, + includeErrors_, + edm::MessageDrop::instance()->debugEnabled, + ctx.stream()); +} + +void SiPixelRawToClusterCUDA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { + cms::cuda::ScopedContextProduce ctx{ctxState_}; + + auto tmp = gpuAlgo_.getResults(); + ctx.emplace(iEvent, digiPutToken_, std::move(tmp.first)); + ctx.emplace(iEvent, clusterPutToken_, std::move(tmp.second)); + if (includeErrors_) { + ctx.emplace(iEvent, digiErrorPutToken_, gpuAlgo_.getErrors()); + } +} + +// define as framework plugin +DEFINE_FWK_MODULE(SiPixelRawToClusterCUDA); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu new file mode 100644 index 0000000000000..25e5c925990f8 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -0,0 +1,669 @@ +/* Sushil Dubey, Shashi Dugad, TIFR, July 2017 + * + * File Name: RawToClusterGPU.cu + * Description: It converts Raw data into Digi Format on GPU + * Finaly the Output of RawToDigi data is given to pixelClusterizer +**/ + +// C++ includes +#include +#include +#include +#include +#include +#include +#include + +// CUDA includes +#include + +// CMSSW includes +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h" +#include "DataFormats/FEDRawData/interface/FEDNumbering.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" 
+#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" + +// local includes +#include "SiPixelRawToClusterGPUKernel.h" + +namespace pixelgpudetails { + + // number of words for all the FEDs + constexpr uint32_t MAX_FED_WORDS = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; + + SiPixelRawToClusterGPUKernel::WordFedAppender::WordFedAppender() { + word_ = cms::cuda::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); + fedId_ = cms::cuda::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); + } + + void SiPixelRawToClusterGPUKernel::WordFedAppender::initializeWordFed(int fedId, + unsigned int wordCounterGPU, + const cms_uint32_t *src, + unsigned int length) { + std::memcpy(word_.get() + wordCounterGPU, src, sizeof(cms_uint32_t) * length); + std::memset(fedId_.get() + wordCounterGPU / 2, fedId - FEDNumbering::MINSiPixeluTCAFEDID, length / 2); + } + + //////////////////// + + __device__ uint32_t getLink(uint32_t ww) { + return ((ww >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask); + } + + __device__ uint32_t getRoc(uint32_t ww) { return ((ww >> pixelgpudetails::ROC_shift) & pixelgpudetails::ROC_mask); } + + __device__ uint32_t getADC(uint32_t ww) { return ((ww >> pixelgpudetails::ADC_shift) & pixelgpudetails::ADC_mask); } + + __device__ bool isBarrel(uint32_t rawId) { + return (PixelSubdetector::PixelBarrel == ((rawId >> DetId::kSubdetOffset) & DetId::kSubdetMask)); + } + + __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelROCsStatusAndMapping *cablingMap, + uint8_t fed, + uint32_t link, + uint32_t roc) { + uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; + pixelgpudetails::DetIdGPU detId = { + cablingMap->rawId[index], cablingMap->rocInDet[index], 
cablingMap->moduleId[index]}; + return detId; + } + + //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html + //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071 + // Convert local pixel to pixelgpudetails::global pixel + __device__ pixelgpudetails::Pixel frameConversion( + bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, pixelgpudetails::Pixel local) { + int slopeRow = 0, slopeCol = 0; + int rowOffset = 0, colOffset = 0; + + if (bpix) { + if (side == -1 && layer != 1) { // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8 - rocIdInDetUnit) * pixelgpudetails::numColsInRoc - 1; + } else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 * pixelgpudetails::numRowsInRoc - 1; + colOffset = (rocIdInDetUnit - 8) * pixelgpudetails::numColsInRoc; + } // if roc + } else { // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1 + if (rocIdInDetUnit < 8) { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 * pixelgpudetails::numRowsInRoc - 1; + colOffset = rocIdInDetUnit * pixelgpudetails::numColsInRoc; + } else { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (16 - rocIdInDetUnit) * pixelgpudetails::numColsInRoc - 1; + } + } + + } else { // fpix + if (side == -1) { // pannel 1 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8 - rocIdInDetUnit) * pixelgpudetails::numColsInRoc - 1; + } else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 * pixelgpudetails::numRowsInRoc - 1; + colOffset = (rocIdInDetUnit - 8) * pixelgpudetails::numColsInRoc; + } + } else { // pannel 2 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8 - rocIdInDetUnit) * pixelgpudetails::numColsInRoc - 1; + } else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 
* pixelgpudetails::numRowsInRoc - 1; + colOffset = (rocIdInDetUnit - 8) * pixelgpudetails::numColsInRoc; + } + + } // side + } + + uint32_t gRow = rowOffset + slopeRow * local.row; + uint32_t gCol = colOffset + slopeCol * local.col; + // inside frameConversion row: gRow, column: gCol + pixelgpudetails::Pixel global = {gRow, gCol}; + return global; + } + + // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc + __device__ uint8_t conversionError(uint8_t fedId, uint8_t status, bool debug = false) { + uint8_t errorType = 0; + + switch (status) { + case (1): { + if (debug) + printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId); + errorType = 35; + break; + } + case (2): { + if (debug) + printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId); + errorType = 36; + break; + } + case (3): { + if (debug) + printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId); + errorType = 37; + break; + } + case (4): { + if (debug) + printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId); + errorType = 38; + break; + } + default: + if (debug) + printf("Cabling check returned unexpected result, status = %i\n", status); + }; + + return errorType; + } + + __device__ bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) { + /// row and column in ROC representation + return ((rocRow < pixelgpudetails::numRowsInRoc) & (rocCol < pixelgpudetails::numColsInRoc)); + } + + __device__ bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); } + + // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc + __device__ uint8_t checkROC(uint32_t errorWord, + uint8_t fedId, + uint32_t link, + const SiPixelROCsStatusAndMapping *cablingMap, + bool debug = false) { + uint8_t errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask; + if (errorType < 25) + return 0; + bool errorFound 
= false; + + switch (errorType) { + case (25): { + errorFound = true; + uint32_t index = fedId * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + 1; + if (index > 1 && index <= cablingMap->size) { + if (!(link == cablingMap->link[index] && 1 == cablingMap->roc[index])) + errorFound = false; + } + if (debug and errorFound) + printf("Invalid ROC = 25 found (errorType = 25)\n"); + break; + } + case (26): { + if (debug) + printf("Gap word found (errorType = 26)\n"); + errorFound = true; + break; + } + case (27): { + if (debug) + printf("Dummy word found (errorType = 27)\n"); + errorFound = true; + break; + } + case (28): { + if (debug) + printf("Error fifo nearly full (errorType = 28)\n"); + errorFound = true; + break; + } + case (29): { + if (debug) + printf("Timeout on a channel (errorType = 29)\n"); + if ((errorWord >> pixelgpudetails::OMIT_ERR_shift) & pixelgpudetails::OMIT_ERR_mask) { + if (debug) + printf("...first errorType=29 error, this gets masked out\n"); + } + errorFound = true; + break; + } + case (30): { + if (debug) + printf("TBM error trailer (errorType = 30)\n"); + int stateMatch_bits = 4; + int stateMatch_shift = 8; + uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits); + int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask; + if (stateMatch != 1 && stateMatch != 8) { + if (debug) + printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); + } + if (stateMatch == 1) + errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 + errorFound = true; + break; + } + case (31): { + if (debug) + printf("Event number error (errorType = 31)\n"); + errorFound = true; + break; + } + default: + errorFound = false; + }; + + return errorFound ? 
errorType : 0; + } + + // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc + __device__ uint32_t getErrRawID(uint8_t fedId, + uint32_t errWord, + uint32_t errorType, + const SiPixelROCsStatusAndMapping *cablingMap, + bool debug = false) { + uint32_t rID = 0xffffffff; + + switch (errorType) { + case 25: + case 30: + case 31: + case 36: + case 40: { + uint32_t roc = 1; + uint32_t link = (errWord >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId; + if (rID_temp != gpuClustering::invalidModuleId) + rID = rID_temp; + break; + } + case 29: { + int chanNmbr = 0; + const int DB0_shift = 0; + const int DB1_shift = DB0_shift + 1; + const int DB2_shift = DB1_shift + 1; + const int DB3_shift = DB2_shift + 1; + const int DB4_shift = DB3_shift + 1; + const uint32_t DataBit_mask = ~(~uint32_t(0) << 1); + + int CH1 = (errWord >> DB0_shift) & DataBit_mask; + int CH2 = (errWord >> DB1_shift) & DataBit_mask; + int CH3 = (errWord >> DB2_shift) & DataBit_mask; + int CH4 = (errWord >> DB3_shift) & DataBit_mask; + int CH5 = (errWord >> DB4_shift) & DataBit_mask; + int BLOCK_bits = 3; + int BLOCK_shift = 8; + uint32_t BLOCK_mask = ~(~uint32_t(0) << BLOCK_bits); + int BLOCK = (errWord >> BLOCK_shift) & BLOCK_mask; + int localCH = 1 * CH1 + 2 * CH2 + 3 * CH3 + 4 * CH4 + 5 * CH5; + if (BLOCK % 2 == 0) + chanNmbr = (BLOCK / 2) * 9 + localCH; + else + chanNmbr = ((BLOCK - 1) / 2) * 9 + 4 + localCH; + if ((chanNmbr < 1) || (chanNmbr > 36)) + break; // signifies unexpected result + + uint32_t roc = 1; + uint32_t link = chanNmbr; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId; + if (rID_temp != gpuClustering::invalidModuleId) + rID = rID_temp; + break; + } + case 37: + case 38: { + uint32_t roc = (errWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ROC_mask; + uint32_t link = (errWord >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask; + 
uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId; + if (rID_temp != gpuClustering::invalidModuleId) + rID = rID_temp; + break; + } + default: + break; + }; + + return rID; + } + + // Kernel to perform Raw to Digi conversion + __global__ void RawToDigi_kernel(const SiPixelROCsStatusAndMapping *cablingMap, + const unsigned char *modToUnp, + const uint32_t wordCounter, + const uint32_t *word, + const uint8_t *fedIds, + uint16_t *xx, + uint16_t *yy, + uint16_t *adc, + uint32_t *pdigi, + uint32_t *rawIdArr, + uint16_t *moduleId, + cms::cuda::SimpleVector *err, + bool useQualityInfo, + bool includeErrors, + bool debug) { + //if (threadIdx.x==0) printf("Event: %u blockIdx.x: %u start: %u end: %u\n", eventno, blockIdx.x, begin, end); + + int32_t first = threadIdx.x + blockIdx.x * blockDim.x; + for (int32_t iloop = first, nend = wordCounter; iloop < nend; iloop += blockDim.x * gridDim.x) { + auto gIndex = iloop; + xx[gIndex] = 0; + yy[gIndex] = 0; + adc[gIndex] = 0; + bool skipROC = false; + + uint8_t fedId = fedIds[gIndex / 2]; // +1200; + + // initialize (too many coninue below) + pdigi[gIndex] = 0; + rawIdArr[gIndex] = 0; + moduleId[gIndex] = gpuClustering::invalidModuleId; + + uint32_t ww = word[gIndex]; // Array containing 32 bit raw data + if (ww == 0) { + // 0 is an indicator of a noise/dead channel, skip these pixels during clusterization + continue; + } + + uint32_t link = getLink(ww); // Extract link + uint32_t roc = getRoc(ww); // Extract Roc in link + pixelgpudetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc); + + uint8_t errorType = checkROC(ww, fedId, link, cablingMap, debug); + skipROC = (roc < pixelgpudetails::maxROCIndex) ? 
false : (errorType != 0); + if (includeErrors and skipROC) { + uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap, debug); + err->push_back(SiPixelErrorCompact{rID, ww, errorType, fedId}); + continue; + } + + uint32_t rawId = detId.rawId; + uint32_t rocIdInDetUnit = detId.rocInDet; + bool barrel = isBarrel(rawId); + + uint32_t index = fedId * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; + if (useQualityInfo) { + skipROC = cablingMap->badRocs[index]; + if (skipROC) + continue; + } + skipROC = modToUnp[index]; + if (skipROC) + continue; + + uint32_t layer = 0; + int side = 0, panel = 0, module = 0; + + if (barrel) { + layer = (rawId >> pixelgpudetails::layerStartBit) & pixelgpudetails::layerMask; + module = (rawId >> pixelgpudetails::moduleStartBit) & pixelgpudetails::moduleMask; + side = (module < 5) ? -1 : 1; + } else { + // endcap ids + layer = 0; + panel = (rawId >> pixelgpudetails::panelStartBit) & pixelgpudetails::panelMask; + side = (panel == 1) ? -1 : 1; + } + + // ***special case of layer to 1 be handled here + pixelgpudetails::Pixel localPix; + if (layer == 1) { + uint32_t col = (ww >> pixelgpudetails::COL_shift) & pixelgpudetails::COL_mask; + uint32_t row = (ww >> pixelgpudetails::ROW_shift) & pixelgpudetails::ROW_mask; + localPix.row = row; + localPix.col = col; + if (includeErrors) { + if (not rocRowColIsValid(row, col)) { + uint8_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays + err->push_back(SiPixelErrorCompact{rawId, ww, error, fedId}); + if (debug) + printf("BPIX1 Error status: %i\n", error); + continue; + } + } + } else { + // ***conversion rules for dcol and pxid + uint32_t dcol = (ww >> pixelgpudetails::DCOL_shift) & pixelgpudetails::DCOL_mask; + uint32_t pxid = (ww >> pixelgpudetails::PXID_shift) & pixelgpudetails::PXID_mask; + uint32_t row = pixelgpudetails::numRowsInRoc - pxid / 2; + uint32_t col = dcol * 2 + pxid % 2; + localPix.row = row; + localPix.col = col; + if (includeErrors and not 
dcolIsValid(dcol, pxid)) { + uint8_t error = conversionError(fedId, 3, debug); + err->push_back(SiPixelErrorCompact{rawId, ww, error, fedId}); + if (debug) + printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); + continue; + } + } + + pixelgpudetails::Pixel globalPix = frameConversion(barrel, side, layer, rocIdInDetUnit, localPix); + xx[gIndex] = globalPix.row; // origin shifting by 1 0-159 + yy[gIndex] = globalPix.col; // origin shifting by 1 0-415 + adc[gIndex] = getADC(ww); + pdigi[gIndex] = pixelgpudetails::pack(globalPix.row, globalPix.col, adc[gIndex]); + moduleId[gIndex] = detId.moduleId; + rawIdArr[gIndex] = rawId; + } // end of loop (gIndex < end) + + } // end of Raw to Digi kernel + + __global__ void fillHitsModuleStart(uint32_t const *__restrict__ cluStart, uint32_t *__restrict__ moduleStart) { + assert(gpuClustering::maxNumModules < 2048); // easy to extend at least till 32*1024 + assert(1 == gridDim.x); + assert(0 == blockIdx.x); + + int first = threadIdx.x; + + // limit to maxHitsInModule() + for (int i = first, iend = gpuClustering::maxNumModules; i < iend; i += blockDim.x) { + moduleStart[i + 1] = std::min(gpuClustering::maxHitsInModule(), cluStart[i]); + } + + __shared__ uint32_t ws[32]; + cms::cuda::blockPrefixScan(moduleStart + 1, moduleStart + 1, 1024, ws); + cms::cuda::blockPrefixScan(moduleStart + 1025, moduleStart + 1025, gpuClustering::maxNumModules - 1024, ws); + + for (int i = first + 1025, iend = gpuClustering::maxNumModules + 1; i < iend; i += blockDim.x) { + moduleStart[i] += moduleStart[1024]; + } + __syncthreads(); + +#ifdef GPU_DEBUG + assert(0 == moduleStart[0]); + auto c0 = std::min(gpuClustering::maxHitsInModule(), cluStart[0]); + assert(c0 == moduleStart[1]); + assert(moduleStart[1024] >= moduleStart[1023]); + assert(moduleStart[1025] >= moduleStart[1024]); + assert(moduleStart[gpuClustering::maxNumModules] >= moduleStart[1025]); + + for (int i = first, iend = gpuClustering::maxNumModules + 1; i < iend; i += 
blockDim.x) { + if (0 != i) + assert(moduleStart[i] >= moduleStart[i - i]); + // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID] + // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856] + if (i == 96 || i == 1184 || i == 1744 || i == gpuClustering::maxNumModules) + printf("moduleStart %d %d\n", i, moduleStart[i]); + } +#endif + + // avoid overflow + auto constexpr maxNumClusters = gpuClustering::maxNumClusters; + for (int i = first, iend = gpuClustering::maxNumModules + 1; i < iend; i += blockDim.x) { + moduleStart[i] = std::clamp(moduleStart[i], 0U, maxNumClusters); + } + } + + // Interface to outside + void SiPixelRawToClusterGPUKernel::makeClustersAsync(bool isRun2, + const SiPixelROCsStatusAndMapping *cablingMap, + const unsigned char *modToUnp, + const SiPixelGainForHLTonGPU *gains, + const WordFedAppender &wordFed, + SiPixelFormatterErrors &&errors, + const uint32_t wordCounter, + const uint32_t fedCounter, + bool useQualityInfo, + bool includeErrors, + bool debug, + cudaStream_t stream) { + nDigis = wordCounter; + +#ifdef GPU_DEBUG + std::cout << "decoding " << wordCounter << " digis. Max is " << pixelgpudetails::MAX_FED_WORDS << std::endl; +#endif + + digis_d = SiPixelDigisCUDA(pixelgpudetails::MAX_FED_WORDS, stream); + if (includeErrors) { + digiErrors_d = SiPixelDigiErrorsCUDA(pixelgpudetails::MAX_FED_WORDS, std::move(errors), stream); + } + clusters_d = SiPixelClustersCUDA(gpuClustering::maxNumModules, stream); + + nModules_Clusters_h = cms::cuda::make_host_unique(2, stream); + + if (wordCounter) // protect in case of empty event.... 
+ { + const int threadsPerBlock = 512; + const int blocks = (wordCounter + threadsPerBlock - 1) / threadsPerBlock; // fill it all + + assert(0 == wordCounter % 2); + // wordCounter is the total no of words in each event to be trasfered on device + auto word_d = cms::cuda::make_device_unique(wordCounter, stream); + auto fedId_d = cms::cuda::make_device_unique(wordCounter, stream); + + cudaCheck( + cudaMemcpyAsync(word_d.get(), wordFed.word(), wordCounter * sizeof(uint32_t), cudaMemcpyDefault, stream)); + cudaCheck(cudaMemcpyAsync( + fedId_d.get(), wordFed.fedId(), wordCounter * sizeof(uint8_t) / 2, cudaMemcpyDefault, stream)); + + // Launch rawToDigi kernel + RawToDigi_kernel<<>>( + cablingMap, + modToUnp, + wordCounter, + word_d.get(), + fedId_d.get(), + digis_d.xx(), + digis_d.yy(), + digis_d.adc(), + digis_d.pdigi(), + digis_d.rawIdArr(), + digis_d.moduleInd(), + digiErrors_d.error(), // returns nullptr if default-constructed + useQualityInfo, + includeErrors, + debug); + cudaCheck(cudaGetLastError()); +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + + if (includeErrors) { + digiErrors_d.copyErrorToHostAsync(stream); + } + } + // End of Raw2Digi and passing data for clustering + + { + // clusterizer ... 
+ using namespace gpuClustering; + int threadsPerBlock = 256; + int blocks = + (std::max(int(wordCounter), int(gpuClustering::maxNumModules)) + threadsPerBlock - 1) / threadsPerBlock; + + gpuCalibPixel::calibDigis<<>>(isRun2, + digis_d.moduleInd(), + digis_d.xx(), + digis_d.yy(), + digis_d.adc(), + gains, + wordCounter, + clusters_d.moduleStart(), + clusters_d.clusInModule(), + clusters_d.clusModuleStart()); + cudaCheck(cudaGetLastError()); +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + +#ifdef GPU_DEBUG + std::cout << "CUDA countModules kernel launch with " << blocks << " blocks of " << threadsPerBlock + << " threads\n"; +#endif + + countModules<<>>( + digis_d.moduleInd(), clusters_d.moduleStart(), digis_d.clus(), wordCounter); + cudaCheck(cudaGetLastError()); + + // read the number of modules into a data member, used by getProduct()) + cudaCheck(cudaMemcpyAsync( + &(nModules_Clusters_h[0]), clusters_d.moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream)); + + threadsPerBlock = 256; + blocks = maxNumModules; +#ifdef GPU_DEBUG + std::cout << "CUDA findClus kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n"; +#endif + findClus<<>>(digis_d.moduleInd(), + digis_d.xx(), + digis_d.yy(), + clusters_d.moduleStart(), + clusters_d.clusInModule(), + clusters_d.moduleId(), + digis_d.clus(), + wordCounter); + cudaCheck(cudaGetLastError()); +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + + // apply charge cut + clusterChargeCut<<>>(digis_d.moduleInd(), + digis_d.adc(), + clusters_d.moduleStart(), + clusters_d.clusInModule(), + clusters_d.moduleId(), + digis_d.clus(), + wordCounter); + cudaCheck(cudaGetLastError()); + + // count the module start indices already here (instead of + // rechits) so that the number of clusters/hits can be made + // available in the rechit producer without additional points of + // synchronization/ExternalWork + + // MUST be ONE 
block + fillHitsModuleStart<<<1, 1024, 0, stream>>>(clusters_d.clusInModule(), clusters_d.clusModuleStart()); + + // last element holds the number of all clusters + cudaCheck(cudaMemcpyAsync(&(nModules_Clusters_h[1]), + clusters_d.clusModuleStart() + gpuClustering::maxNumModules, + sizeof(uint32_t), + cudaMemcpyDefault, + stream)); + +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + + } // end clusterizer scope + } +} // namespace pixelgpudetails diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h new file mode 100644 index 0000000000000..75eeab2606dd5 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -0,0 +1,212 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h +#define RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h + +#include +#include + +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "FWCore/Utilities/interface/typedefs.h" +#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" + +struct SiPixelROCsStatusAndMapping; +class SiPixelGainForHLTonGPU; + +namespace pixelgpudetails { + + // Phase 1 geometry constants + const uint32_t layerStartBit = 20; + const uint32_t ladderStartBit = 12; + const uint32_t moduleStartBit = 2; + + const uint32_t panelStartBit = 10; + const uint32_t diskStartBit = 18; + const uint32_t 
bladeStartBit = 12; + + const uint32_t layerMask = 0xF; + const uint32_t ladderMask = 0xFF; + const uint32_t moduleMask = 0x3FF; + const uint32_t panelMask = 0x3; + const uint32_t diskMask = 0xF; + const uint32_t bladeMask = 0x3F; + + const uint32_t LINK_bits = 6; + const uint32_t ROC_bits = 5; + const uint32_t DCOL_bits = 5; + const uint32_t PXID_bits = 8; + const uint32_t ADC_bits = 8; + + // special for layer 1 + const uint32_t LINK_bits_l1 = 6; + const uint32_t ROC_bits_l1 = 5; + const uint32_t COL_bits_l1 = 6; + const uint32_t ROW_bits_l1 = 7; + const uint32_t OMIT_ERR_bits = 1; + + const uint32_t maxROCIndex = 8; + const uint32_t numRowsInRoc = 80; + const uint32_t numColsInRoc = 52; + + const uint32_t MAX_WORD = 2000; + + const uint32_t ADC_shift = 0; + const uint32_t PXID_shift = ADC_shift + ADC_bits; + const uint32_t DCOL_shift = PXID_shift + PXID_bits; + const uint32_t ROC_shift = DCOL_shift + DCOL_bits; + const uint32_t LINK_shift = ROC_shift + ROC_bits_l1; + // special for layer 1 ROC + const uint32_t ROW_shift = ADC_shift + ADC_bits; + const uint32_t COL_shift = ROW_shift + ROW_bits_l1; + const uint32_t OMIT_ERR_shift = 20; + + const uint32_t LINK_mask = ~(~uint32_t(0) << LINK_bits_l1); + const uint32_t ROC_mask = ~(~uint32_t(0) << ROC_bits_l1); + const uint32_t COL_mask = ~(~uint32_t(0) << COL_bits_l1); + const uint32_t ROW_mask = ~(~uint32_t(0) << ROW_bits_l1); + const uint32_t DCOL_mask = ~(~uint32_t(0) << DCOL_bits); + const uint32_t PXID_mask = ~(~uint32_t(0) << PXID_bits); + const uint32_t ADC_mask = ~(~uint32_t(0) << ADC_bits); + const uint32_t ERROR_mask = ~(~uint32_t(0) << ROC_bits_l1); + const uint32_t OMIT_ERR_mask = ~(~uint32_t(0) << OMIT_ERR_bits); + + struct DetIdGPU { + uint32_t rawId; + uint32_t rocInDet; + uint32_t moduleId; + }; + + struct Pixel { + uint32_t row; + uint32_t col; + }; + + class Packing { + public: + using PackedDigiType = uint32_t; + + // Constructor: pre-computes masks and shifts from field widths + __host__ 
__device__ inline constexpr Packing(unsigned int row_w, + unsigned int column_w, + unsigned int time_w, + unsigned int adc_w) + : row_width(row_w), + column_width(column_w), + adc_width(adc_w), + row_shift(0), + column_shift(row_shift + row_w), + time_shift(column_shift + column_w), + adc_shift(time_shift + time_w), + row_mask(~(~0U << row_w)), + column_mask(~(~0U << column_w)), + time_mask(~(~0U << time_w)), + adc_mask(~(~0U << adc_w)), + rowcol_mask(~(~0U << (column_w + row_w))), + max_row(row_mask), + max_column(column_mask), + max_adc(adc_mask) {} + + uint32_t row_width; + uint32_t column_width; + uint32_t adc_width; + + uint32_t row_shift; + uint32_t column_shift; + uint32_t time_shift; + uint32_t adc_shift; + + PackedDigiType row_mask; + PackedDigiType column_mask; + PackedDigiType time_mask; + PackedDigiType adc_mask; + PackedDigiType rowcol_mask; + + uint32_t max_row; + uint32_t max_column; + uint32_t max_adc; + }; + + __host__ __device__ inline constexpr Packing packing() { return Packing(11, 11, 0, 10); } + + __host__ __device__ inline uint32_t pack(uint32_t row, uint32_t col, uint32_t adc) { + constexpr Packing thePacking = packing(); + adc = std::min(adc, thePacking.max_adc); + + return (row << thePacking.row_shift) | (col << thePacking.column_shift) | (adc << thePacking.adc_shift); + } + + constexpr uint32_t pixelToChannel(int row, int col) { + constexpr Packing thePacking = packing(); + return (row << thePacking.column_width) | col; + } + + class SiPixelRawToClusterGPUKernel { + public: + class WordFedAppender { + public: + WordFedAppender(); + ~WordFedAppender() = default; + + void initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t* src, unsigned int length); + + const unsigned int* word() const { return word_.get(); } + const unsigned char* fedId() const { return fedId_.get(); } + + private: + cms::cuda::host::noncached::unique_ptr word_; + cms::cuda::host::noncached::unique_ptr fedId_; + }; + + 
SiPixelRawToClusterGPUKernel() = default; + ~SiPixelRawToClusterGPUKernel() = default; + + SiPixelRawToClusterGPUKernel(const SiPixelRawToClusterGPUKernel&) = delete; + SiPixelRawToClusterGPUKernel(SiPixelRawToClusterGPUKernel&&) = delete; + SiPixelRawToClusterGPUKernel& operator=(const SiPixelRawToClusterGPUKernel&) = delete; + SiPixelRawToClusterGPUKernel& operator=(SiPixelRawToClusterGPUKernel&&) = delete; + + void makeClustersAsync(bool isRun2, + const SiPixelROCsStatusAndMapping* cablingMap, + const unsigned char* modToUnp, + const SiPixelGainForHLTonGPU* gains, + const WordFedAppender& wordFed, + SiPixelFormatterErrors&& errors, + const uint32_t wordCounter, + const uint32_t fedCounter, + bool useQualityInfo, + bool includeErrors, + bool debug, + cudaStream_t stream); + + std::pair getResults() { + digis_d.setNModulesDigis(nModules_Clusters_h[0], nDigis); + clusters_d.setNClusters(nModules_Clusters_h[1]); + // need to explicitly deallocate while the associated CUDA + // stream is still alive + // + // technically the statement above is not true anymore now that + // the CUDA streams are cached within the cms::cuda::StreamCache, but it is + // still better to release as early as possible + nModules_Clusters_h.reset(); + return std::make_pair(std::move(digis_d), std::move(clusters_d)); + } + + SiPixelDigiErrorsCUDA&& getErrors() { return std::move(digiErrors_d); } + + private: + uint32_t nDigis = 0; + + // Data to be put in the event + cms::cuda::host::unique_ptr nModules_Clusters_h; + SiPixelDigisCUDA digis_d; + SiPixelClustersCUDA clusters_d; + SiPixelDigiErrorsCUDA digiErrors_d; + }; + +} // namespace pixelgpudetails + +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h new file mode 100644 index 0000000000000..c21c792f39c30 --- /dev/null +++ 
b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h @@ -0,0 +1,68 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuCalibPixel_h +#define RecoLocalTracker_SiPixelClusterizer_plugins_gpuCalibPixel_h + +#include +#include + +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" + +namespace gpuCalibPixel { + + using gpuClustering::invalidModuleId; + + // valid for run2 + constexpr float VCaltoElectronGain = 47; // L2-4: 47 +- 4.7 + constexpr float VCaltoElectronGain_L1 = 50; // L1: 49.6 +- 2.6 + constexpr float VCaltoElectronOffset = -60; // L2-4: -60 +- 130 + constexpr float VCaltoElectronOffset_L1 = -670; // L1: -670 +- 220 + + __global__ void calibDigis(bool isRun2, + uint16_t* id, + uint16_t const* __restrict__ x, + uint16_t const* __restrict__ y, + uint16_t* adc, + SiPixelGainForHLTonGPU const* __restrict__ ped, + int numElements, + uint32_t* __restrict__ moduleStart, // just to zero first + uint32_t* __restrict__ nClustersInModule, // just to zero them + uint32_t* __restrict__ clusModuleStart // just to zero first + ) { + int first = blockDim.x * blockIdx.x + threadIdx.x; + + // zero for next kernels... + if (0 == first) + clusModuleStart[0] = moduleStart[0] = 0; + for (int i = first; i < gpuClustering::maxNumModules; i += gridDim.x * blockDim.x) { + nClustersInModule[i] = 0; + } + + for (int i = first; i < numElements; i += gridDim.x * blockDim.x) { + if (invalidModuleId == id[i]) + continue; + + float conversionFactor = (isRun2) ? (id[i] < 96 ? VCaltoElectronGain_L1 : VCaltoElectronGain) : 1.f; + float offset = (isRun2) ? (id[i] < 96 ? 
VCaltoElectronOffset_L1 : VCaltoElectronOffset) : 0; + + bool isDeadColumn = false, isNoisyColumn = false; + + int row = x[i]; + int col = y[i]; + auto ret = ped->getPedAndGain(id[i], col, row, isDeadColumn, isNoisyColumn); + float pedestal = ret.first; + float gain = ret.second; + // float pedestal = 0; float gain = 1.; + if (isDeadColumn | isNoisyColumn) { + id[i] = invalidModuleId; + adc[i] = 0; + printf("bad pixel at %d in %d\n", i, id[i]); + } else { + float vcal = adc[i] * gain - pedestal * gain; + adc[i] = std::max(100, int(vcal * conversionFactor + offset)); + } + } + } +} // namespace gpuCalibPixel + +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuCalibPixel_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h new file mode 100644 index 0000000000000..d9520da80b695 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h @@ -0,0 +1,125 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h +#define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h + +#include +#include + +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" +#include "HeterogeneousCore/CUDAUtilities/interface/prefixScan.h" + +namespace gpuClustering { + + __global__ void clusterChargeCut( + uint16_t* __restrict__ id, // module id of each pixel (modified if bad cluster) + uint16_t const* __restrict__ adc, // charge of each pixel + uint32_t const* __restrict__ moduleStart, // index of the first pixel of each module + uint32_t* __restrict__ nClustersInModule, // modified: number of clusters found in each module + uint32_t const* __restrict__ moduleId, // module id of each module + int32_t* __restrict__ clusterId, // modified: cluster id of each pixel + uint32_t numElements) { + __shared__ int32_t charge[maxNumClustersPerModules]; + 
__shared__ uint8_t ok[maxNumClustersPerModules]; + __shared__ uint16_t newclusId[maxNumClustersPerModules]; + + auto firstModule = blockIdx.x; + auto endModule = moduleStart[0]; + for (auto module = firstModule; module < endModule; module += gridDim.x) { + auto firstPixel = moduleStart[1 + module]; + auto thisModuleId = id[firstPixel]; + assert(thisModuleId < maxNumModules); + assert(thisModuleId == moduleId[module]); + + auto nclus = nClustersInModule[thisModuleId]; + if (nclus == 0) + continue; + + if (threadIdx.x == 0 && nclus > maxNumClustersPerModules) + printf("Warning too many clusters in module %d in block %d: %d > %d\n", + thisModuleId, + blockIdx.x, + nclus, + maxNumClustersPerModules); + + auto first = firstPixel + threadIdx.x; + + if (nclus > maxNumClustersPerModules) { + // remove excess FIXME find a way to cut charge first.... + for (auto i = first; i < numElements; i += blockDim.x) { + if (id[i] == invalidModuleId) + continue; // not valid + if (id[i] != thisModuleId) + break; // end of module + if (clusterId[i] >= maxNumClustersPerModules) { + id[i] = invalidModuleId; + clusterId[i] = invalidModuleId; + } + } + nclus = maxNumClustersPerModules; + } + +#ifdef GPU_DEBUG + if (thisModuleId % 100 == 1) + if (threadIdx.x == 0) + printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x); +#endif + + assert(nclus <= maxNumClustersPerModules); + for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { + charge[i] = 0; + } + __syncthreads(); + + for (auto i = first; i < numElements; i += blockDim.x) { + if (id[i] == invalidModuleId) + continue; // not valid + if (id[i] != thisModuleId) + break; // end of module + atomicAdd(&charge[clusterId[i]], adc[i]); + } + __syncthreads(); + + auto chargeCut = thisModuleId < 96 ? 2000 : 4000; // move in constants (calib?) + for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { + newclusId[i] = ok[i] = charge[i] > chargeCut ? 
1 : 0; + } + + __syncthreads(); + + // renumber + __shared__ uint16_t ws[32]; + cms::cuda::blockPrefixScan(newclusId, nclus, ws); + + assert(nclus >= newclusId[nclus - 1]); + + if (nclus == newclusId[nclus - 1]) + continue; + + nClustersInModule[thisModuleId] = newclusId[nclus - 1]; + __syncthreads(); + + // mark bad cluster again + for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { + if (0 == ok[i]) + newclusId[i] = invalidModuleId + 1; + } + __syncthreads(); + + // reassign id + for (auto i = first; i < numElements; i += blockDim.x) { + if (id[i] == invalidModuleId) + continue; // not valid + if (id[i] != thisModuleId) + break; // end of module + clusterId[i] = newclusId[clusterId[i]] - 1; + if (clusterId[i] == invalidModuleId) + id[i] = invalidModuleId; + } + + //done + } // loop on modules + } + +} // namespace gpuClustering + +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h new file mode 100644 index 0000000000000..8467c1d74e2d9 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -0,0 +1,303 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h +#define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h + +#include +#include + +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" + +namespace gpuClustering { + +#ifdef GPU_DEBUG + __device__ uint32_t gMaxHit = 0; +#endif + + __global__ void countModules(uint16_t const* __restrict__ id, + uint32_t* __restrict__ moduleStart, + int32_t* __restrict__ clusterId, + int numElements) { + int first = blockDim.x * blockIdx.x + threadIdx.x; + for (int i = first; i 
< numElements; i += gridDim.x * blockDim.x) { + clusterId[i] = i; + if (invalidModuleId == id[i]) + continue; + auto j = i - 1; + while (j >= 0 and id[j] == invalidModuleId) + --j; + if (j < 0 or id[j] != id[i]) { + // boundary... + auto loc = atomicInc(moduleStart, maxNumModules); + moduleStart[loc + 1] = i; + } + } + } + + __global__ void findClus(uint16_t const* __restrict__ id, // module id of each pixel + uint16_t const* __restrict__ x, // local coordinates of each pixel + uint16_t const* __restrict__ y, // + uint32_t const* __restrict__ moduleStart, // index of the first pixel of each module + uint32_t* __restrict__ nClustersInModule, // output: number of clusters found in each module + uint32_t* __restrict__ moduleId, // output: module id of each module + int32_t* __restrict__ clusterId, // output: cluster id of each pixel + int numElements) { + __shared__ int msize; + + auto firstModule = blockIdx.x; + auto endModule = moduleStart[0]; + for (auto module = firstModule; module < endModule; module += gridDim.x) { + auto firstPixel = moduleStart[1 + module]; + auto thisModuleId = id[firstPixel]; + assert(thisModuleId < maxNumModules); + +#ifdef GPU_DEBUG + if (thisModuleId % 100 == 1) + if (threadIdx.x == 0) + printf("start clusterizer for module %d in block %d\n", thisModuleId, blockIdx.x); +#endif + + auto first = firstPixel + threadIdx.x; + + // find the index of the first pixel not belonging to this module (or invalid) + msize = numElements; + __syncthreads(); + + // skip threads not associated to an existing pixel + for (int i = first; i < numElements; i += blockDim.x) { + if (id[i] == invalidModuleId) // skip invalid pixels + continue; + if (id[i] != thisModuleId) { // find the first pixel in a different module + atomicMin(&msize, i); + break; + } + } + + //init hist (ymax=416 < 512 : 9bits) + constexpr uint32_t maxPixInModule = 4000; + constexpr auto nbins = phase1PixelTopology::numColsInModule + 2; //2+2; + using Hist = cms::cuda::HistoContainer; + 
__shared__ Hist hist; + __shared__ typename Hist::Counter ws[32]; + for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) { + hist.off[j] = 0; + } + __syncthreads(); + + assert((msize == numElements) or ((msize < numElements) and (id[msize] != thisModuleId))); + + // limit to maxPixInModule (FIXME if recurrent (and not limited to simulation with low threshold) one will need to implement something cleverer) + if (0 == threadIdx.x) { + if (msize - firstPixel > maxPixInModule) { + printf("too many pixels in module %d: %d > %d\n", thisModuleId, msize - firstPixel, maxPixInModule); + msize = maxPixInModule + firstPixel; + } + } + + __syncthreads(); + assert(msize - firstPixel <= maxPixInModule); + +#ifdef GPU_DEBUG + __shared__ uint32_t totGood; + totGood = 0; + __syncthreads(); +#endif + + // fill histo + for (int i = first; i < msize; i += blockDim.x) { + if (id[i] == invalidModuleId) // skip invalid pixels + continue; + hist.count(y[i]); +#ifdef GPU_DEBUG + atomicAdd(&totGood, 1); +#endif + } + __syncthreads(); + if (threadIdx.x < 32) + ws[threadIdx.x] = 0; // used by prefix scan... 
+ __syncthreads(); + hist.finalize(ws); + __syncthreads(); +#ifdef GPU_DEBUG + assert(hist.size() == totGood); + if (thisModuleId % 100 == 1) + if (threadIdx.x == 0) + printf("histo size %d\n", hist.size()); +#endif + for (int i = first; i < msize; i += blockDim.x) { + if (id[i] == invalidModuleId) // skip invalid pixels + continue; + hist.fill(y[i], i - firstPixel); + } + +#ifdef __CUDA_ARCH__ + // assume that we can cover the whole module with up to 16 blockDim.x-wide iterations + constexpr int maxiter = 16; +#else + auto maxiter = hist.size(); +#endif + // allocate space for duplicate pixels: a pixel can appear more than once with different charge in the same event + constexpr int maxNeighbours = 10; + assert((hist.size() / blockDim.x) <= maxiter); + // nearest neighbour + uint16_t nn[maxiter][maxNeighbours]; + uint8_t nnn[maxiter]; // number of nn + for (uint32_t k = 0; k < maxiter; ++k) + nnn[k] = 0; + + __syncthreads(); // for hit filling! + +#ifdef GPU_DEBUG + // look for anomalous high occupancy + __shared__ uint32_t n40, n60; + n40 = n60 = 0; + __syncthreads(); + for (auto j = threadIdx.x; j < Hist::nbins(); j += blockDim.x) { + if (hist.size(j) > 60) + atomicAdd(&n60, 1); + if (hist.size(j) > 40) + atomicAdd(&n40, 1); + } + __syncthreads(); + if (0 == threadIdx.x) { + if (n60 > 0) + printf("columns with more than 60 px %d in %d\n", n60, thisModuleId); + else if (n40 > 0) + printf("columns with more than 40 px %d in %d\n", n40, thisModuleId); + } + __syncthreads(); +#endif + + // fill NN + for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { + assert(k < maxiter); + auto p = hist.begin() + j; + auto i = *p + firstPixel; + assert(id[i] != invalidModuleId); + assert(id[i] == thisModuleId); // same module + int be = Hist::bin(y[i] + 1); + auto e = hist.end(be); + ++p; + assert(0 == nnn[k]); + for (; p < e; ++p) { + auto m = (*p) + firstPixel; + assert(m != i); + assert(int(y[m]) - int(y[i]) >= 0); + assert(int(y[m]) - int(y[i]) <= 1); + 
if (std::abs(int(x[m]) - int(x[i])) > 1) + continue; + auto l = nnn[k]++; + assert(l < maxNeighbours); + nn[k][l] = *p; + } + } + + // for each pixel, look at all the pixels until the end of the module; + // when two valid pixels within +/- 1 in x or y are found, set their id to the minimum; + // after the loop, all the pixel in each cluster should have the id equeal to the lowest + // pixel in the cluster ( clus[i] == i ). + bool more = true; + int nloops = 0; + while (__syncthreads_or(more)) { + if (1 == nloops % 2) { + for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { + auto p = hist.begin() + j; + auto i = *p + firstPixel; + auto m = clusterId[i]; + while (m != clusterId[m]) + m = clusterId[m]; + clusterId[i] = m; + } + } else { + more = false; + for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { + auto p = hist.begin() + j; + auto i = *p + firstPixel; + for (int kk = 0; kk < nnn[k]; ++kk) { + auto l = nn[k][kk]; + auto m = l + firstPixel; + assert(m != i); + auto old = atomicMin(&clusterId[m], clusterId[i]); + if (old != clusterId[i]) { + // end the loop only if no changes were applied + more = true; + } + atomicMin(&clusterId[i], old); + } // nnloop + } // pixel loop + } + ++nloops; + } // end while + +#ifdef GPU_DEBUG + { + __shared__ int n0; + if (threadIdx.x == 0) + n0 = nloops; + __syncthreads(); + auto ok = n0 == nloops; + assert(__syncthreads_and(ok)); + if (thisModuleId % 100 == 1) + if (threadIdx.x == 0) + printf("# loops %d\n", nloops); + } +#endif + + __shared__ unsigned int foundClusters; + foundClusters = 0; + __syncthreads(); + + // find the number of different clusters, identified by a pixels with clus[i] == i; + // mark these pixels with a negative id. 
+ for (int i = first; i < msize; i += blockDim.x) { + if (id[i] == invalidModuleId) // skip invalid pixels + continue; + if (clusterId[i] == i) { + auto old = atomicInc(&foundClusters, 0xffffffff); + clusterId[i] = -(old + 1); + } + } + __syncthreads(); + + // propagate the negative id to all the pixels in the cluster. + for (int i = first; i < msize; i += blockDim.x) { + if (id[i] == invalidModuleId) // skip invalid pixels + continue; + if (clusterId[i] >= 0) { + // mark each pixel in a cluster with the same id as the first one + clusterId[i] = clusterId[clusterId[i]]; + } + } + __syncthreads(); + + // adjust the cluster id to be a positive value starting from 0 + for (int i = first; i < msize; i += blockDim.x) { + if (id[i] == invalidModuleId) { // skip invalid pixels + clusterId[i] = -9999; + continue; + } + clusterId[i] = -clusterId[i] - 1; + } + __syncthreads(); + + if (threadIdx.x == 0) { + nClustersInModule[thisModuleId] = foundClusters; + moduleId[module] = thisModuleId; +#ifdef GPU_DEBUG + if (foundClusters > gMaxHit) { + gMaxHit = foundClusters; + if (foundClusters > 8) + printf("max hit %d in %d\n", foundClusters, thisModuleId); + } +#endif +#ifdef GPU_DEBUG + if (thisModuleId % 100 == 1) + printf("%d clusters in module %d\n", foundClusters, thisModuleId); +#endif + } + } // module loop + } +} // namespace gpuClustering + +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h diff --git a/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py b/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py index ba8d492c5f610..b9c6862b015bf 100644 --- a/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py +++ b/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py @@ -1,7 +1,17 @@ - import FWCore.ParameterSet.Config as cms -# from CondTools.SiPixel.SiPixelGainCalibrationService_cfi import * from 
import FWCore.ParameterSet.Config as cms

from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizer_cfi import siPixelClusters as _siPixelClusters
from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA

# CPU branch: run the legacy clusterizer.
siPixelClustersPreSplitting = SwitchProducerCUDA(
    cpu = _siPixelClusters.clone()
)

# GPU branch: the legacy-format clusters come out of the SoA converter,
# exposed here through an EDAlias.
from Configuration.ProcessModifiers.gpu_cff import gpu
gpu.toModify(siPixelClustersPreSplitting,
    cuda = cms.EDAlias(
        siPixelDigisClustersPreSplitting = cms.VPSet(
            cms.PSet(type = cms.string("SiPixelClusteredmNewDetSetVector"))
        )
    )
)

# --- RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py ---

from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizerPreSplitting_cfi import siPixelClustersPreSplitting
from RecoLocalTracker.SiPixelClusterizer.siPixelRawToClusterCUDA_cfi import siPixelRawToClusterCUDA as _siPixelRawToClusterCUDA
from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoA_cfi import siPixelDigisClustersFromSoA as _siPixelDigisClustersFromSoA
from CalibTracker.SiPixelESProducers.siPixelROCsStatusAndMappingWrapperESProducer_cfi import *
from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTGPU_cfi import *

# default (CPU) task
siPixelClustersPreSplittingTask = cms.Task(siPixelClustersPreSplitting)

# GPU producers: raw -> digi/cluster SoA on the device ...
siPixelClustersPreSplittingCUDA = _siPixelRawToClusterCUDA.clone()
from Configuration.Eras.Modifier_run3_common_cff import run3_common
run3_common.toModify(siPixelClustersPreSplittingCUDA,
    isRun2 = False
)

# ... followed by the SoA -> legacy format conversion
siPixelDigisClustersPreSplitting = _siPixelDigisClustersFromSoA.clone()
siPixelClustersPreSplittingTaskCUDA = cms.Task(
    siPixelClustersPreSplittingCUDA,
    siPixelDigisClustersPreSplitting,
)

# with the gpu modifier, extend the task with the CUDA chain
from Configuration.ProcessModifiers.gpu_cff import gpu
_siPixelClustersPreSplittingTask_gpu = siPixelClustersPreSplittingTask.copy()
_siPixelClustersPreSplittingTask_gpu.add(siPixelClustersPreSplittingTaskCUDA)
gpu.toReplaceWith(siPixelClustersPreSplittingTask, _siPixelClustersPreSplittingTask_gpu)
+#endif // __CUDACC__ + +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" + +int main(void) { +#ifdef __CUDACC__ + cms::cudatest::requireDevices(); +#endif // __CUDACC__ + + using namespace gpuClustering; + + constexpr int numElements = 256 * maxNumModules; + + // these in reality are already on GPU + auto h_id = std::make_unique(numElements); + auto h_x = std::make_unique(numElements); + auto h_y = std::make_unique(numElements); + auto h_adc = std::make_unique(numElements); + auto h_clus = std::make_unique(numElements); + +#ifdef __CUDACC__ + auto d_id = cms::cuda::make_device_unique(numElements, nullptr); + auto d_x = cms::cuda::make_device_unique(numElements, nullptr); + auto d_y = cms::cuda::make_device_unique(numElements, nullptr); + auto d_adc = cms::cuda::make_device_unique(numElements, nullptr); + auto d_clus = cms::cuda::make_device_unique(numElements, nullptr); + auto d_moduleStart = cms::cuda::make_device_unique(maxNumModules + 1, nullptr); + auto d_clusInModule = cms::cuda::make_device_unique(maxNumModules, nullptr); + auto d_moduleId = cms::cuda::make_device_unique(maxNumModules, nullptr); +#else // __CUDACC__ + auto h_moduleStart = std::make_unique(maxNumModules + 1); + auto h_clusInModule = std::make_unique(maxNumModules); + auto h_moduleId = std::make_unique(maxNumModules); +#endif // __CUDACC__ + + // later random number + int n = 0; + int ncl = 0; + int y[10] = {5, 7, 9, 1, 3, 0, 4, 8, 2, 6}; + + auto generateClusters = [&](int kn) { + auto addBigNoise = 1 == kn % 2; + if (addBigNoise) { + constexpr int MaxPixels = 1000; + int id = 666; + for (int x = 0; x < 140; x += 3) { + for (int yy = 0; yy < 400; yy += 3) { + h_id[n] = id; + h_x[n] = x; + h_y[n] = yy; + h_adc[n] = 1000; + ++n; + ++ncl; + if (MaxPixels <= ncl) + break; + } + if (MaxPixels <= ncl) + break; + } + } + + { + // isolated + int id = 42; + int x = 10; + ++ncl; + h_id[n] = id; + h_x[n] 
= x; + h_y[n] = x; + h_adc[n] = kn == 0 ? 100 : 5000; + ++n; + + // first column + ++ncl; + h_id[n] = id; + h_x[n] = x; + h_y[n] = 0; + h_adc[n] = 5000; + ++n; + // first columns + ++ncl; + h_id[n] = id; + h_x[n] = x + 80; + h_y[n] = 2; + h_adc[n] = 5000; + ++n; + h_id[n] = id; + h_x[n] = x + 80; + h_y[n] = 1; + h_adc[n] = 5000; + ++n; + + // last column + ++ncl; + h_id[n] = id; + h_x[n] = x; + h_y[n] = 415; + h_adc[n] = 5000; + ++n; + // last columns + ++ncl; + h_id[n] = id; + h_x[n] = x + 80; + h_y[n] = 415; + h_adc[n] = 2500; + ++n; + h_id[n] = id; + h_x[n] = x + 80; + h_y[n] = 414; + h_adc[n] = 2500; + ++n; + + // diagonal + ++ncl; + for (int x = 20; x < 25; ++x) { + h_id[n] = id; + h_x[n] = x; + h_y[n] = x; + h_adc[n] = 1000; + ++n; + } + ++ncl; + // reversed + for (int x = 45; x > 40; --x) { + h_id[n] = id; + h_x[n] = x; + h_y[n] = x; + h_adc[n] = 1000; + ++n; + } + ++ncl; + h_id[n++] = invalidModuleId; // error + // messy + int xx[5] = {21, 25, 23, 24, 22}; + for (int k = 0; k < 5; ++k) { + h_id[n] = id; + h_x[n] = xx[k]; + h_y[n] = 20 + xx[k]; + h_adc[n] = 1000; + ++n; + } + // holes + ++ncl; + for (int k = 0; k < 5; ++k) { + h_id[n] = id; + h_x[n] = xx[k]; + h_y[n] = 100; + h_adc[n] = kn == 2 ? 100 : 1000; + ++n; + if (xx[k] % 2 == 0) { + h_id[n] = id; + h_x[n] = xx[k]; + h_y[n] = 101; + h_adc[n] = 1000; + ++n; + } + } + } + { + // id == 0 (make sure it works! 
+ int id = 0; + int x = 10; + ++ncl; + h_id[n] = id; + h_x[n] = x; + h_y[n] = x; + h_adc[n] = 5000; + ++n; + } + // all odd id + for (int id = 11; id <= 1800; id += 2) { + if ((id / 20) % 2) + h_id[n++] = invalidModuleId; // error + for (int x = 0; x < 40; x += 4) { + ++ncl; + if ((id / 10) % 2) { + for (int k = 0; k < 10; ++k) { + h_id[n] = id; + h_x[n] = x; + h_y[n] = x + y[k]; + h_adc[n] = 100; + ++n; + h_id[n] = id; + h_x[n] = x + 1; + h_y[n] = x + y[k] + 2; + h_adc[n] = 1000; + ++n; + } + } else { + for (int k = 0; k < 10; ++k) { + h_id[n] = id; + h_x[n] = x; + h_y[n] = x + y[9 - k]; + h_adc[n] = kn == 2 ? 10 : 1000; + ++n; + if (y[k] == 3) + continue; // hole + if (id == 51) { + h_id[n++] = invalidModuleId; + h_id[n++] = invalidModuleId; + } // error + h_id[n] = id; + h_x[n] = x + 1; + h_y[n] = x + y[k] + 2; + h_adc[n] = kn == 2 ? 10 : 1000; + ++n; + } + } + } + } + }; // end lambda + for (auto kkk = 0; kkk < 5; ++kkk) { + n = 0; + ncl = 0; + generateClusters(kkk); + + std::cout << "created " << n << " digis in " << ncl << " clusters" << std::endl; + assert(n <= numElements); + + uint32_t nModules = 0; +#ifdef __CUDACC__ + size_t size32 = n * sizeof(unsigned int); + size_t size16 = n * sizeof(unsigned short); + // size_t size8 = n * sizeof(uint8_t); + + cudaCheck(cudaMemcpy(d_moduleStart.get(), &nModules, sizeof(uint32_t), cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpy(d_id.get(), h_id.get(), size16, cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpy(d_x.get(), h_x.get(), size16, cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpy(d_y.get(), h_y.get(), size16, cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpy(d_adc.get(), h_adc.get(), size16, cudaMemcpyHostToDevice)); + + // Launch CUDA Kernels + int threadsPerBlock = (kkk == 5) ? 512 : ((kkk == 3) ? 
128 : 256); + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA countModules kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock + << " threads\n"; + + cms::cuda::launch(countModules, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_moduleStart.get(), d_clus.get(), n); + + blocksPerGrid = maxNumModules; //nModules; + + std::cout << "CUDA findModules kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock + << " threads\n"; + cudaCheck(cudaMemset(d_clusInModule.get(), 0, maxNumModules * sizeof(uint32_t))); + + cms::cuda::launch(findClus, + {blocksPerGrid, threadsPerBlock}, + d_id.get(), + d_x.get(), + d_y.get(), + d_moduleStart.get(), + d_clusInModule.get(), + d_moduleId.get(), + d_clus.get(), + n); + cudaDeviceSynchronize(); + cudaCheck(cudaMemcpy(&nModules, d_moduleStart.get(), sizeof(uint32_t), cudaMemcpyDeviceToHost)); + + uint32_t nclus[maxNumModules], moduleId[nModules]; + cudaCheck(cudaMemcpy(&nclus, d_clusInModule.get(), maxNumModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); + + std::cout << "before charge cut found " << std::accumulate(nclus, nclus + maxNumModules, 0) << " clusters" + << std::endl; + for (auto i = maxNumModules; i > 0; i--) + if (nclus[i - 1] > 0) { + std::cout << "last module is " << i - 1 << ' ' << nclus[i - 1] << std::endl; + break; + } + if (ncl != std::accumulate(nclus, nclus + maxNumModules, 0)) + std::cout << "ERROR!!!!! 
wrong number of cluster found" << std::endl; + + cms::cuda::launch(clusterChargeCut, + {blocksPerGrid, threadsPerBlock}, + d_id.get(), + d_adc.get(), + d_moduleStart.get(), + d_clusInModule.get(), + d_moduleId.get(), + d_clus.get(), + n); + + cudaDeviceSynchronize(); +#else // __CUDACC__ + h_moduleStart[0] = nModules; + countModules(h_id.get(), h_moduleStart.get(), h_clus.get(), n); + memset(h_clusInModule.get(), 0, maxNumModules * sizeof(uint32_t)); + findClus( + h_id.get(), h_x.get(), h_y.get(), h_moduleStart.get(), h_clusInModule.get(), h_moduleId.get(), h_clus.get(), n); + + nModules = h_moduleStart[0]; + auto nclus = h_clusInModule.get(); + + std::cout << "before charge cut found " << std::accumulate(nclus, nclus + maxNumModules, 0) << " clusters" + << std::endl; + for (auto i = maxNumModules; i > 0; i--) + if (nclus[i - 1] > 0) { + std::cout << "last module is " << i - 1 << ' ' << nclus[i - 1] << std::endl; + break; + } + if (ncl != std::accumulate(nclus, nclus + maxNumModules, 0)) + std::cout << "ERROR!!!!! 
wrong number of cluster found" << std::endl; + + clusterChargeCut( + h_id.get(), h_adc.get(), h_moduleStart.get(), h_clusInModule.get(), h_moduleId.get(), h_clus.get(), n); +#endif // __CUDACC__ + + std::cout << "found " << nModules << " Modules active" << std::endl; + +#ifdef __CUDACC__ + cudaCheck(cudaMemcpy(h_id.get(), d_id.get(), size16, cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpy(h_clus.get(), d_clus.get(), size32, cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpy(&nclus, d_clusInModule.get(), maxNumModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpy(&moduleId, d_moduleId.get(), nModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); +#endif // __CUDACC__ + + std::set clids; + for (int i = 0; i < n; ++i) { + assert(h_id[i] != 666); // only noise + if (h_id[i] == invalidModuleId) + continue; + assert(h_clus[i] >= 0); + assert(h_clus[i] < int(nclus[h_id[i]])); + clids.insert(h_id[i] * 1000 + h_clus[i]); + // clids.insert(h_clus[i]); + } + + // verify no hole in numbering + auto p = clids.begin(); + auto cmid = (*p) / 1000; + assert(0 == (*p) % 1000); + auto c = p; + ++c; + std::cout << "first clusters " << *p << ' ' << *c << ' ' << nclus[cmid] << ' ' << nclus[(*c) / 1000] << std::endl; + std::cout << "last cluster " << *clids.rbegin() << ' ' << nclus[(*clids.rbegin()) / 1000] << std::endl; + for (; c != clids.end(); ++c) { + auto cc = *c; + auto pp = *p; + auto mid = cc / 1000; + auto pnc = pp % 1000; + auto nc = cc % 1000; + if (mid != cmid) { + assert(0 == cc % 1000); + assert(nclus[cmid] - 1 == pp % 1000); + // if (nclus[cmid]-1 != pp%1000) std::cout << "error size " << mid << ": " << nclus[mid] << ' ' << pp << std::endl; + cmid = mid; + p = c; + continue; + } + p = c; + // assert(nc==pnc+1); + if (nc != pnc + 1) + std::cout << "error " << mid << ": " << nc << ' ' << pnc << std::endl; + } + + std::cout << "found " << std::accumulate(nclus, nclus + maxNumModules, 0) << ' ' << clids.size() << " clusters" + << std::endl; + for (auto i 
= maxNumModules; i > 0; i--) + if (nclus[i - 1] > 0) { + std::cout << "last module is " << i - 1 << ' ' << nclus[i - 1] << std::endl; + break; + } + // << " and " << seeds.size() << " seeds" << std::endl; + } /// end loop kkk + return 0; +} diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index d0f5f096dbb19..d9376d88f7bbd 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -1,12 +1,14 @@ - - + + + + + - + + - - diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h index 4a7ba119b0a5b..05e59585ba6ba 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h @@ -1,5 +1,5 @@ -#ifndef RecoLocalTracker_SiPixelRecHits_PixelCPEBase_H -#define RecoLocalTracker_SiPixelRecHits_PixelCPEBase_H 1 +#ifndef RecoLocalTracker_SiPixelRecHits_interface_PixelCPEBase_h +#define RecoLocalTracker_SiPixelRecHits_interface_PixelCPEBase_h 1 //----------------------------------------------------------------------------- // \class PixelCPEBase @@ -11,43 +11,32 @@ // Change to use Generic error & Template calibration from DB - D.Fehling 11/08 //----------------------------------------------------------------------------- +#ifdef EDM_ML_DEBUG +#include +#endif +#include #include #include -#include "TMath.h" -#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" -#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitQuality.h" +#include +#include "CondFormats/SiPixelObjects/interface/SiPixelGenErrorDBObject.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelLorentzAngle.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelTemplateDBObject.h" +#include "DataFormats/GeometryCommonDetAlgo/interface/MeasurementError.h" +#include 
"DataFormats/GeometryCommonDetAlgo/interface/MeasurementPoint.h" +#include "DataFormats/GeometrySurface/interface/GloballyPositioned.h" #include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitQuality.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" #include "Geometry/CommonDetUnit/interface/GeomDetType.h" #include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" #include "Geometry/CommonTopologies/interface/PixelTopology.h" #include "Geometry/CommonTopologies/interface/Topology.h" - -//--- For the configuration: -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" - -#include "DataFormats/GeometryCommonDetAlgo/interface/MeasurementPoint.h" -#include "DataFormats/GeometryCommonDetAlgo/interface/MeasurementError.h" -#include "DataFormats/GeometrySurface/interface/GloballyPositioned.h" - -#include "CondFormats/SiPixelObjects/interface/SiPixelLorentzAngle.h" - -// new errors -#include "CondFormats/SiPixelObjects/interface/SiPixelGenErrorDBObject.h" -// old errors -//#include "CondFormats/SiPixelObjects/interface/SiPixelCPEGenericErrorParm.h" - -#include "CondFormats/SiPixelObjects/interface/SiPixelTemplateDBObject.h" - -#include - -#include -#ifdef EDM_ML_DEBUG -#include -#endif +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" class RectangularPixelTopology; class MagneticField; @@ -78,11 +67,12 @@ class PixelCPEBase : public PixelClusterParameterEstimator { }; struct ClusterParam { + ClusterParam() {} ClusterParam(const SiPixelCluster& cl) : theCluster(&cl) {} virtual ~ClusterParam() = default; 
- const SiPixelCluster* theCluster; + const SiPixelCluster* theCluster = nullptr; //--- Cluster-level quantities (filled in computeAnglesFrom....) float cotalpha; @@ -143,7 +133,7 @@ class PixelCPEBase : public PixelClusterParameterEstimator { inline ReturnType getParameters(const SiPixelCluster& cl, const GeomDetUnit& det) const override { #ifdef EDM_ML_DEBUG nRecHitsTotal_++; - //std::cout<<" in PixelCPEBase:localParameters(all) - "< + +#include "CalibTracker/SiPixelESProducers/interface/SiPixelCPEGenericDBErrorParametrization.h" +#include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" +#include "CondFormats/SiPixelTransient/interface/SiPixelTemplate.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" + +class MagneticField; +class PixelCPEFast final : public PixelCPEBase { +public: + struct ClusterParamGeneric : ClusterParam { + ClusterParamGeneric() {} + ClusterParamGeneric(const SiPixelCluster &cl) : ClusterParam(cl) {} + + // The truncation value pix_maximum is an angle-dependent cutoff on the + // individual pixel signals. It should be applied to all pixels in the + // cluster [signal_i = fminf(signal_i, pixmax)] before the column and row + // sums are made. 
Morris + int pixmx; + + // These are errors predicted by PIXELAV + float sigmay; // CPE Generic y-error for multi-pixel cluster + float sigmax; // CPE Generic x-error for multi-pixel cluster + float sy1; // CPE Generic y-error for single single-pixel + float sy2; // CPE Generic y-error for single double-pixel cluster + float sx1; // CPE Generic x-error for single single-pixel cluster + float sx2; // CPE Generic x-error for single double-pixel cluster + }; + + PixelCPEFast(edm::ParameterSet const &conf, + const MagneticField *, + const TrackerGeometry &, + const TrackerTopology &, + const SiPixelLorentzAngle *, + const SiPixelGenErrorDBObject *, + const SiPixelLorentzAngle *); + + ~PixelCPEFast() override = default; + + static void fillPSetDescription(edm::ParameterSetDescription &desc); + + // The return value can only be used safely in kernels launched on + // the same cudaStream, or after cudaStreamSynchronize. + const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cudaStream_t cudaStream) const; + + pixelCPEforGPU::ParamsOnGPU const &getCPUProduct() const { return cpuData_; } + +private: + std::unique_ptr createClusterParam(const SiPixelCluster &cl) const override; + + LocalPoint localPosition(DetParam const &theDetParam, ClusterParam &theClusterParam) const override; + LocalError localError(DetParam const &theDetParam, ClusterParam &theClusterParam) const override; + + void errorFromTemplates(DetParam const &theDetParam, ClusterParamGeneric &theClusterParam, float qclus) const; + + static void collect_edge_charges(ClusterParam &theClusterParam, //!< input, the cluster + int &q_f_X, //!< output, Q first in X + int &q_l_X, //!< output, Q last in X + int &q_f_Y, //!< output, Q first in Y + int &q_l_Y, //!< output, Q last in Y + bool truncate); + + const float edgeClusterErrorX_; + const float edgeClusterErrorY_; + const bool useErrorsFromTemplates_; + const bool truncatePixelCharge_; + + std::vector xerr_barrel_l1_, yerr_barrel_l1_, xerr_barrel_ln_; + std::vector 
yerr_barrel_ln_, xerr_endcap_, yerr_endcap_; + float xerr_barrel_l1_def_, yerr_barrel_l1_def_, xerr_barrel_ln_def_; + float yerr_barrel_ln_def_, xerr_endcap_def_, yerr_endcap_def_; + + //--- DB Error Parametrization object, new light templates + std::vector thePixelGenError_; + + // allocate this with posix malloc to be compatible with the cpu workflow + std::vector detParamsGPU_; + pixelCPEforGPU::CommonParams commonParamsGPU_; + pixelCPEforGPU::LayerGeometry layerGeometry_; + pixelCPEforGPU::AverageGeometry averageGeometry_; + pixelCPEforGPU::ParamsOnGPU cpuData_; + + struct GPUData { + ~GPUData(); + // not needed if not used on CPU... + pixelCPEforGPU::ParamsOnGPU paramsOnGPU_h; + pixelCPEforGPU::ParamsOnGPU *paramsOnGPU_d = nullptr; // copy of the above on the Device + }; + cms::cuda::ESProduct gpuData_; + + void fillParamsForGpu(); +}; + +#endif // RecoLocalTracker_SiPixelRecHits_PixelCPEFast_h diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h new file mode 100644 index 0000000000000..03e136d8d23ef --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -0,0 +1,343 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_pixelCPEforGPU_h +#define RecoLocalTracker_SiPixelRecHits_pixelCPEforGPU_h + +#include +#include +#include +#include + +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "DataFormats/GeometrySurface/interface/SOARotation.h" +#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" + +namespace pixelCPEforGPU { + + using Frame = SOAFrame; + using Rotation = SOARotation; + + // all modules are identical! 
+ struct CommonParams { + float theThicknessB; + float theThicknessE; + float thePitchX; + float thePitchY; + }; + + struct DetParams { + bool isBarrel; + bool isPosZ; + uint16_t layer; + uint16_t index; + uint32_t rawId; + + float shiftX; + float shiftY; + float chargeWidthX; + float chargeWidthY; + uint16_t pixmx; // max pix charge + + float x0, y0, z0; // the vertex in the local coord of the detector + + float sx[3], sy[3]; // the errors... + + Frame frame; + }; + + using phase1PixelTopology::AverageGeometry; + + struct LayerGeometry { + uint32_t layerStart[phase1PixelTopology::numberOfLayers + 1]; + uint8_t layer[phase1PixelTopology::layerIndexSize]; + }; + + struct ParamsOnGPU { + CommonParams const* m_commonParams; + DetParams const* m_detParams; + LayerGeometry const* m_layerGeometry; + AverageGeometry const* m_averageGeometry; + + constexpr CommonParams const& __restrict__ commonParams() const { + CommonParams const* __restrict__ l = m_commonParams; + return *l; + } + constexpr DetParams const& __restrict__ detParams(int i) const { + DetParams const* __restrict__ l = m_detParams; + return l[i]; + } + constexpr LayerGeometry const& __restrict__ layerGeometry() const { return *m_layerGeometry; } + constexpr AverageGeometry const& __restrict__ averageGeometry() const { return *m_averageGeometry; } + + __device__ uint8_t layer(uint16_t id) const { + return __ldg(m_layerGeometry->layer + id / phase1PixelTopology::maxModuleStride); + }; + }; + + // SOA (on device) + template + struct ClusParamsT { + uint32_t minRow[N]; + uint32_t maxRow[N]; + uint32_t minCol[N]; + uint32_t maxCol[N]; + + int32_t q_f_X[N]; + int32_t q_l_X[N]; + int32_t q_f_Y[N]; + int32_t q_l_Y[N]; + + int32_t charge[N]; + + float xpos[N]; + float ypos[N]; + + float xerr[N]; + float yerr[N]; + + int16_t xsize[N]; // clipped at 127 if negative is edge.... 
+ int16_t ysize[N]; + }; + + constexpr int32_t MaxHitsInIter = gpuClustering::maxHitsInIter(); + using ClusParams = ClusParamsT; + + constexpr inline void computeAnglesFromDet( + DetParams const& __restrict__ detParams, float const x, float const y, float& cotalpha, float& cotbeta) { + // x,y local position on det + auto gvx = x - detParams.x0; + auto gvy = y - detParams.y0; + auto gvz = -1.f / detParams.z0; + // normalization not required as only ratio used... + // calculate angles + cotalpha = gvx * gvz; + cotbeta = gvy * gvz; + } + + constexpr inline float correction(int sizeM1, + int q_f, //!< Charge in the first pixel. + int q_l, //!< Charge in the last pixel. + uint16_t upper_edge_first_pix, //!< As the name says. + uint16_t lower_edge_last_pix, //!< As the name says. + float lorentz_shift, //!< L-shift at half thickness + float theThickness, //detector thickness + float cot_angle, //!< cot of alpha_ or beta_ + float pitch, //!< thePitchX or thePitchY + bool first_is_big, //!< true if the first is big + bool last_is_big) //!< true if the last is big + { + if (0 == sizeM1) // size 1 + return 0; + + float w_eff = 0; + bool simple = true; + if (1 == sizeM1) { // size 2 + //--- Width of the clusters minus the edge (first and last) pixels. + //--- In the note, they are denoted x_F and x_L (and y_F and y_L) + // assert(lower_edge_last_pix >= upper_edge_first_pix); + auto w_inner = pitch * float(lower_edge_last_pix - upper_edge_first_pix); // in cm + + //--- Predicted charge width from geometry + auto w_pred = theThickness * cot_angle // geometric correction (in cm) + - lorentz_shift; // (in cm) &&& check fpix! + + w_eff = std::abs(w_pred) - w_inner; + + //--- If the observed charge width is inconsistent with the expectations + //--- based on the track, do *not* use w_pred-w_inner. Instead, replace + //--- it with an *average* effective charge width, which is the average + //--- length of the edge pixels. 
+ + // this can produce "large" regressions for very small numeric differences + simple = (w_eff < 0.0f) | (w_eff > pitch); + } + + if (simple) { + //--- Total length of the two edge pixels (first+last) + float sum_of_edge = 2.0f; + if (first_is_big) + sum_of_edge += 1.0f; + if (last_is_big) + sum_of_edge += 1.0f; + w_eff = pitch * 0.5f * sum_of_edge; // ave. length of edge pixels (first+last) (cm) + } + + //--- Finally, compute the position in this projection + float qdiff = q_l - q_f; + float qsum = q_l + q_f; + + //--- Temporary fix for clusters with both first and last pixel with charge = 0 + if (qsum == 0) + qsum = 1.0f; + + return 0.5f * (qdiff / qsum) * w_eff; + } + + constexpr inline void position(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + //--- Upper Right corner of Lower Left pixel -- in measurement frame + uint16_t llx = cp.minRow[ic] + 1; + uint16_t lly = cp.minCol[ic] + 1; + + //--- Lower Left corner of Upper Right pixel -- in measurement frame + uint16_t urx = cp.maxRow[ic]; + uint16_t ury = cp.maxCol[ic]; + + auto llxl = phase1PixelTopology::localX(llx); + auto llyl = phase1PixelTopology::localY(lly); + auto urxl = phase1PixelTopology::localX(urx); + auto uryl = phase1PixelTopology::localY(ury); + + auto mx = llxl + urxl; + auto my = llyl + uryl; + + auto xsize = int(urxl) + 2 - int(llxl); + auto ysize = int(uryl) + 2 - int(llyl); + assert(xsize >= 0); // 0 if big pix... + assert(ysize >= 0); + + if (phase1PixelTopology::isBigPixX(cp.minRow[ic])) + ++xsize; + if (phase1PixelTopology::isBigPixX(cp.maxRow[ic])) + ++xsize; + if (phase1PixelTopology::isBigPixY(cp.minCol[ic])) + ++ysize; + if (phase1PixelTopology::isBigPixY(cp.maxCol[ic])) + ++ysize; + + int unbalanceX = 8. * std::abs(float(cp.q_f_X[ic] - cp.q_l_X[ic])) / float(cp.q_f_X[ic] + cp.q_l_X[ic]); + int unbalanceY = 8. 
* std::abs(float(cp.q_f_Y[ic] - cp.q_l_Y[ic])) / float(cp.q_f_Y[ic] + cp.q_l_Y[ic]); + xsize = 8 * xsize - unbalanceX; + ysize = 8 * ysize - unbalanceY; + + cp.xsize[ic] = std::min(xsize, 1023); + cp.ysize[ic] = std::min(ysize, 1023); + + if (cp.minRow[ic] == 0 || cp.maxRow[ic] == phase1PixelTopology::lastRowInModule) + cp.xsize[ic] = -cp.xsize[ic]; + if (cp.minCol[ic] == 0 || cp.maxCol[ic] == phase1PixelTopology::lastColInModule) + cp.ysize[ic] = -cp.ysize[ic]; + + // apply the lorentz offset correction + auto xPos = detParams.shiftX + comParams.thePitchX * (0.5f * float(mx) + float(phase1PixelTopology::xOffset)); + auto yPos = detParams.shiftY + comParams.thePitchY * (0.5f * float(my) + float(phase1PixelTopology::yOffset)); + + float cotalpha = 0, cotbeta = 0; + + computeAnglesFromDet(detParams, xPos, yPos, cotalpha, cotbeta); + + auto thickness = detParams.isBarrel ? comParams.theThicknessB : comParams.theThicknessE; + + auto xcorr = correction(cp.maxRow[ic] - cp.minRow[ic], + cp.q_f_X[ic], + cp.q_l_X[ic], + llxl, + urxl, + detParams.chargeWidthX, // lorentz shift in cm + thickness, + cotalpha, + comParams.thePitchX, + phase1PixelTopology::isBigPixX(cp.minRow[ic]), + phase1PixelTopology::isBigPixX(cp.maxRow[ic])); + + auto ycorr = correction(cp.maxCol[ic] - cp.minCol[ic], + cp.q_f_Y[ic], + cp.q_l_Y[ic], + llyl, + uryl, + detParams.chargeWidthY, // lorentz shift in cm + thickness, + cotbeta, + comParams.thePitchY, + phase1PixelTopology::isBigPixY(cp.minCol[ic]), + phase1PixelTopology::isBigPixY(cp.maxCol[ic])); + + cp.xpos[ic] = xPos + xcorr; + cp.ypos[ic] = yPos + ycorr; + } + + constexpr inline void errorFromSize(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + // Edge cluster errors + cp.xerr[ic] = 0.0050; + cp.yerr[ic] = 0.0085; + + // FIXME these are errors from Run1 + constexpr float xerr_barrel_l1[] = {0.00115, 0.00120, 0.00088}; + constexpr float xerr_barrel_l1_def = 0.00200; // 
0.01030; + constexpr float yerr_barrel_l1[] = { + 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; + constexpr float yerr_barrel_l1_def = 0.00210; + constexpr float xerr_barrel_ln[] = {0.00115, 0.00120, 0.00088}; + constexpr float xerr_barrel_ln_def = 0.00200; // 0.01030; + constexpr float yerr_barrel_ln[] = { + 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; + constexpr float yerr_barrel_ln_def = 0.00210; + constexpr float xerr_endcap[] = {0.0020, 0.0020}; + constexpr float xerr_endcap_def = 0.0020; + constexpr float yerr_endcap[] = {0.00210}; + constexpr float yerr_endcap_def = 0.00210; + + auto sx = cp.maxRow[ic] - cp.minRow[ic]; + auto sy = cp.maxCol[ic] - cp.minCol[ic]; + + // is edgy ? + bool isEdgeX = cp.minRow[ic] == 0 or cp.maxRow[ic] == phase1PixelTopology::lastRowInModule; + bool isEdgeY = cp.minCol[ic] == 0 or cp.maxCol[ic] == phase1PixelTopology::lastColInModule; + // is one and big? + bool isBig1X = (0 == sx) && phase1PixelTopology::isBigPixX(cp.minRow[ic]); + bool isBig1Y = (0 == sy) && phase1PixelTopology::isBigPixY(cp.minCol[ic]); + + if (!isEdgeX && !isBig1X) { + if (not detParams.isBarrel) { + cp.xerr[ic] = sx < std::size(xerr_endcap) ? xerr_endcap[sx] : xerr_endcap_def; + } else if (detParams.layer == 1) { + cp.xerr[ic] = sx < std::size(xerr_barrel_l1) ? xerr_barrel_l1[sx] : xerr_barrel_l1_def; + } else { + cp.xerr[ic] = sx < std::size(xerr_barrel_ln) ? xerr_barrel_ln[sx] : xerr_barrel_ln_def; + } + } + + if (!isEdgeY && !isBig1Y) { + if (not detParams.isBarrel) { + cp.yerr[ic] = sy < std::size(yerr_endcap) ? yerr_endcap[sy] : yerr_endcap_def; + } else if (detParams.layer == 1) { + cp.yerr[ic] = sy < std::size(yerr_barrel_l1) ? yerr_barrel_l1[sy] : yerr_barrel_l1_def; + } else { + cp.yerr[ic] = sy < std::size(yerr_barrel_ln) ? 
yerr_barrel_ln[sy] : yerr_barrel_ln_def; + } + } + } + + constexpr inline void errorFromDB(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + // Edge cluster errors + cp.xerr[ic] = 0.0050f; + cp.yerr[ic] = 0.0085f; + + auto sx = cp.maxRow[ic] - cp.minRow[ic]; + auto sy = cp.maxCol[ic] - cp.minCol[ic]; + + // is edgy ? + bool isEdgeX = cp.minRow[ic] == 0 or cp.maxRow[ic] == phase1PixelTopology::lastRowInModule; + bool isEdgeY = cp.minCol[ic] == 0 or cp.maxCol[ic] == phase1PixelTopology::lastColInModule; + // is one and big? + uint32_t ix = (0 == sx); + uint32_t iy = (0 == sy); + ix += (0 == sx) && phase1PixelTopology::isBigPixX(cp.minRow[ic]); + iy += (0 == sy) && phase1PixelTopology::isBigPixY(cp.minCol[ic]); + + if (not isEdgeX) + cp.xerr[ic] = detParams.sx[ix]; + if (not isEdgeY) + cp.yerr[ic] = detParams.sy[iy]; + } + +} // namespace pixelCPEforGPU + +#endif // RecoLocalTracker_SiPixelRecHits_pixelCPEforGPU_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index e02a0b722c1ae..4457b02203e66 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -1,7 +1,12 @@ + + + + + - + diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc new file mode 100644 index 0000000000000..332baabe8842a --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc @@ -0,0 +1,98 @@ +#include +#include + +#include "CondFormats/DataRecord/interface/SiPixelGenErrorDBObjectRcd.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/ESProducer.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/ModuleFactory.h" 
+#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" +#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" + +class PixelCPEFastESProducer : public edm::ESProducer { +public: + PixelCPEFastESProducer(const edm::ParameterSet& p); + std::unique_ptr produce(const TkPixelCPERecord&); + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + edm::ESGetToken magfieldToken_; + edm::ESGetToken pDDToken_; + edm::ESGetToken hTTToken_; + edm::ESGetToken lorentzAngleToken_; + edm::ESGetToken lorentzAngleWidthToken_; + edm::ESGetToken genErrorDBObjectToken_; + + edm::ParameterSet pset_; + bool useErrorsFromTemplates_; +}; + +using namespace edm; + +PixelCPEFastESProducer::PixelCPEFastESProducer(const edm::ParameterSet& p) : pset_(p) { + auto const& myname = p.getParameter("ComponentName"); + auto const& magname = p.getParameter("MagneticFieldRecord"); + useErrorsFromTemplates_ = p.getParameter("UseErrorsFromTemplates"); + + auto cc = setWhatProduced(this, myname); + magfieldToken_ = cc.consumes(magname); + pDDToken_ = cc.consumes(); + hTTToken_ = cc.consumes(); + lorentzAngleToken_ = cc.consumes(edm::ESInputTag("")); + lorentzAngleWidthToken_ = cc.consumes(edm::ESInputTag("", "forWidth")); + if (useErrorsFromTemplates_) { + genErrorDBObjectToken_ = cc.consumes(); + } +} + +std::unique_ptr PixelCPEFastESProducer::produce(const TkPixelCPERecord& iRecord) { + // add the new la width object + const SiPixelLorentzAngle* lorentzAngleWidthProduct = nullptr; + lorentzAngleWidthProduct = 
&iRecord.get(lorentzAngleWidthToken_); + + const SiPixelGenErrorDBObject* genErrorDBObjectProduct = nullptr; + + // Errors take only from new GenError + if (useErrorsFromTemplates_) { // do only when generrors are needed + genErrorDBObjectProduct = &iRecord.get(genErrorDBObjectToken_); + //} else { + //std::cout<<" pass an empty GenError pointer"<(pset_, + &iRecord.get(magfieldToken_), + iRecord.get(pDDToken_), + iRecord.get(hTTToken_), + &iRecord.get(lorentzAngleToken_), + genErrorDBObjectProduct, + lorentzAngleWidthProduct); +} + +void PixelCPEFastESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + // from PixelCPEBase + PixelCPEBase::fillPSetDescription(desc); + + // used by PixelCPEFast + desc.add("EdgeClusterErrorX", 50.0); + desc.add("EdgeClusterErrorY", 85.0); + desc.add("UseErrorsFromTemplates", true); + desc.add("TruncatePixelCharge", true); + + // specific to PixelCPEFastESProducer + desc.add("ComponentName", "PixelCPEFast"); + desc.add("MagneticFieldRecord", edm::ESInputTag()); + desc.add("useLAAlignmentOffsets", false); + desc.add("DoLorentz", false); + + descriptions.add("PixelCPEFastESProducer", desc); +} + +DEFINE_FWK_EVENTSETUP_MODULE(PixelCPEFastESProducer); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu new file mode 100644 index 0000000000000..f75d5e3b3bef7 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu @@ -0,0 +1,78 @@ +// C++ headers +#include +#include + +// CUDA runtime +#include + +// CMSSW headers +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h" + +#include "PixelRecHitGPUKernel.h" +#include 
"gpuPixelRecHits.h" + +namespace { + __global__ void setHitsLayerStart(uint32_t const* __restrict__ hitsModuleStart, + pixelCPEforGPU::ParamsOnGPU const* cpeParams, + uint32_t* hitsLayerStart) { + auto i = blockIdx.x * blockDim.x + threadIdx.x; + + assert(0 == hitsModuleStart[0]); + + if (i < 11) { + hitsLayerStart[i] = hitsModuleStart[cpeParams->layerGeometry().layerStart[i]]; +#ifdef GPU_DEBUG + printf("LayerStart %d %d: %d\n", i, cpeParams->layerGeometry().layerStart[i], hitsLayerStart[i]); +#endif + } + } +} // namespace + +namespace pixelgpudetails { + + TrackingRecHit2DCUDA PixelRecHitGPUKernel::makeHitsAsync(SiPixelDigisCUDA const& digis_d, + SiPixelClustersCUDA const& clusters_d, + BeamSpotCUDA const& bs_d, + pixelCPEforGPU::ParamsOnGPU const* cpeParams, + cudaStream_t stream) const { + auto nHits = clusters_d.nClusters(); + TrackingRecHit2DCUDA hits_d(nHits, cpeParams, clusters_d.clusModuleStart(), stream); + + int threadsPerBlock = 128; + int blocks = digis_d.nModules(); // active modules (with digis) + +#ifdef GPU_DEBUG + std::cout << "launching getHits kernel for " << blocks << " blocks" << std::endl; +#endif + if (blocks) // protect from empty events + gpuPixelRecHits::getHits<<>>( + cpeParams, bs_d.data(), digis_d.view(), digis_d.nDigis(), clusters_d.view(), hits_d.view()); + cudaCheck(cudaGetLastError()); +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + + // assuming full warp of threads is better than a smaller number... 
+ if (nHits) { + setHitsLayerStart<<<1, 32, 0, stream>>>(clusters_d.clusModuleStart(), cpeParams, hits_d.hitsLayerStart()); + cudaCheck(cudaGetLastError()); + } + + if (nHits) { + cms::cuda::fillManyFromVector(hits_d.phiBinner(), 10, hits_d.iphi(), hits_d.hitsLayerStart(), nHits, 256, stream); + cudaCheck(cudaGetLastError()); + } + +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + + return hits_d; + } + +} // namespace pixelgpudetails diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h new file mode 100644 index 0000000000000..61bc8b58bb7d6 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h @@ -0,0 +1,33 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_plugins_PixelRecHitGPUKernel_h +#define RecoLocalTracker_SiPixelRecHits_plugins_PixelRecHitGPUKernel_h + +#include + +#include + +#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" + +namespace pixelgpudetails { + + class PixelRecHitGPUKernel { + public: + PixelRecHitGPUKernel() = default; + ~PixelRecHitGPUKernel() = default; + + PixelRecHitGPUKernel(const PixelRecHitGPUKernel&) = delete; + PixelRecHitGPUKernel(PixelRecHitGPUKernel&&) = delete; + PixelRecHitGPUKernel& operator=(const PixelRecHitGPUKernel&) = delete; + PixelRecHitGPUKernel& operator=(PixelRecHitGPUKernel&&) = delete; + + TrackingRecHit2DCUDA makeHitsAsync(SiPixelDigisCUDA const& digis_d, + SiPixelClustersCUDA const& clusters_d, + BeamSpotCUDA const& bs_d, + pixelCPEforGPU::ParamsOnGPU const* cpeParams, + cudaStream_t stream) const; + }; +} // namespace pixelgpudetails + +#endif // RecoLocalTracker_SiPixelRecHits_plugins_PixelRecHitGPUKernel_h diff --git 
a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc new file mode 100644 index 0000000000000..09b90526bf7db --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc @@ -0,0 +1,92 @@ +#include + +#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" + +#include "PixelRecHitGPUKernel.h" + +class SiPixelRecHitCUDA : public edm::global::EDProducer<> { +public: + explicit SiPixelRecHitCUDA(const edm::ParameterSet& iConfig); + ~SiPixelRecHitCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + const edm::ESGetToken 
cpeToken_; + const edm::EDGetTokenT> tBeamSpot; + const edm::EDGetTokenT> token_; + const edm::EDGetTokenT> tokenDigi_; + const edm::EDPutTokenT> tokenHit_; + + const pixelgpudetails::PixelRecHitGPUKernel gpuAlgo_; +}; + +SiPixelRecHitCUDA::SiPixelRecHitCUDA(const edm::ParameterSet& iConfig) + : cpeToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CPE")))), + tBeamSpot(consumes>(iConfig.getParameter("beamSpot"))), + token_(consumes>(iConfig.getParameter("src"))), + tokenDigi_(consumes>(iConfig.getParameter("src"))), + tokenHit_(produces>()) {} + +void SiPixelRecHitCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("beamSpot", edm::InputTag("offlineBeamSpotCUDA")); + desc.add("src", edm::InputTag("siPixelClustersPreSplittingCUDA")); + desc.add("CPE", "PixelCPEFast"); + descriptions.add("siPixelRecHitCUDA", desc); +} + +void SiPixelRecHitCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& es) const { + PixelCPEFast const* fcpe = dynamic_cast(&es.getData(cpeToken_)); + if (not fcpe) { + throw cms::Exception("Configuration") << "SiPixelRecHitSoAFromLegacy can only use a CPE of type PixelCPEFast"; + } + + edm::Handle> hclusters; + iEvent.getByToken(token_, hclusters); + + cms::cuda::ScopedContextProduce ctx{*hclusters}; + auto const& clusters = ctx.get(*hclusters); + + edm::Handle> hdigis; + iEvent.getByToken(tokenDigi_, hdigis); + auto const& digis = ctx.get(*hdigis); + + edm::Handle> hbs; + iEvent.getByToken(tBeamSpot, hbs); + auto const& bs = ctx.get(*hbs); + + auto nHits = clusters.nClusters(); + if (nHits >= TrackingRecHit2DSOAView::maxHits()) { + edm::LogWarning("PixelRecHitGPUKernel") + << "Clusters/Hits Overflow " << nHits << " >= " << TrackingRecHit2DSOAView::maxHits(); + } + + ctx.emplace(iEvent, + tokenHit_, + gpuAlgo_.makeHitsAsync(digis, clusters, bs, fcpe->getGPUProductAsync(ctx.stream()), ctx.stream())); +} + +DEFINE_FWK_MODULE(SiPixelRecHitCUDA); 
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc index 42efbd12c2e2d..8c16be54e5774 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc @@ -82,6 +82,10 @@ #include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" +// Make heterogeneous framework happy +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "CUDADataFormats/Common/interface/HostProduct.h" + using namespace std; namespace cms { @@ -104,22 +108,21 @@ namespace cms { void produce(edm::Event& e, const edm::EventSetup& c) override; //--- Execute the position estimator algorithm(s). - //--- New interface with DetSetVector - void run(const edmNew::DetSetVector& input, - SiPixelRecHitCollectionNew& output, - TrackerGeometry const& geom); - - void run(edm::Handle> inputhandle, + void run(edm::Event& e, + edm::Handle> inputhandle, SiPixelRecHitCollectionNew& output, TrackerGeometry const& geom); private: + using HMSstorage = HostProduct; + // TO DO: maybe allow a map of pointers? 
/// const PixelClusterParameterEstimator * cpe_; // what we got (for now, one ptr to base class) PixelCPEBase const* cpe_ = nullptr; // What we got (for now, one ptr to base class) edm::InputTag const src_; edm::EDGetTokenT> const tPixelCluster_; edm::EDPutTokenT const tPut_; + edm::EDPutTokenT const tHost_; edm::ESGetToken const tTrackerGeom_; edm::ESGetToken const tCPE_; bool m_newCont; // save also in emdNew::DetSetVector @@ -132,6 +135,7 @@ namespace cms { : src_(conf.getParameter("src")), tPixelCluster_(consumes>(src_)), tPut_(produces()), + tHost_(produces()), tTrackerGeom_(esConsumes()), tCPE_(esConsumes( edm::ESInputTag("", conf.getParameter("CPE")))) {} @@ -159,7 +163,7 @@ namespace cms { // Step C: Iterate over DetIds and invoke the strip CPE algorithm // on each DetUnit - run(input, output, geom); + run(e, input, output, geom); output.shrink_to_fit(); e.emplace(tPut_, std::move(output)); @@ -170,7 +174,8 @@ namespace cms { //! and make a RecHit to store the result. //! New interface reading DetSetVector by V.Chiochia (May 30th, 2006) //--------------------------------------------------------------------------- - void SiPixelRecHitConverter::run(edm::Handle> inputhandle, + void SiPixelRecHitConverter::run(edm::Event& iEvent, + edm::Handle> inputhandle, SiPixelRecHitCollectionNew& output, TrackerGeometry const& geom) { if (!cpe_) { @@ -185,18 +190,45 @@ namespace cms { const edmNew::DetSetVector& input = *inputhandle; - edmNew::DetSetVector::const_iterator DSViter = input.begin(); + // allocate a buffer for the indices of the clusters + auto hmsp = std::make_unique(gpuClustering::maxNumModules + 1); + // hitsModuleStart is a non-owning pointer to the buffer + auto hitsModuleStart = hmsp.get(); + // fill cluster arrays + std::array clusInModule{}; + for (auto const& dsv : input) { + unsigned int detid = dsv.detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom.idToDetUnit(detIdObject); + auto gind = genericDet->index(); + // FIXME to 
be changed to support Phase2 + if (gind >= int(gpuClustering::maxNumModules)) + continue; + auto const nclus = dsv.size(); + assert(nclus > 0); + clusInModule[gind] = nclus; + numberOfClusters += nclus; + } + hitsModuleStart[0] = 0; + assert(clusInModule.size() > gpuClustering::maxNumModules); + for (int i = 1, n = clusInModule.size(); i < n; ++i) + hitsModuleStart[i] = hitsModuleStart[i - 1] + clusInModule[i - 1]; + assert(numberOfClusters == int(hitsModuleStart[gpuClustering::maxNumModules])); + + // wrap the buffer in a HostProduct, and move it to the Event, without reallocating the buffer or affecting hitsModuleStart + iEvent.emplace(tHost_, std::move(hmsp)); - for (; DSViter != input.end(); DSViter++) { + numberOfClusters = 0; + for (auto const& dsv : input) { numberOfDetUnits++; - unsigned int detid = DSViter->detId(); + unsigned int detid = dsv.detId(); DetId detIdObject(detid); const GeomDetUnit* genericDet = geom.idToDetUnit(detIdObject); const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); assert(pixDet); SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(output, detid); - edmNew::DetSet::const_iterator clustIt = DSViter->begin(), clustEnd = DSViter->end(); + edmNew::DetSet::const_iterator clustIt = dsv.begin(), clustEnd = dsv.end(); for (; clustIt != clustEnd; clustIt++) { numberOfClusters++; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromCUDA.cc new file mode 100644 index 0000000000000..790b0da51ecfb --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromCUDA.cc @@ -0,0 +1,187 @@ +#include + +#include + +#include "CUDADataFormats/Common/interface/HostProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "DataFormats/Common/interface/Handle.h" +#include 
"DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" + +class SiPixelRecHitFromCUDA : public edm::stream::EDProducer { +public: + explicit SiPixelRecHitFromCUDA(const edm::ParameterSet& iConfig); + ~SiPixelRecHitFromCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + using HMSstorage = HostProduct; + +private: + void acquire(edm::Event const& iEvent, + edm::EventSetup const& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; + void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override; + + const edm::ESGetToken geomToken_; + const edm::EDGetTokenT> hitsToken_; // CUDA hits + const edm::EDGetTokenT clusterToken_; // legacy clusters + const edm::EDPutTokenT rechitsPutToken_; // legacy rechits + const edm::EDPutTokenT hostPutToken_; + + uint32_t nHits_; + cms::cuda::host::unique_ptr store32_; + cms::cuda::host::unique_ptr hitsModuleStart_; +}; + +SiPixelRecHitFromCUDA::SiPixelRecHitFromCUDA(const edm::ParameterSet& iConfig) + : geomToken_(esConsumes()), + 
hitsToken_( + consumes>(iConfig.getParameter("pixelRecHitSrc"))), + clusterToken_(consumes(iConfig.getParameter("src"))), + rechitsPutToken_(produces()), + hostPutToken_(produces()) {} + +void SiPixelRecHitFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingCUDA")); + desc.add("src", edm::InputTag("siPixelClustersPreSplitting")); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelRecHitFromCUDA::acquire(edm::Event const& iEvent, + edm::EventSetup const& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) { + cms::cuda::Product const& inputDataWrapped = iEvent.get(hitsToken_); + cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; + auto const& inputData = ctx.get(inputDataWrapped); + + nHits_ = inputData.nHits(); + + LogDebug("SiPixelRecHitFromCUDA") << "converting " << nHits_ << " Hits"; + + if (0 == nHits_) + return; + store32_ = inputData.localCoordToHostAsync(ctx.stream()); + hitsModuleStart_ = inputData.hitsModuleStartToHostAsync(ctx.stream()); +} + +void SiPixelRecHitFromCUDA::produce(edm::Event& iEvent, edm::EventSetup const& es) { + // allocate a buffer for the indices of the clusters + auto hmsp = std::make_unique(gpuClustering::maxNumModules + 1); + std::copy(hitsModuleStart_.get(), hitsModuleStart_.get() + gpuClustering::maxNumModules + 1, hmsp.get()); + // wrap the buffer in a HostProduct, and move it to the Event, without reallocating the buffer or affecting hitsModuleStart + iEvent.emplace(hostPutToken_, std::move(hmsp)); + + SiPixelRecHitCollection output; + if (0 == nHits_) { + iEvent.emplace(rechitsPutToken_, std::move(output)); + return; + } + + auto xl = store32_.get(); + auto yl = xl + nHits_; + auto xe = yl + nHits_; + auto ye = xe + nHits_; + + const TrackerGeometry* geom = &es.getData(geomToken_); + + edm::Handle hclusters = iEvent.getHandle(clusterToken_); + auto 
const& input = *hclusters; + + constexpr uint32_t maxHitsInModule = gpuClustering::maxHitsInModule(); + + int numberOfDetUnits = 0; + int numberOfClusters = 0; + for (auto const& dsv : input) { + numberOfDetUnits++; + unsigned int detid = dsv.detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom->idToDetUnit(detIdObject); + auto gind = genericDet->index(); + const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); + assert(pixDet); + SiPixelRecHitCollection::FastFiller recHitsOnDetUnit(output, detid); + auto fc = hitsModuleStart_[gind]; + auto lc = hitsModuleStart_[gind + 1]; + auto nhits = lc - fc; + + assert(lc > fc); + LogDebug("SiPixelRecHitFromCUDA") << "in det " << gind << ": conv " << nhits << " hits from " << dsv.size() + << " legacy clusters" << ' ' << fc << ',' << lc; + if (nhits > maxHitsInModule) + edm::LogWarning("SiPixelRecHitFromCUDA") << fmt::sprintf( + "Too many clusters %d in module %d. Only the first %d hits will be converted", nhits, gind, maxHitsInModule); + nhits = std::min(nhits, maxHitsInModule); + + LogDebug("SiPixelRecHitFromCUDA") << "in det " << gind << "conv " << nhits << " hits from " << dsv.size() + << " legacy clusters" << ' ' << lc << ',' << fc; + + if (0 == nhits) + continue; + auto jnd = [&](int k) { return fc + k; }; + assert(nhits <= dsv.size()); + if (nhits != dsv.size()) { + edm::LogWarning("GPUHits2CPU") << "nhits!= nclus " << nhits << ' ' << dsv.size(); + } + for (auto const& clust : dsv) { + assert(clust.originalId() >= 0); + assert(clust.originalId() < dsv.size()); + if (clust.originalId() >= nhits) + continue; + auto ij = jnd(clust.originalId()); + if (ij >= TrackingRecHit2DSOAView::maxHits()) + continue; // overflow... + LocalPoint lp(xl[ij], yl[ij]); + LocalError le(xe[ij], 0, ye[ij]); + SiPixelRecHitQuality::QualWordType rqw = 0; + + numberOfClusters++; + + /* cpu version.... 
(for reference) + std::tuple tuple = cpe_->getParameters( clust, *genericDet ); + LocalPoint lp( std::get<0>(tuple) ); + LocalError le( std::get<1>(tuple) ); + SiPixelRecHitQuality::QualWordType rqw( std::get<2>(tuple) ); + */ + + // Create a persistent edm::Ref to the cluster + edm::Ref, SiPixelCluster> cluster = edmNew::makeRefTo(hclusters, &clust); + // Make a RecHit and add it to the DetSet + SiPixelRecHit hit(lp, le, rqw, *genericDet, cluster); + // + // Now save it ================= + recHitsOnDetUnit.push_back(hit); + // ============================= + + LogDebug("SiPixelRecHitFromCUDA") << "cluster " << numberOfClusters << " at " << lp << ' ' << le; + + } // <-- End loop on Clusters + + // LogDebug("SiPixelRecHitGPU") + LogDebug("SiPixelRecHitFromCUDA") << "found " << recHitsOnDetUnit.size() << " RecHits on " << detid; + + } // <-- End loop on DetUnits + + LogDebug("SiPixelRecHitFromCUDA") << "found " << numberOfDetUnits << " dets, " << numberOfClusters << " clusters"; + + iEvent.emplace(rechitsPutToken_, std::move(output)); +} + +DEFINE_FWK_MODULE(SiPixelRecHitFromCUDA); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc new file mode 100644 index 0000000000000..0d95370f25e47 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -0,0 +1,252 @@ +#include + +#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "CUDADataFormats/Common/interface/HostProduct.h" +#include "DataFormats/BeamSpot/interface/BeamSpot.h" +#include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "DataFormats/Common/interface/Handle.h" +#include 
"DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" + +#include "gpuPixelRecHits.h" + +class SiPixelRecHitSoAFromLegacy : public edm::global::EDProducer<> { +public: + explicit SiPixelRecHitSoAFromLegacy(const edm::ParameterSet& iConfig); + ~SiPixelRecHitSoAFromLegacy() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + using HitModuleStart = std::array; + using HMSstorage = HostProduct; + +private: + void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + const edm::ESGetToken geomToken_; + const edm::ESGetToken cpeToken_; + const edm::EDGetTokenT bsGetToken_; + const edm::EDGetTokenT clusterToken_; // Legacy Clusters + const edm::EDPutTokenT tokenHit_; + const edm::EDPutTokenT tokenModuleStart_; + const bool convert2Legacy_; +}; + +SiPixelRecHitSoAFromLegacy::SiPixelRecHitSoAFromLegacy(const edm::ParameterSet& iConfig) + : geomToken_(esConsumes()), + cpeToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CPE")))), + bsGetToken_{consumes(iConfig.getParameter("beamSpot"))}, + 
clusterToken_{consumes(iConfig.getParameter("src"))}, + tokenHit_{produces()}, + tokenModuleStart_{produces()}, + convert2Legacy_(iConfig.getParameter("convertToLegacy")) { + if (convert2Legacy_) + produces(); +} + +void SiPixelRecHitSoAFromLegacy::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); + desc.add("src", edm::InputTag("siPixelClustersPreSplitting")); + desc.add("CPE", "PixelCPEFast"); + desc.add("convertToLegacy", false); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& es) const { + const TrackerGeometry* geom_ = &es.getData(geomToken_); + PixelCPEFast const* fcpe = dynamic_cast(&es.getData(cpeToken_)); + if (not fcpe) { + throw cms::Exception("Configuration") << "SiPixelRecHitSoAFromLegacy can only use a CPE of type PixelCPEFast"; + } + auto const& cpeView = fcpe->getCPUProduct(); + + const reco::BeamSpot& bs = iEvent.get(bsGetToken_); + + BeamSpotPOD bsHost; + bsHost.x = bs.x0(); + bsHost.y = bs.y0(); + bsHost.z = bs.z0(); + + edm::Handle hclusters; + iEvent.getByToken(clusterToken_, hclusters); + auto const& input = *hclusters; + + // allocate a buffer for the indices of the clusters + auto hmsp = std::make_unique(gpuClustering::maxNumModules + 1); + // hitsModuleStart is a non-owning pointer to the buffer + auto hitsModuleStart = hmsp.get(); + // wrap the buffer in a HostProduct + auto hms = std::make_unique(std::move(hmsp)); + // move the HostProduct to the Event, without reallocating the buffer or affecting hitsModuleStart + iEvent.put(tokenModuleStart_, std::move(hms)); + + // legacy output + auto legacyOutput = std::make_unique(); + + // storage + std::vector xx; + std::vector yy; + std::vector adc; + std::vector moduleInd; + std::vector clus; + + std::vector, SiPixelCluster>> clusterRef; + + constexpr uint32_t maxHitsInModule = 
gpuClustering::maxHitsInModule(); + + HitModuleStart moduleStart_; // index of the first pixel of each module + HitModuleStart clusInModule_; + memset(&clusInModule_, 0, sizeof(HitModuleStart)); // needed?? + assert(gpuClustering::maxNumModules + 1 == clusInModule_.size()); + assert(0 == clusInModule_[gpuClustering::maxNumModules]); + uint32_t moduleId_; + moduleStart_[1] = 0; // we run sequentially.... + + SiPixelClustersCUDA::DeviceConstView clusterView{ + moduleStart_.data(), clusInModule_.data(), &moduleId_, hitsModuleStart}; + + // fill cluster arrays + int numberOfClusters = 0; + for (auto const& dsv : input) { + unsigned int detid = dsv.detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); + auto gind = genericDet->index(); + assert(gind < gpuClustering::maxNumModules); + auto const nclus = dsv.size(); + clusInModule_[gind] = nclus; + numberOfClusters += nclus; + } + hitsModuleStart[0] = 0; + for (int i = 1, n = clusInModule_.size(); i < n; ++i) + hitsModuleStart[i] = hitsModuleStart[i - 1] + clusInModule_[i - 1]; + assert(numberOfClusters == int(hitsModuleStart[gpuClustering::maxNumModules])); + + // output SoA + auto output = std::make_unique(numberOfClusters, &cpeView, hitsModuleStart, nullptr); + + if (0 == numberOfClusters) { + iEvent.put(std::move(output)); + if (convert2Legacy_) + iEvent.put(std::move(legacyOutput)); + return; + } + + if (convert2Legacy_) + legacyOutput->reserve(gpuClustering::maxNumModules, numberOfClusters); + + int numberOfDetUnits = 0; + int numberOfHits = 0; + for (auto const& dsv : input) { + numberOfDetUnits++; + unsigned int detid = dsv.detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); + auto const gind = genericDet->index(); + assert(gind < gpuClustering::maxNumModules); + const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); + assert(pixDet); + auto const nclus = dsv.size(); + assert(clusInModule_[gind] == nclus); + 
if (0 == nclus) + continue; // is this really possible? + + auto const fc = hitsModuleStart[gind]; + auto const lc = hitsModuleStart[gind + 1]; + assert(lc > fc); + LogDebug("SiPixelRecHitSoAFromLegacy") << "in det " << gind << ": conv " << nclus << " hits from " << dsv.size() + << " legacy clusters" << ' ' << fc << ',' << lc; + assert((lc - fc) == nclus); + if (nclus > maxHitsInModule) + printf( + "WARNING: too many clusters %d in Module %d. Only first %d Hits converted\n", nclus, gind, maxHitsInModule); + + // fill digis + xx.clear(); + yy.clear(); + adc.clear(); + moduleInd.clear(); + clus.clear(); + clusterRef.clear(); + moduleId_ = gind; + uint32_t ic = 0; + uint32_t ndigi = 0; + for (auto const& clust : dsv) { + assert(clust.size() > 0); + for (int i = 0, nd = clust.size(); i < nd; ++i) { + auto px = clust.pixel(i); + xx.push_back(px.x); + yy.push_back(px.y); + adc.push_back(px.adc); + moduleInd.push_back(gind); + clus.push_back(ic); + ++ndigi; + } + assert(clust.originalId() == ic); // make sure hits and clus are in sync + if (convert2Legacy_) + clusterRef.emplace_back(edmNew::makeRefTo(hclusters, &clust)); + ic++; + } + assert(nclus == ic); + assert(clus.size() == ndigi); + numberOfHits += nclus; + // filled creates view + SiPixelDigisCUDA::DeviceConstView digiView{xx.data(), yy.data(), adc.data(), moduleInd.data(), clus.data()}; + assert(digiView.adc(0) != 0); + // we run on blockId.x==0 + gpuPixelRecHits::getHits(&cpeView, &bsHost, &digiView, ndigi, &clusterView, output->view()); + for (auto h = fc; h < lc; ++h) + if (h - fc < maxHitsInModule) + assert(gind == output->view()->detectorIndex(h)); + else + assert(gpuClustering::invalidModuleId == output->view()->detectorIndex(h)); + if (convert2Legacy_) { + SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(*legacyOutput, detid); + for (auto h = fc; h < lc; ++h) { + auto ih = h - fc; + if (ih >= maxHitsInModule) + break; + assert(ih < clusterRef.size()); + LocalPoint lp(output->view()->xLocal(h), 
output->view()->yLocal(h)); + LocalError le(output->view()->xerrLocal(h), 0, output->view()->yerrLocal(h)); + SiPixelRecHitQuality::QualWordType rqw = 0; + SiPixelRecHit hit(lp, le, rqw, *genericDet, clusterRef[ih]); + recHitsOnDetUnit.push_back(hit); + } + } + } + assert(numberOfHits == numberOfClusters); + + // fill data structure to support CA + for (auto i = 0; i < 11; ++i) { + output->hitsLayerStart()[i] = hitsModuleStart[cpeView.layerGeometry().layerStart[i]]; + } + cms::cuda::fillManyFromVector( + output->phiBinner(), 10, output->iphi(), output->hitsLayerStart(), numberOfHits, 256, nullptr); + + LogDebug("SiPixelRecHitSoAFromLegacy") << "created HitSoa for " << numberOfClusters << " clusters in " + << numberOfDetUnits << " Dets"; + iEvent.put(std::move(output)); + if (convert2Legacy_) + iEvent.put(std::move(legacyOutput)); +} + +DEFINE_FWK_MODULE(SiPixelRecHitSoAFromLegacy); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h new file mode 100644 index 0000000000000..2401fed6c5171 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -0,0 +1,212 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h +#define RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h + +#include +#include +#include + +#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "DataFormats/Math/interface/approx_atan2.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" + +namespace gpuPixelRecHits { + + __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const* __restrict__ cpeParams, + BeamSpotPOD const* __restrict__ bs, + SiPixelDigisCUDA::DeviceConstView const* __restrict__ pdigis, + int 
numElements, + SiPixelClustersCUDA::DeviceConstView const* __restrict__ pclusters, + TrackingRecHit2DSOAView* phits) { + // FIXME + // the compiler seems NOT to optimize loads from views (even in a simple test case) + // The whole gimnastic here of copying or not is a pure heuristic exercise that seems to produce the fastest code with the above signature + // not using views (passing a gazzilion of array pointers) seems to produce the fastest code (but it is harder to mantain) + + assert(phits); + assert(cpeParams); + + auto& hits = *phits; + + auto const digis = *pdigis; // the copy is intentional! + auto const& clusters = *pclusters; + + // copy average geometry corrected by beamspot . FIXME (move it somewhere else???) + if (0 == blockIdx.x) { + auto& agc = hits.averageGeometry(); + auto const& ag = cpeParams->averageGeometry(); + for (int il = threadIdx.x, nl = TrackingRecHit2DSOAView::AverageGeometry::numberOfLaddersInBarrel; il < nl; + il += blockDim.x) { + agc.ladderZ[il] = ag.ladderZ[il] - bs->z; + agc.ladderX[il] = ag.ladderX[il] - bs->x; + agc.ladderY[il] = ag.ladderY[il] - bs->y; + agc.ladderR[il] = sqrt(agc.ladderX[il] * agc.ladderX[il] + agc.ladderY[il] * agc.ladderY[il]); + agc.ladderMinZ[il] = ag.ladderMinZ[il] - bs->z; + agc.ladderMaxZ[il] = ag.ladderMaxZ[il] - bs->z; + } + if (0 == threadIdx.x) { + agc.endCapZ[0] = ag.endCapZ[0] - bs->z; + agc.endCapZ[1] = ag.endCapZ[1] - bs->z; + // printf("endcapZ %f %f\n",agc.endCapZ[0],agc.endCapZ[1]); + } + } + + // to be moved in common namespace... 
+ using gpuClustering::invalidModuleId; + constexpr int32_t MaxHitsInIter = pixelCPEforGPU::MaxHitsInIter; + + using ClusParams = pixelCPEforGPU::ClusParams; + + // as usual one block per module + __shared__ ClusParams clusParams; + + auto me = clusters.moduleId(blockIdx.x); + int nclus = clusters.clusInModule(me); + + if (0 == nclus) + return; + +#ifdef GPU_DEBUG + if (threadIdx.x == 0) { + auto k = clusters.moduleStart(1 + blockIdx.x); + while (digis.moduleInd(k) == invalidModuleId) + ++k; + assert(digis.moduleInd(k) == me); + } +#endif + +#ifdef GPU_DEBUG + if (me % 100 == 1) + if (threadIdx.x == 0) + printf("hitbuilder: %d clusters in module %d. will write at %d\n", nclus, me, clusters.clusModuleStart(me)); +#endif + + for (int startClus = 0, endClus = nclus; startClus < endClus; startClus += MaxHitsInIter) { + int nClusInIter = std::min(MaxHitsInIter, endClus - startClus); + int lastClus = startClus + nClusInIter; + assert(nClusInIter <= nclus); + assert(nClusInIter > 0); + assert(lastClus <= nclus); + + assert(nclus > MaxHitsInIter || (0 == startClus && nClusInIter == nclus && lastClus == nclus)); + + // init + for (int ic = threadIdx.x; ic < nClusInIter; ic += blockDim.x) { + clusParams.minRow[ic] = std::numeric_limits::max(); + clusParams.maxRow[ic] = 0; + clusParams.minCol[ic] = std::numeric_limits::max(); + clusParams.maxCol[ic] = 0; + clusParams.charge[ic] = 0; + clusParams.q_f_X[ic] = 0; + clusParams.q_l_X[ic] = 0; + clusParams.q_f_Y[ic] = 0; + clusParams.q_l_Y[ic] = 0; + } + + __syncthreads(); + + // one thread per "digi" + auto first = clusters.moduleStart(1 + blockIdx.x) + threadIdx.x; + for (int i = first; i < numElements; i += blockDim.x) { + auto id = digis.moduleInd(i); + if (id == invalidModuleId) + continue; // not valid + if (id != me) + break; // end of module + auto cl = digis.clus(i); + if (cl < startClus || cl >= lastClus) + continue; + cl -= startClus; + assert(cl >= 0); + assert(cl < MaxHitsInIter); + auto x = digis.xx(i); + auto y = 
digis.yy(i); + atomicMin(&clusParams.minRow[cl], x); + atomicMax(&clusParams.maxRow[cl], x); + atomicMin(&clusParams.minCol[cl], y); + atomicMax(&clusParams.maxCol[cl], y); + } + + __syncthreads(); + + auto pixmx = cpeParams->detParams(me).pixmx; + for (int i = first; i < numElements; i += blockDim.x) { + auto id = digis.moduleInd(i); + if (id == invalidModuleId) + continue; // not valid + if (id != me) + break; // end of module + auto cl = digis.clus(i); + if (cl < startClus || cl >= lastClus) + continue; + cl -= startClus; + assert(cl >= 0); + assert(cl < MaxHitsInIter); + auto x = digis.xx(i); + auto y = digis.yy(i); + auto ch = std::min(digis.adc(i), pixmx); + atomicAdd(&clusParams.charge[cl], ch); + if (clusParams.minRow[cl] == x) + atomicAdd(&clusParams.q_f_X[cl], ch); + if (clusParams.maxRow[cl] == x) + atomicAdd(&clusParams.q_l_X[cl], ch); + if (clusParams.minCol[cl] == y) + atomicAdd(&clusParams.q_f_Y[cl], ch); + if (clusParams.maxCol[cl] == y) + atomicAdd(&clusParams.q_l_Y[cl], ch); + } + + __syncthreads(); + + // next one cluster per thread... 
+ + first = clusters.clusModuleStart(me) + startClus; + for (int ic = threadIdx.x; ic < nClusInIter; ic += blockDim.x) { + auto h = first + ic; // output index in global memory + + assert(h < TrackingRecHit2DSOAView::maxHits()); + assert(h < hits.nHits()); + assert(h < clusters.clusModuleStart(me + 1)); + + pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + + // store it + hits.charge(h) = clusParams.charge[ic]; + hits.detectorIndex(h) = me; + + float xl, yl; + hits.xLocal(h) = xl = clusParams.xpos[ic]; + hits.yLocal(h) = yl = clusParams.ypos[ic]; + + hits.clusterSizeX(h) = clusParams.xsize[ic]; + hits.clusterSizeY(h) = clusParams.ysize[ic]; + + hits.xerrLocal(h) = clusParams.xerr[ic] * clusParams.xerr[ic]; + hits.yerrLocal(h) = clusParams.yerr[ic] * clusParams.yerr[ic]; + + // keep it local for computations + float xg, yg, zg; + // to global and compute phi... + cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg); + // here correct for the beamspot... + xg -= bs->x; + yg -= bs->y; + zg -= bs->z; + + hits.xGlobal(h) = xg; + hits.yGlobal(h) = yg; + hits.zGlobal(h) = zg; + + hits.rGlobal(h) = std::sqrt(xg * xg + yg * yg); + hits.iphi(h) = unsafe_atan2s<7>(yg, xg); + } + __syncthreads(); + } // end loop on batches + } + +} // namespace gpuPixelRecHits + +#endif // RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h diff --git a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py index e349a515c69b3..e3879f4d9d34c 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py +++ b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py @@ -10,6 +10,7 @@ # 2. 
Pixel Generic CPE # from RecoLocalTracker.SiPixelRecHits.PixelCPEGeneric_cfi import * +from RecoLocalTracker.SiPixelRecHits.PixelCPEFastESProducer_cfi import * # # 3. ESProducer for the Magnetic-field dependent template records # diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index 465aa0bb346ce..eb9dbad4934cd 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -6,6 +6,39 @@ VerboseLevel = cms.untracked.int32(0) ) -siPixelRecHitsPreSplitting = siPixelRecHits.clone( +_siPixelRecHitsPreSplitting = siPixelRecHits.clone( src = 'siPixelClustersPreSplitting' ) + +from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA +siPixelRecHitsPreSplitting = SwitchProducerCUDA( + cpu = _siPixelRecHitsPreSplitting.clone() +) + + + +from Configuration.ProcessModifiers.gpu_cff import gpu +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitCUDA_cfi import siPixelRecHitCUDA as _siPixelRecHitCUDA +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromCUDA_cfi import siPixelRecHitFromCUDA as _siPixelRecHitFromCUDA + +gpu.toModify(siPixelRecHitsPreSplitting, + cuda = _siPixelRecHitFromCUDA.clone() +) + + +siPixelRecHitsPreSplittingTask = cms.Task(siPixelRecHitsPreSplitting) + +siPixelRecHitsPreSplittingCUDA = _siPixelRecHitCUDA.clone( + beamSpot = "offlineBeamSpotToCUDA" +) + +siPixelRecHitsPreSplittingLegacy = _siPixelRecHitFromCUDA.clone() +siPixelRecHitsPreSplittingTaskCUDA = cms.Task( + siPixelRecHitsPreSplittingCUDA, + siPixelRecHitsPreSplittingLegacy, +) + +from Configuration.ProcessModifiers.gpu_cff import gpu +_siPixelRecHitsPreSplittingTask_gpu = siPixelRecHitsPreSplittingTask.copy() +_siPixelRecHitsPreSplittingTask_gpu.add(siPixelRecHitsPreSplittingTaskCUDA) +gpu.toReplaceWith(siPixelRecHitsPreSplittingTask, _siPixelRecHitsPreSplittingTask_gpu) diff --git 
a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc new file mode 100644 index 0000000000000..0077c0748ca28 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -0,0 +1,561 @@ +#include + +#include "CondFormats/SiPixelTransient/interface/SiPixelTemplate.h" +#include "DataFormats/DetId/interface/DetId.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" +#include "Geometry/TrackerGeometryBuilder/interface/RectangularPixelTopology.h" +#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" + +// Services +// this is needed to get errors from templates + +namespace { + constexpr float micronsToCm = 1.0e-4; +} + +//----------------------------------------------------------------------------- +//! The constructor. 
+//----------------------------------------------------------------------------- +PixelCPEFast::PixelCPEFast(edm::ParameterSet const& conf, + const MagneticField* mag, + const TrackerGeometry& geom, + const TrackerTopology& ttopo, + const SiPixelLorentzAngle* lorentzAngle, + const SiPixelGenErrorDBObject* genErrorDBObject, + const SiPixelLorentzAngle* lorentzAngleWidth) + : PixelCPEBase(conf, mag, geom, ttopo, lorentzAngle, genErrorDBObject, nullptr, lorentzAngleWidth, 0), + edgeClusterErrorX_(conf.getParameter("EdgeClusterErrorX")), + edgeClusterErrorY_(conf.getParameter("EdgeClusterErrorY")), + useErrorsFromTemplates_(conf.getParameter("UseErrorsFromTemplates")), + truncatePixelCharge_(conf.getParameter("TruncatePixelCharge")) { + // Use errors from templates or from GenError + if (useErrorsFromTemplates_) { + if (!SiPixelGenError::pushfile(*genErrorDBObject_, thePixelGenError_)) + throw cms::Exception("InvalidCalibrationLoaded") + << "ERROR: GenErrors not filled correctly. Check the sqlite file. Using SiPixelTemplateDBObject version " + << (*genErrorDBObject_).version(); + } + + // Rechit errors in case other, more correct, errors are not vailable + // These are constants. Maybe there is a more efficienct way to store them. 
+ xerr_barrel_l1_ = {0.00115, 0.00120, 0.00088}; + xerr_barrel_l1_def_ = 0.01030; + yerr_barrel_l1_ = {0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; + yerr_barrel_l1_def_ = 0.00210; + xerr_barrel_ln_ = {0.00115, 0.00120, 0.00088}; + xerr_barrel_ln_def_ = 0.01030; + yerr_barrel_ln_ = {0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; + yerr_barrel_ln_def_ = 0.00210; + xerr_endcap_ = {0.0020, 0.0020}; + xerr_endcap_def_ = 0.0020; + yerr_endcap_ = {0.00210}; + yerr_endcap_def_ = 0.00075; + + fillParamsForGpu(); + + cpuData_ = { + &commonParamsGPU_, + detParamsGPU_.data(), + &layerGeometry_, + &averageGeometry_, + }; +} + +const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cudaStream_t cudaStream) const { + const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { + // and now copy to device... + cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); + cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_detParams, + this->detParamsGPU_.size() * sizeof(pixelCPEforGPU::DetParams))); + cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_averageGeometry, sizeof(pixelCPEforGPU::AverageGeometry))); + cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry))); + cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_d, sizeof(pixelCPEforGPU::ParamsOnGPU))); + + cudaCheck(cudaMemcpyAsync( + data.paramsOnGPU_d, &data.paramsOnGPU_h, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream)); + cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_commonParams, + &this->commonParamsGPU_, + sizeof(pixelCPEforGPU::CommonParams), + cudaMemcpyDefault, + stream)); + cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_averageGeometry, + &this->averageGeometry_, + sizeof(pixelCPEforGPU::AverageGeometry), + cudaMemcpyDefault, + stream)); + 
cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_layerGeometry, + &this->layerGeometry_, + sizeof(pixelCPEforGPU::LayerGeometry), + cudaMemcpyDefault, + stream)); + cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_detParams, + this->detParamsGPU_.data(), + this->detParamsGPU_.size() * sizeof(pixelCPEforGPU::DetParams), + cudaMemcpyDefault, + stream)); + }); + return data.paramsOnGPU_d; +} + +void PixelCPEFast::fillParamsForGpu() { + commonParamsGPU_.theThicknessB = m_DetParams.front().theThickness; + commonParamsGPU_.theThicknessE = m_DetParams.back().theThickness; + commonParamsGPU_.thePitchX = m_DetParams[0].thePitchX; + commonParamsGPU_.thePitchY = m_DetParams[0].thePitchY; + + LogDebug("PixelCPEFast") << "pitch & thickness " << commonParamsGPU_.thePitchX << ' ' << commonParamsGPU_.thePitchY + << " " << commonParamsGPU_.theThicknessB << ' ' << commonParamsGPU_.theThicknessE; + + // zero average geometry + memset(&averageGeometry_, 0, sizeof(pixelCPEforGPU::AverageGeometry)); + + uint32_t oldLayer = 0; + uint32_t oldLadder = 0; + float rl = 0; + float zl = 0; + float miz = 90, mxz = 0; + float pl = 0; + int nl = 0; + detParamsGPU_.resize(m_DetParams.size()); + for (auto i = 0U; i < m_DetParams.size(); ++i) { + auto& p = m_DetParams[i]; + auto& g = detParamsGPU_[i]; + + assert(p.theDet->index() == int(i)); + assert(commonParamsGPU_.thePitchY == p.thePitchY); + assert(commonParamsGPU_.thePitchX == p.thePitchX); + + g.isBarrel = GeomDetEnumerators::isBarrel(p.thePart); + g.isPosZ = p.theDet->surface().position().z() > 0; + g.layer = ttopo_.layer(p.theDet->geographicalId()); + g.index = i; // better be! + g.rawId = p.theDet->geographicalId(); + assert((g.isBarrel ? commonParamsGPU_.theThicknessB : commonParamsGPU_.theThicknessE) == p.theThickness); + + auto ladder = ttopo_.pxbLadder(p.theDet->geographicalId()); + if (oldLayer != g.layer) { + oldLayer = g.layer; + LogDebug("PixelCPEFast") << "new layer at " << i << (g.isBarrel ? " B " : (g.isPosZ ? 
" E+ " : " E- ")) + << g.layer << " starting at " << g.rawId << '\n' + << "old layer had " << nl << " ladders"; + nl = 0; + } + if (oldLadder != ladder) { + oldLadder = ladder; + LogDebug("PixelCPEFast") << "new ladder at " << i << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) + << ladder << " starting at " << g.rawId << '\n' + << "old ladder ave z,r,p mz " << zl / 8.f << " " << rl / 8.f << " " << pl / 8.f << ' ' + << miz << ' ' << mxz; + rl = 0; + zl = 0; + pl = 0; + miz = 90; + mxz = 0; + nl++; + } + + g.shiftX = 0.5f * p.lorentzShiftInCmX; + g.shiftY = 0.5f * p.lorentzShiftInCmY; + g.chargeWidthX = p.lorentzShiftInCmX * p.widthLAFractionX; + g.chargeWidthY = p.lorentzShiftInCmY * p.widthLAFractionY; + + g.x0 = p.theOrigin.x(); + g.y0 = p.theOrigin.y(); + g.z0 = p.theOrigin.z(); + + auto vv = p.theDet->surface().position(); + auto rr = pixelCPEforGPU::Rotation(p.theDet->surface().rotation()); + g.frame = pixelCPEforGPU::Frame(vv.x(), vv.y(), vv.z(), rr); + + zl += vv.z(); + miz = std::min(miz, std::abs(vv.z())); + mxz = std::max(mxz, std::abs(vv.z())); + rl += vv.perp(); + pl += vv.phi(); // (not obvious) + + // errors ..... + ClusterParamGeneric cp; + auto gvx = p.theOrigin.x() + 40.f * commonParamsGPU_.thePitchX; + auto gvy = p.theOrigin.y(); + auto gvz = 1.f / p.theOrigin.z(); + //--- Note that the normalization is not required as only the ratio used + + // calculate angles + cp.cotalpha = gvx * gvz; + cp.cotbeta = gvy * gvz; + + cp.with_track_angle = false; + + auto lape = p.theDet->localAlignmentError(); + if (lape.invalid()) + lape = LocalError(); // zero.... 
+ +#ifdef EDM_ML_DEBUG + auto m = 10000.f; + for (float qclus = 15000; qclus < 35000; qclus += 15000) { + errorFromTemplates(p, cp, qclus); + LogDebug("PixelCPEFast") << i << ' ' << qclus << ' ' << cp.pixmx << ' ' << m * cp.sigmax << ' ' << m * cp.sx1 + << ' ' << m * cp.sx2 << ' ' << m * cp.sigmay << ' ' << m * cp.sy1 << ' ' << m * cp.sy2; + } + LogDebug("PixelCPEFast") << i << ' ' << m * std::sqrt(lape.xx()) << ' ' << m * std::sqrt(lape.yy()); +#endif // EDM_ML_DEBUG + + errorFromTemplates(p, cp, 20000.f); + g.pixmx = std::max(0, cp.pixmx); + g.sx[0] = cp.sigmax; + g.sx[1] = cp.sx1; + g.sx[2] = cp.sx2; + + g.sy[0] = cp.sigmay; + g.sy[1] = cp.sy1; + g.sy[2] = cp.sy2; + + for (int i = 0; i < 3; ++i) { + g.sx[i] = std::sqrt(g.sx[i] * g.sx[i] + lape.xx()); + g.sy[i] = std::sqrt(g.sy[i] * g.sy[i] + lape.yy()); + } + } + + // compute ladder baricenter (only in global z) for the barrel + auto& aveGeom = averageGeometry_; + int il = 0; + for (int im = 0, nm = phase1PixelTopology::numberOfModulesInBarrel; im < nm; ++im) { + auto const& g = detParamsGPU_[im]; + il = im / 8; + assert(il < int(phase1PixelTopology::numberOfLaddersInBarrel)); + auto z = g.frame.z(); + aveGeom.ladderZ[il] += 0.125f * z; + aveGeom.ladderMinZ[il] = std::min(aveGeom.ladderMinZ[il], z); + aveGeom.ladderMaxZ[il] = std::max(aveGeom.ladderMaxZ[il], z); + aveGeom.ladderX[il] += 0.125f * g.frame.x(); + aveGeom.ladderY[il] += 0.125f * g.frame.y(); + aveGeom.ladderR[il] += 0.125f * sqrt(g.frame.x() * g.frame.x() + g.frame.y() * g.frame.y()); + } + assert(il + 1 == int(phase1PixelTopology::numberOfLaddersInBarrel)); + // add half_module and tollerance + constexpr float module_length = 6.7f; + constexpr float module_tolerance = 0.2f; + for (int il = 0, nl = phase1PixelTopology::numberOfLaddersInBarrel; il < nl; ++il) { + aveGeom.ladderMinZ[il] -= (0.5f * module_length - module_tolerance); + aveGeom.ladderMaxZ[il] += (0.5f * module_length - module_tolerance); + } + + // compute "max z" for first layer in 
endcap (should we restrict to the outermost ring?) + for (auto im = phase1PixelTopology::layerStart[4]; im < phase1PixelTopology::layerStart[5]; ++im) { + auto const& g = detParamsGPU_[im]; + aveGeom.endCapZ[0] = std::max(aveGeom.endCapZ[0], g.frame.z()); + } + for (auto im = phase1PixelTopology::layerStart[7]; im < phase1PixelTopology::layerStart[8]; ++im) { + auto const& g = detParamsGPU_[im]; + aveGeom.endCapZ[1] = std::min(aveGeom.endCapZ[1], g.frame.z()); + } + // correct for outer ring being closer + aveGeom.endCapZ[0] -= 1.5f; + aveGeom.endCapZ[1] += 1.5f; + +#ifdef EDM_ML_DEBUG + for (int jl = 0, nl = phase1PixelTopology::numberOfLaddersInBarrel; jl < nl; ++jl) { + LogDebug("PixelCPEFast") << jl << ':' << aveGeom.ladderR[jl] << '/' + << std::sqrt(aveGeom.ladderX[jl] * aveGeom.ladderX[jl] + + aveGeom.ladderY[jl] * aveGeom.ladderY[jl]) + << ',' << aveGeom.ladderZ[jl] << ',' << aveGeom.ladderMinZ[jl] << ',' + << aveGeom.ladderMaxZ[jl] << '\n'; + } + LogDebug("PixelCPEFast") << aveGeom.endCapZ[0] << ' ' << aveGeom.endCapZ[1]; +#endif // EDM_ML_DEBUG + + // fill Layer and ladders geometry + memcpy(layerGeometry_.layerStart, phase1PixelTopology::layerStart, sizeof(phase1PixelTopology::layerStart)); + memcpy(layerGeometry_.layer, phase1PixelTopology::layer.data(), phase1PixelTopology::layer.size()); +} + +PixelCPEFast::GPUData::~GPUData() { + if (paramsOnGPU_d != nullptr) { + cudaFree((void*)paramsOnGPU_h.m_commonParams); + cudaFree((void*)paramsOnGPU_h.m_detParams); + cudaFree((void*)paramsOnGPU_h.m_averageGeometry); + cudaFree((void*)paramsOnGPU_h.m_layerGeometry); + cudaFree(paramsOnGPU_d); + } +} + +std::unique_ptr PixelCPEFast::createClusterParam(const SiPixelCluster& cl) const { + return std::make_unique(cl); +} + +void PixelCPEFast::errorFromTemplates(DetParam const& theDetParam, + ClusterParamGeneric& theClusterParam, + float qclus) const { + float locBz = theDetParam.bz; + float locBx = theDetParam.bx; + LogDebug("PixelCPEFast") << 
"PixelCPEFast::localPosition(...) : locBz = " << locBz; + + theClusterParam.pixmx = std::numeric_limits::max(); // max pixel charge for truncation of 2-D cluster + + theClusterParam.sigmay = -999.9; // CPE Generic y-error for multi-pixel cluster + theClusterParam.sigmax = -999.9; // CPE Generic x-error for multi-pixel cluster + theClusterParam.sy1 = -999.9; // CPE Generic y-error for single single-pixel + theClusterParam.sy2 = -999.9; // CPE Generic y-error for single double-pixel cluster + theClusterParam.sx1 = -999.9; // CPE Generic x-error for single single-pixel cluster + theClusterParam.sx2 = -999.9; // CPE Generic x-error for single double-pixel cluster + + float dummy; + + SiPixelGenError gtempl(thePixelGenError_); + int gtemplID = theDetParam.detTemplateId; + + theClusterParam.qBin_ = gtempl.qbin(gtemplID, + theClusterParam.cotalpha, + theClusterParam.cotbeta, + locBz, + locBx, + qclus, + false, + theClusterParam.pixmx, + theClusterParam.sigmay, + dummy, + theClusterParam.sigmax, + dummy, + theClusterParam.sy1, + dummy, + theClusterParam.sy2, + dummy, + theClusterParam.sx1, + dummy, + theClusterParam.sx2, + dummy); + + theClusterParam.sigmax = theClusterParam.sigmax * micronsToCm; + theClusterParam.sx1 = theClusterParam.sx1 * micronsToCm; + theClusterParam.sx2 = theClusterParam.sx2 * micronsToCm; + + theClusterParam.sigmay = theClusterParam.sigmay * micronsToCm; + theClusterParam.sy1 = theClusterParam.sy1 * micronsToCm; + theClusterParam.sy2 = theClusterParam.sy2 * micronsToCm; +} + +//----------------------------------------------------------------------------- +//! Hit position in the local frame (in cm). Unlike other CPE's, this +//! one converts everything from the measurement frame (in channel numbers) +//! into the local frame (in centimeters). 
+//----------------------------------------------------------------------------- +LocalPoint PixelCPEFast::localPosition(DetParam const& theDetParam, ClusterParam& theClusterParamBase) const { + ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase); + + assert(!theClusterParam.with_track_angle); + + if (useErrorsFromTemplates_) { + errorFromTemplates(theDetParam, theClusterParam, theClusterParam.theCluster->charge()); + } else { + theClusterParam.qBin_ = 0; + } + + int q_f_X; //!< Q of the first pixel in X + int q_l_X; //!< Q of the last pixel in X + int q_f_Y; //!< Q of the first pixel in Y + int q_l_Y; //!< Q of the last pixel in Y + collect_edge_charges(theClusterParam, q_f_X, q_l_X, q_f_Y, q_l_Y, useErrorsFromTemplates_ && truncatePixelCharge_); + + // do GPU like ... + pixelCPEforGPU::ClusParams cp; + + cp.minRow[0] = theClusterParam.theCluster->minPixelRow(); + cp.maxRow[0] = theClusterParam.theCluster->maxPixelRow(); + cp.minCol[0] = theClusterParam.theCluster->minPixelCol(); + cp.maxCol[0] = theClusterParam.theCluster->maxPixelCol(); + + cp.q_f_X[0] = q_f_X; + cp.q_l_X[0] = q_l_X; + cp.q_f_Y[0] = q_f_Y; + cp.q_l_Y[0] = q_l_Y; + + auto ind = theDetParam.theDet->index(); + pixelCPEforGPU::position(commonParamsGPU_, detParamsGPU_[ind], cp, 0); + auto xPos = cp.xpos[0]; + auto yPos = cp.ypos[0]; + + LogDebug("PixelCPEFast") << " in PixelCPEFast:localPosition - pos = " << xPos << " " << yPos << " size " + << cp.maxRow[0] - cp.minRow[0] << ' ' << cp.maxCol[0] - cp.minCol[0]; + + //--- Now put the two together + LocalPoint pos_in_local(xPos, yPos); + return pos_in_local; +} + +//----------------------------------------------------------------------------- +//! Collect the edge charges in x and y, in a single pass over the pixel vector. +//! Calculate charge in the first and last pixel projected in x and y +//! and the inner cluster charge, projected in x and y. 
+//----------------------------------------------------------------------------- +void PixelCPEFast::collect_edge_charges(ClusterParam& theClusterParamBase, //!< input, the cluster + int& q_f_X, //!< output, Q first in X + int& q_l_X, //!< output, Q last in X + int& q_f_Y, //!< output, Q first in Y + int& q_l_Y, //!< output, Q last in Y + bool truncate) { + ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase); + + // Initialize return variables. + q_f_X = q_l_X = 0; + q_f_Y = q_l_Y = 0; + + // Obtain boundaries in index units + int xmin = theClusterParam.theCluster->minPixelRow(); + int xmax = theClusterParam.theCluster->maxPixelRow(); + int ymin = theClusterParam.theCluster->minPixelCol(); + int ymax = theClusterParam.theCluster->maxPixelCol(); + + // Iterate over the pixels. + int isize = theClusterParam.theCluster->size(); + for (int i = 0; i != isize; ++i) { + auto const& pixel = theClusterParam.theCluster->pixel(i); + // ggiurgiu@fnal.gov: add pixel charge truncation + int pix_adc = pixel.adc; + if (truncate) + pix_adc = std::min(pix_adc, theClusterParam.pixmx); + + // + // X projection + if (pixel.x == xmin) + q_f_X += pix_adc; + if (pixel.x == xmax) + q_l_X += pix_adc; + // + // Y projection + if (pixel.y == ymin) + q_f_Y += pix_adc; + if (pixel.y == ymax) + q_l_Y += pix_adc; + } +} + +//============== INFLATED ERROR AND ERRORS FROM DB BELOW ================ + +//------------------------------------------------------------------------- +// Hit error in the local frame +//------------------------------------------------------------------------- +LocalError PixelCPEFast::localError(DetParam const& theDetParam, ClusterParam& theClusterParamBase) const { + ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase); + + // Default errors are the maximum error used for edge clusters. 
+ // These are determined by looking at residuals for edge clusters + float xerr = edgeClusterErrorX_ * micronsToCm; + float yerr = edgeClusterErrorY_ * micronsToCm; + + // Find if cluster is at the module edge. + int maxPixelCol = theClusterParam.theCluster->maxPixelCol(); + int maxPixelRow = theClusterParam.theCluster->maxPixelRow(); + int minPixelCol = theClusterParam.theCluster->minPixelCol(); + int minPixelRow = theClusterParam.theCluster->minPixelRow(); + + bool edgex = phase1PixelTopology::isEdgeX(minPixelRow) | phase1PixelTopology::isEdgeX(maxPixelRow); + bool edgey = phase1PixelTopology::isEdgeY(minPixelCol) | phase1PixelTopology::isEdgeY(maxPixelCol); + + unsigned int sizex = theClusterParam.theCluster->sizeX(); + unsigned int sizey = theClusterParam.theCluster->sizeY(); + + // Find if cluster contains double (big) pixels. + bool bigInX = theDetParam.theRecTopol->containsBigPixelInX(minPixelRow, maxPixelRow); + bool bigInY = theDetParam.theRecTopol->containsBigPixelInY(minPixelCol, maxPixelCol); + + if (useErrorsFromTemplates_) { + // + // Use template errors + + if (!edgex) { // Only use this for non-edge clusters + if (sizex == 1) { + if (!bigInX) { + xerr = theClusterParam.sx1; + } else { + xerr = theClusterParam.sx2; + } + } else { + xerr = theClusterParam.sigmax; + } + } + + if (!edgey) { // Only use for non-edge clusters + if (sizey == 1) { + if (!bigInY) { + yerr = theClusterParam.sy1; + } else { + yerr = theClusterParam.sy2; + } + } else { + yerr = theClusterParam.sigmay; + } + } + + } else { // simple errors + + // This are the simple errors, hardcoded in the code + LogDebug("PixelCPEFast") << "Track angles are not known.\n" + << "Default angle estimation which assumes track from PV (0,0,0) does not work."; + + if (GeomDetEnumerators::isTrackerPixel(theDetParam.thePart)) { + if (GeomDetEnumerators::isBarrel(theDetParam.thePart)) { + DetId id = (theDetParam.theDet->geographicalId()); + int layer = ttopo_.layer(id); + if (layer == 1) { + if 
(!edgex) { + if (sizex <= xerr_barrel_l1_.size()) + xerr = xerr_barrel_l1_[sizex - 1]; + else + xerr = xerr_barrel_l1_def_; + } + + if (!edgey) { + if (sizey <= yerr_barrel_l1_.size()) + yerr = yerr_barrel_l1_[sizey - 1]; + else + yerr = yerr_barrel_l1_def_; + } + } else { // layer 2,3 + if (!edgex) { + if (sizex <= xerr_barrel_ln_.size()) + xerr = xerr_barrel_ln_[sizex - 1]; + else + xerr = xerr_barrel_ln_def_; + } + + if (!edgey) { + if (sizey <= yerr_barrel_ln_.size()) + yerr = yerr_barrel_ln_[sizey - 1]; + else + yerr = yerr_barrel_ln_def_; + } + } + + } else { // EndCap + + if (!edgex) { + if (sizex <= xerr_endcap_.size()) + xerr = xerr_endcap_[sizex - 1]; + else + xerr = xerr_endcap_def_; + } + + if (!edgey) { + if (sizey <= yerr_endcap_.size()) + yerr = yerr_endcap_[sizey - 1]; + else + yerr = yerr_endcap_def_; + } + } // end endcap + } + + } // end + + LogDebug("PixelCPEFast") << " errors " << xerr << " " << yerr; + + auto xerr_sq = xerr * xerr; + auto yerr_sq = yerr * yerr; + + return LocalError(xerr_sq, 0, yerr_sq); +} + +void PixelCPEFast::fillPSetDescription(edm::ParameterSetDescription& desc) {} diff --git a/Validation/Configuration/python/globalValidation_cff.py b/Validation/Configuration/python/globalValidation_cff.py index 0a98acf89ad69..b43f8ee22e3a4 100644 --- a/Validation/Configuration/python/globalValidation_cff.py +++ b/Validation/Configuration/python/globalValidation_cff.py @@ -219,8 +219,13 @@ _phase_1_globalValidation = globalValidation.copy() _phase_1_globalValidation += siPixelPhase1OfflineDQM_sourceV + +_phase_1_globalValidationPixelTrackingOnly = globalValidationPixelTrackingOnly.copy() +_phase_1_globalValidationPixelTrackingOnly += siPixelPhase1ValidationPixelTrackingOnly_sourceV + from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel (phase1Pixel & ~fastSim).toReplaceWith( globalValidation, _phase_1_globalValidation ) #module siPixelPhase1OfflineDQM_sourceV can't run in FastSim since siPixelClusters of type 
edmNew::DetSetVector are not produced +(phase1Pixel & ~fastSim).toReplaceWith( globalValidationPixelTrackingOnly, _phase_1_globalValidationPixelTrackingOnly ) #module siPixelPhase1OfflineDQM_sourceV can't run in FastSim since siPixelClusters of type edmNew::DetSetVector are not produced _run3_globalValidation = globalValidation.copy() _run3_globalValidation += gemSimValid diff --git a/Validation/Configuration/python/postValidation_cff.py b/Validation/Configuration/python/postValidation_cff.py index 8f310eacfce2e..8468943e81b04 100644 --- a/Validation/Configuration/python/postValidation_cff.py +++ b/Validation/Configuration/python/postValidation_cff.py @@ -115,8 +115,13 @@ _phase1_postValidation = postValidation.copy() _phase1_postValidation += siPixelPhase1OfflineDQM_harvestingV + +_phase1_postValidation_trackingOnly = postValidation_trackingOnly.copy() +_phase1_postValidation_trackingOnly += siPixelPhase1OfflineDQM_harvestingV + from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel phase1Pixel.toReplaceWith( postValidation, _phase1_postValidation ) +phase1Pixel.toReplaceWith( postValidation_trackingOnly, _phase1_postValidation_trackingOnly) _run3_postValidation = postValidation.copy() _run3_postValidation += MuonGEMHitsPostProcessors diff --git a/Validation/SiPixelPhase1ConfigV/python/SiPixelPhase1OfflineDQM_sourceV_cff.py b/Validation/SiPixelPhase1ConfigV/python/SiPixelPhase1OfflineDQM_sourceV_cff.py index 1a5692bf6677b..d0d98251ea9d4 100644 --- a/Validation/SiPixelPhase1ConfigV/python/SiPixelPhase1OfflineDQM_sourceV_cff.py +++ b/Validation/SiPixelPhase1ConfigV/python/SiPixelPhase1OfflineDQM_sourceV_cff.py @@ -20,3 +20,38 @@ + SiPixelPhase1TrackingParticleAnalyzerV ) +### Pixel Tracking-only configurations for the GPU workflow + +# Pixel digis +pixelOnlyDigisAnalyzerV = SiPixelPhase1DigisAnalyzerV.clone() + +# Pixel clusters +pixelOnlyTrackClustersAnalyzerV = SiPixelPhase1TrackClustersAnalyzerV.clone( + clusters = 'siPixelClustersPreSplitting', + 
tracks = 'pixelTracks' +) + +# Pixel rechit analyzer +pixelOnlyRecHitsAnalyzerV = SiPixelPhase1RecHitsAnalyzerV.clone( + src = 'siPixelRecHitsPreSplitting', + pixelSimLinkSrc = 'simSiPixelDigis', + ROUList = ('TrackerHitsPixelBarrelLowTof', + 'TrackerHitsPixelBarrelHighTof', + 'TrackerHitsPixelEndcapLowTof', + 'TrackerHitsPixelEndcapHighTof') +) + +# Pixel hits +pixelOnlyHitsAnalyzerV = SiPixelPhase1HitsAnalyzerV.clone( + tracksTag = 'pixelTracks' +) + +# Tracking particles +pixelOnlyTrackingParticleAnalyzerV = SiPixelPhase1TrackingParticleAnalyzerV.clone() + +siPixelPhase1ValidationPixelTrackingOnly_sourceV = cms.Sequence(pixelOnlyDigisAnalyzerV + + pixelOnlyTrackClustersAnalyzerV + + pixelOnlyHitsAnalyzerV + + pixelOnlyRecHitsAnalyzerV + + pixelOnlyTrackingParticleAnalyzerV +)