From 0b559ec2ee5abfd2284536f9f5f8f2df31d4ce96 Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Fri, 12 Mar 2021 17:59:10 +0100 Subject: [PATCH 01/38] Add MakeGrid function for visualizing/saving EDDL tensors --- CMakeLists.txt | 1 + examples/example_ecvl_eddl.cpp | 12 +++++-- modules/eddl/include/ecvl/support_eddl.h | 12 +++++++ modules/eddl/src/support_eddl.cpp | 41 ++++++++++++++++++++++++ 4 files changed, 64 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 899c6327..57af48e6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/") set_property(GLOBAL PROPERTY USE_FOLDERS ON) set_property(GLOBAL PROPERTY PREDEFINED_TARGETS_FOLDER "") set(CMAKE_POSITION_INDEPENDENT_CODE ON) # To always generate position independent code +set(CMAKE_VERBOSE_MAKEFILE ON) if (WIN32) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) endif() diff --git a/examples/example_ecvl_eddl.cpp b/examples/example_ecvl_eddl.cpp index 3eba25de..8fe2c466 100644 --- a/examples/example_ecvl_eddl.cpp +++ b/examples/example_ecvl_eddl.cpp @@ -92,11 +92,15 @@ int main() AugAdditiveLaplaceNoise({ 0, 0.2 * 255 }), AugCoarseDropout({ 0, 0.55 }, { 0.02,0.1 }, 0), AugAdditivePoissonNoise({ 0, 40 }), - AugResizeDim({ 30, 30 }) + AugResizeDim({ 30, 30 }), + AugToFloat32(255), + AugNormalize({ 0.449 }, { 0.226 }) // mean of imagenet stats ); auto test_augs = make_shared( - AugResizeDim({ 30, 30 }) + AugResizeDim({ 30, 30 }), + AugToFloat32(255), + AugNormalize({ 0.449 }, { 0.226 }) // mean of imagenet stats ); DatasetAugmentations dataset_augmentations{ {training_augs, nullptr, test_augs } }; @@ -128,6 +132,10 @@ int main() d.SetSplit(SplitType::test); d.LoadBatch(x, y); + // Save some input images + ImWrite("mnist_batch.png", MakeGrid(x, 8, false)); + ImWrite("mnist_batch_normalized.png", MakeGrid(x, 8, true)); + delete x; delete y; diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index bd717473..7ebc1019 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -215,6 +215,18 @@ Image must have 3 dimensions "xy[czo]" (in any order). \n */ void ImageToTensor(const Image& img, tensor& t, const int& offset); +/** @brief Make a grid of images from a EDDL Tensor. + +Return a grid of Image from a EDDL Tensor. + +@param[in] img Input EDDL Tensor of shape (B x C x H x W). +@param[in] cols Number of images displayed in each row of the grid. +@param[in] normalize If true, shift the image to the range [0,1]. + +@return Image taht contains the grid of images +*/ +Image MakeGrid(const tensor& t, int cols = 8, bool normalize = false); + /** @example example_ecvl_eddl.cpp Example of using ECVL with EDDL. */ diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index 882cdc1d..e4e0d0af 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -318,4 +318,45 @@ void DLDataset::LoadBatch(tensor& images) ++offset; } } + +Image MakeGrid(const tensor& t, int cols, bool normalize) +{ + const auto batch_size = t->shape[0]; + cols = std::min(batch_size, cols); + const auto rows = static_cast(std::ceil(static_cast(batch_size) / cols)); + + Image image_t; + vector vimages; + for (int r = 0, b = 0; r < rows; ++r) { + vector himages; + for (int c = 0; c < cols; ++c) { + tensor tensor_t; + if (b < batch_size) { + tensor_t = t->select({ to_string(b) }); + TensorToImage(tensor_t, image_t); + if (normalize) { + ScaleTo(image_t, image_t, 0, 1); + } + image_t.Mul(255.); + image_t.channels_ = "xyc"; + image_t.ConvertTo(DataType::uint8); + delete tensor_t; + } + else { + image_t = Image({ t->shape[3],t->shape[2],t->shape[1] }, DataType::uint8, "xyc", ColorType::none); + image_t.SetTo(0); + } + himages.push_back(image_t); + ++b; + } + if (himages.size() > 1) { + HConcat(himages, image_t); + } + vimages.push_back(image_t); + } + if (vimages.size() > 1) { + VConcat(vimages, image_t); + } + return image_t; +} } // namespace ecvl \ No newline at end of file From 31ba25cd92b5c8390fc2bf9d05230869e61274cd Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Fri, 23 Apr 2021 13:16:46 +0200 Subject: [PATCH 02/38] Add ImRead functions which can decode an Image from memory buffer --- modules/core/include/ecvl/core/imgcodecs.h | 31 ++++++++++++++++++++-- modules/core/src/imgcodecs.cpp | 15 +++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/modules/core/include/ecvl/core/imgcodecs.h b/modules/core/include/ecvl/core/imgcodecs.h index 68f4a19b..2768b82e 100644 --- a/modules/core/include/ecvl/core/imgcodecs.h +++ b/modules/core/include/ecvl/core/imgcodecs.h @@ -27,7 +27,7 @@ namespace ecvl */ enum class ImReadMode { - //IMREAD_UNCHANGED = -1, //!< If set, return the loaded image as is (with alpha channel, otherwise it gets cropped). + UNCHANGED = -1, //!< If set, return the loaded image as is (with alpha channel, otherwise it gets cropped). GRAYSCALE = 0, //!< If set, always convert image to the single channel grayscale image (codec internal conversion). COLOR = 1, //!< If set, always convert image to the 3 channel BGR color image. //IMREAD_ANYDEPTH = 2, //!< If set, return 16-bit/32-bit image when the input has the corresponding depth, otherwise convert it to 8-bit. @@ -43,12 +43,39 @@ be read for any reason, the function creates an empty Image and returns false. @param[in] filename A std::filesystem::path identifying the file name. @param[out] dst Image in which data will be stored. -@param[in] flags An ImReadMode indicating how to read the image. +@param[in] flags \ref ImReadMode indicating how to read the image. @return true if the image is correctly read, false otherwise. */ bool ImRead(const ecvl::filesystem::path& filename, Image& dst, ImReadMode flags = ImReadMode::ANYCOLOR); +/** +@brief Loads an image from a buffer in memory. This is an overloaded function, provided for convenience. + +The buffer must be a raw encoded image (png, jpg). +If the image cannot be read for any reason, the function creates an empty Image and returns false. + +@param[in] buffer A char* identifying the input buffer. +@param[in] size Dimension of the input buffer. +@param[out] dst Image in which data will be stored. +@param[in] flags \ref ImReadMode indicating how to read the image. + +@return true if the image is correctly read, false otherwise. +*/ +bool ImRead(const char* buffer, const int size, Image& dst, ImReadMode flags = ImReadMode::ANYCOLOR); + +/** @brief Loads an image from a buffer in memory. This is an overloaded function, provided for convenience. + +It differs from the above function only because it accepts a std::vector instead of a char*. + +@param[in] buffer A std::vector identifying the input buffer. +@param[out] dst Image in which data will be stored. +@param[in] flags \ref ImReadMode indicating how to read the image. + +@return true if the image is correctly read, false otherwise. +*/ +bool ImRead(const std::vector& buffer, Image& dst, ImReadMode flags = ImReadMode::ANYCOLOR); + /** @brief Loads a multi-page image from a file. The function ImReadMulti loads a multi-page image from the specified file. If the image cannot diff --git a/modules/core/src/imgcodecs.cpp b/modules/core/src/imgcodecs.cpp index 4d8f7c2a..5d1e92f7 100644 --- a/modules/core/src/imgcodecs.cpp +++ b/modules/core/src/imgcodecs.cpp @@ -52,6 +52,21 @@ bool ImRead(const path& filename, Image& dst, ImReadMode flags) } } +bool ImRead(const std::vector& buffer, Image& dst, ImReadMode flags) +{ + cv::InputArray ia(buffer); + dst = MatToImage(cv::imdecode(ia, (int)flags)); + + // TODO: Nifti and Dicom version? + return !dst.IsEmpty(); +} + +bool ImRead(const char* buffer, const int size, Image& dst, ImReadMode flags) +{ + const std::vector buf(buffer, buffer + size); + return ImRead(buf, dst, flags); +} + bool ImReadMulti(const path& filename, Image& dst) { std::vector v; From f4ffaf48cdbfe3897fd8e4516ddd47635299d691 Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Fri, 23 Apr 2021 13:46:23 +0200 Subject: [PATCH 03/38] Change `tensor` entries to `Tensor*` --- modules/eddl/include/ecvl/support_eddl.h | 2 +- modules/eddl/src/support_eddl.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index 84666df3..59d7bd40 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -224,7 +224,7 @@ Return a grid of Image from a EDDL Tensor. @return Image taht contains the grid of images */ -Image MakeGrid(const tensor& t, int cols = 8, bool normalize = false); +Image MakeGrid(Tensor*& t, int cols = 8, bool normalize = false); /** @example example_ecvl_eddl.cpp Example of using ECVL with EDDL. diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index e3b1ccac..cbbbd886 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -319,7 +319,7 @@ void DLDataset::LoadBatch(Tensor*& images) } } -Image MakeGrid(const tensor& t, int cols, bool normalize) +Image MakeGrid(Tensor*& t, int cols, bool normalize) { const auto batch_size = t->shape[0]; cols = std::min(batch_size, cols); @@ -330,7 +330,7 @@ Image MakeGrid(const tensor& t, int cols, bool normalize) for (int r = 0, b = 0; r < rows; ++r) { vector himages; for (int c = 0; c < cols; ++c) { - tensor tensor_t; + Tensor* tensor_t; if (b < batch_size) { tensor_t = t->select({ to_string(b) }); TensorToImage(tensor_t, image_t); From 6b4a47ca54427b2f4dd365ec5663a5abf4c77b18 Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Thu, 29 Apr 2021 18:14:31 +0200 Subject: [PATCH 04/38] Add the chance to have more than three splits, not necessarily named training, validation and test --- examples/example_dataset_generator.cpp | 13 +-- examples/example_ecvl_eddl.cpp | 4 +- .../dataset/include/ecvl/dataset_generator.h | 7 +- modules/dataset/include/ecvl/dataset_parser.h | 97 ++++++++--------- modules/dataset/src/dataset_generator.cpp | 16 +-- modules/dataset/src/dataset_parser.cpp | 88 +++++++++++---- modules/eddl/include/ecvl/support_eddl.h | 100 ++++++++---------- modules/eddl/src/support_eddl.cpp | 40 ++----- 8 files changed, 181 insertions(+), 184 deletions(-) diff --git a/examples/example_dataset_generator.cpp b/examples/example_dataset_generator.cpp index b42acb09..57d4a5be 100644 --- a/examples/example_dataset_generator.cpp +++ b/examples/example_dataset_generator.cpp @@ -45,17 +45,18 @@ int main() vector mask; vector black; - for (auto& index : d_segmentation.split_.training_) { - if (d_segmentation.samples_[index].label_path_.value().filename().compare("black.png") == 0) { - black.emplace_back(index); + auto& training = d_segmentation.GetSplit("training"); + for (auto& sample_index : training) { + if (d_segmentation.samples_[sample_index].label_path_.value().filename().compare("black.png") == 0) { + black.emplace_back(sample_index); } else { - mask.emplace_back(index); + mask.emplace_back(sample_index); } } - d_segmentation.split_.training_.clear(); - d_segmentation.split_.training_.insert(d_segmentation.split_.training_.end(), mask.begin(), mask.end()); + training.clear(); + training.insert(training.end(), mask.begin(), mask.end()); // Dump the Dataset on file d_segmentation.Dump(dateset_root_folder_segmentation / path(dateset_root_folder_segmentation.stem().string() + ".yml")); diff --git a/examples/example_ecvl_eddl.cpp b/examples/example_ecvl_eddl.cpp index 0b584498..33e9f4ae 100644 --- a/examples/example_ecvl_eddl.cpp +++ b/examples/example_ecvl_eddl.cpp @@ -13,7 +13,6 @@ #include #include -#include #include "ecvl/core.h" #include "ecvl/support_eddl.h" @@ -103,7 +102,8 @@ int main() AugNormalize({ 0.449 }, { 0.226 }) // mean of imagenet stats ); - DatasetAugmentations dataset_augmentations{ {training_augs, nullptr, test_augs } }; + // DatasetAugmentations dataset_augmentations{ {training_augs, nullptr, test_augs } }; // OLD version: nullptr are no more required + DatasetAugmentations dataset_augmentations{ {training_augs, test_augs } }; int batch_size = 64; cout << "Creating a DLDataset" << endl; diff --git a/modules/dataset/include/ecvl/dataset_generator.h b/modules/dataset/include/ecvl/dataset_generator.h index bef52168..72e85166 100644 --- a/modules/dataset/include/ecvl/dataset_generator.h +++ b/modules/dataset/include/ecvl/dataset_generator.h @@ -39,11 +39,8 @@ class GenerateDataset dataset_root_directory_(dataset_root_directory) { for (auto& p : filesystem::directory_iterator(dataset_root_directory_)) { - std::string tmp = p.path().stem().string(); - - // Check if split folders exist - if (tmp == "training" || tmp == "validation" || tmp == "test") { - splits_.emplace_back(tmp); + if (filesystem::is_directory(p)) { + splits_.emplace_back(p.path().stem().string()); } } num_samples_.resize(splits_.size()); diff --git a/modules/dataset/include/ecvl/dataset_parser.h b/modules/dataset/include/ecvl/dataset_parser.h index 770b333e..3215d01b 100644 --- a/modules/dataset/include/ecvl/dataset_parser.h +++ b/modules/dataset/include/ecvl/dataset_parser.h @@ -63,20 +63,6 @@ class Sample ecvl::Image LoadImage(ecvl::ColorType ctype = ecvl::ColorType::BGR, const bool& is_gt = false); }; -/** @brief Splits of a dataset. - -This class provides the splits a dataset can have: training, validation, and test. - -@anchor Split -*/ -class Split -{ -public: - std::vector training_; /**< @brief Vector containing samples of training split. */ - std::vector validation_; /**< @brief Vector containing samples of validation split. */ - std::vector test_; /**< @brief Vector containing samples of test split. */ -}; - /** @brief DeepHealth Dataset. This class implements the DeepHealth Dataset Format (https://github.com/deephealthproject/ecvl/wiki/DeepHealth-Toolkit-Dataset-Format). @@ -91,7 +77,8 @@ class Dataset std::vector classes_; /**< @brief Vector with all the classes available in the Dataset. */ std::vector features_; /**< @brief Vector with all the features available in the Dataset. */ std::vector samples_; /**< @brief Vector containing all the Dataset samples. See @ref Sample. */ - Split split_; /**< @brief Splits of the Dataset. See @ref Split. */ + std::vector>> split_; /**< @brief Splits of the Dataset. */ + int current_split_ = -1; /**< @brief Current split from which images are loaded. */ Dataset() {} @@ -101,6 +88,44 @@ class Dataset */ Dataset(const filesystem::path& filename, bool verify = false); + /** @brief Returns the image indexes of the current split. + @return vector of image indexes of the split in use. + */ + std::vector& GetSplit(); + + /** @brief Returns the image indexes of the requested split. + @param[in] split ecvl::SplitType representing the split to get ("training", "validation", or "test"). + @return vector of image indexes of the requested split. + */ + std::vector& GetSplit(const SplitType& split); + + /** @brief Returns the image indexes of the requested split. + @param[in] split string representing the split to get. + @return vector of image indexes of the requested split. + */ + std::vector& GetSplit(const std::string& split); + + /** @brief Returns the image indexes of the requested split. + @param[in] split int representing the index of the split to get. + @return vector of image indexes of the requested split. + */ + std::vector& GetSplit(const int& split); + + /** @brief Set the current split. + @param[in] split ecvl::SplitType representing the split to set ("training", "validation", or "test"). + */ + void SetSplit(const SplitType& split); + + /** @brief Set the current split. + @param[in] split string representing the split to set. + */ + void SetSplit(const std::string& split); + + /** @brief Set the current split. + @param[in] split int representing the index of the split to set. + */ + void SetSplit(const int& split); + /** @brief Dump the Dataset into a YAML file following the DeepHealth Dataset Format. The YAML file is saved into the dataset root directory. @@ -118,40 +143,16 @@ class Dataset void DecodeImages(const YAML::Node& node, const filesystem::path& root_path, bool verify); void FindLabel(Sample& sample, const YAML::Node& n); }; -} // namespace ecvl -/** @cond HIDDEN_SECTION */ -namespace YAML -{ -/** - Enable YAML decoding of Split. - Hidden from docs. +/** @brief Convert @ref SplitType in string. + +Useful for backward compatibility. + +@param[in] split SplitType to convert + +@return string that represent the provided SplitType */ -template<> -struct convert -{ - /*static Node encode(const ecvl::Split& rhs) - { - Node node; - node.push_back(rhs.x); - return node; - }*/ - - static bool decode(const YAML::Node& node, ecvl::Split& rhs) - { - if (node["training"].IsDefined()) { - rhs.training_ = node["training"].as>(); - } - if (node["validation"].IsDefined()) { - rhs.validation_ = node["validation"].as>(); - } - if (node["test"].IsDefined()) { - rhs.test_ = node["test"].as>(); - } - return true; - } -}; -} // namespace YAML -/** @endcond */ +const std::string SplitTypeToString(const SplitType& split); +} // namespace ecvl #endif // ECVL_DATASET_PARSER_H_ \ No newline at end of file diff --git a/modules/dataset/src/dataset_generator.cpp b/modules/dataset/src/dataset_generator.cpp index 764897a8..60bd5ea5 100644 --- a/modules/dataset/src/dataset_generator.cpp +++ b/modules/dataset/src/dataset_generator.cpp @@ -144,19 +144,11 @@ void GenerateDataset::LoadImagesAndSplits() } // load indexes of images for each split + d_.split_.resize(splits_.size()); for (int i = 0; i < splits_.size(); ++i) { - if (splits_[i] == "training") { - d_.split_.training_.resize(num_samples_[i]); - iota(d_.split_.training_.begin(), d_.split_.training_.end(), img_index); - } - else if (splits_[i] == "validation") { - d_.split_.validation_.resize(num_samples_[i]); - iota(d_.split_.validation_.begin(), d_.split_.validation_.end(), img_index); - } - else if (splits_[i] == "test") { - d_.split_.test_.resize(num_samples_[i]); - iota(d_.split_.test_.begin(), d_.split_.test_.end(), img_index); - } + d_.split_[i].first = splits_[i]; + d_.split_[i].second.resize(num_samples_[i]); + iota(d_.split_[i].second.begin(), d_.split_[i].second.end(), img_index); img_index += num_samples_[i]; } } diff --git a/modules/dataset/src/dataset_parser.cpp b/modules/dataset/src/dataset_parser.cpp index 49035187..29044392 100644 --- a/modules/dataset/src/dataset_parser.cpp +++ b/modules/dataset/src/dataset_parser.cpp @@ -225,26 +225,13 @@ void Dataset::Dump(const path& file_path) } } - if (split_.training_.size() > 0 || split_.validation_.size() > 0 || split_.test_.size() > 0) { + if (split_.size() > 0) { os << "split:" << endl; - } - - if (split_.training_.size() > 0) { - os << tab + "training:" << endl; - for (auto& i : split_.training_) { - os << tab + tab + "- " << i << endl; - } - } - if (split_.validation_.size() > 0) { - os << tab + "validation:" << endl; - for (auto& i : split_.validation_) { - os << tab + tab + "- " << i << endl; - } - } - if (split_.test_.size() > 0) { - os << tab + "test:" << endl; - for (auto& i : split_.test_) { - os << tab + tab + "- " << i << endl; + for (auto& s : split_) { + os << tab + s.first + ":" << endl; + for (auto& i : s.second) { + os << tab + tab + "- " << i << endl; + } } } @@ -288,7 +275,68 @@ Dataset::Dataset(const filesystem::path& filename, bool verify) DecodeImages(config["images"], abs_filename.parent_path(), verify); if (config["split"].IsDefined()) { - this->split_ = config["split"].as(); + for (YAML::const_iterator it = config["split"].begin(); it != config["split"].end(); ++it) { + // insert into the vector split_ the split name and the vector of image indices + split_.push_back(make_pair(it->first.as(), it->second.as>())); + } + } +} + +std::vector& Dataset::GetSplit(const SplitType& split) +{ + return GetSplit(SplitTypeToString(split)); +} + +std::vector& Dataset::GetSplit(const string& split) +{ + auto it = std::find_if(split_.begin(), split_.end(), [&](const std::pair>& element) { return element.first == split; }); + if (it == this->split_.end()) { + ECVL_ERROR_SPLIT_DOES_NOT_EXIST + } + else { + return it->second; + } +} + +std::vector& Dataset::GetSplit(const int& split) +{ + try { + return split_.at(split).second; + } + catch (const std::out_of_range) { + ECVL_ERROR_SPLIT_DOES_NOT_EXIST + } +} + +std::vector& Dataset::GetSplit() +{ + return GetSplit(current_split_); +} + +void Dataset::SetSplit(const SplitType& split) +{ + SetSplit(SplitTypeToString(split)); +} + +void Dataset::SetSplit(const string& split) +{ + // check if the split exists + int index = static_cast(distance(split_.begin(), find_if(split_.begin(), split_.end(), [&](const auto& pair) { return pair.first == split; }))); + if (index < vsize(split_)) { + this->current_split_ = index; + } + else { + ECVL_ERROR_SPLIT_DOES_NOT_EXIST + } +} + +const string SplitTypeToString(const SplitType& split) +{ + switch (split) { + case SplitType::training: return "training"; + case SplitType::validation: return "validation"; + case SplitType::test: return "test"; + default: ECVL_ERROR_SPLIT_DOES_NOT_EXIST; } } } \ No newline at end of file diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index 59d7bd40..c2f726b0 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -25,6 +25,8 @@ namespace ecvl { +#define ECVL_ERROR_AUG_DOES_NOT_EXIST throw std::runtime_error(ECVL_ERROR_MSG "Augmentation for this split does not exist"); + /** @brief Dataset Augmentations. This class represent the augmentations which will be applied to each split. @@ -35,21 +37,30 @@ This is just a shallow container for the Augmentations */ class DatasetAugmentations { - std::array, 3> augs_; + std::vector> augs_; public: - DatasetAugmentations(std::array, 3> augs = { nullptr,nullptr,nullptr }) - : augs_(augs) - {} + DatasetAugmentations(const std::vector>& augs = { nullptr, nullptr, nullptr }) : augs_(augs) {} // Getters: YAGNI - bool Apply(SplitType st, Image& img, const Image& gt = Image()) + bool Apply(const int split, Image& img, const Image& gt = Image()) { - if (augs_[+st]) { // Magic + operator - augs_[+st]->Apply(img, gt); - return true; + // check if the augs for split st are provided + try { + if (augs_.at(split)) { + augs_[split]->Apply(img, gt); + return true; + } + return false; + } + catch (const std::out_of_range) { + ECVL_ERROR_AUG_DOES_NOT_EXIST } - return false; + } + + bool Apply(SplitType st, Image& img, const Image& gt = Image()) + { + return Apply(+st, img, gt); // Magic + operator } }; @@ -64,10 +75,9 @@ class DLDataset : public Dataset public: int batch_size_; /**< @brief Size of each dataset mini batch. */ int n_channels_; /**< @brief Number of channels of the images. */ - int n_channels_gt_; /**< @brief Number of channels of the ground truth images. */ - SplitType current_split_; /**< @brief Current split from which images are loaded. */ + int n_channels_gt_ = -1; /**< @brief Number of channels of the ground truth images. */ std::vector resize_dims_; /**< @brief Dimensions (HxW) to which Dataset images must be resized. */ - std::array current_batch_ = { 0,0,0 }; /**< @brief Number of batches already loaded for each split. */ + std::vector current_batch_; /**< @brief Number of batches already loaded for each split. */ ColorType ctype_; /**< @brief ecvl::ColorType of the Dataset images. */ ColorType ctype_gt_; /**< @brief ecvl::ColorType of the Dataset ground truth images. */ DatasetAugmentations augs_; /**< @brief ecvl::DatasetAugmentations to be applied to the Dataset images (and ground truth if exist) for each split. */ @@ -95,62 +105,38 @@ class DLDataset : public Dataset ctype_{ ctype }, ctype_gt_{ ctype_gt } { - current_split_ = SplitType::training; - // if training is empty check test and validation and if one of them isn't empty, set it as current split - if (GetSplit(SplitType::training).empty()) { - if (!GetSplit(SplitType::test).empty()) { - current_split_ = SplitType::test; - } - else if (!GetSplit(SplitType::validation).empty()) { - current_split_ = SplitType::validation; - } - } - - Image tmp = this->samples_[0].LoadImage(ctype); - // Initialize resize_dims_ after that augmentations on images are performed - if (!augs_.Apply(SplitType::training, tmp)) { - if (!augs_.Apply(SplitType::validation, tmp)) { - augs_.Apply(SplitType::test, tmp); - } - } - auto y = tmp.channels_.find('y'); - auto x = tmp.channels_.find('x'); - assert(y != std::string::npos && x != std::string::npos); - resize_dims_.insert(resize_dims_.begin(), { tmp.dims_[y],tmp.dims_[x] }); + // resize current_batch_ to the number of splits and initialize it with 0 + current_batch_.resize(split_.size(), 0); // Initialize n_channels_ + Image tmp = samples_[0].LoadImage(ctype); n_channels_ = tmp.Channels(); - // Initialize n_channels_gt_ if exists - if (!GetSplit().empty()) { - if (samples_[GetSplit()[0]].label_path_ != nullopt) { - n_channels_gt_ = samples_[GetSplit()[0]].LoadImage(ctype_gt_, true).Channels(); + if (!split_.empty()) { + current_split_ = 0; + // Initialize resize_dims_ after that augmentations on the first image are performed + augs_.Apply(current_split_, tmp); + auto y = tmp.channels_.find('y'); + auto x = tmp.channels_.find('x'); + assert(y != std::string::npos && x != std::string::npos); + resize_dims_.insert(resize_dims_.begin(), { tmp.dims_[y],tmp.dims_[x] }); + + // Initialize n_channels_gt_ if exists + if (samples_[0].label_path_ != nullopt) { + n_channels_gt_ = samples_[0].LoadImage(ctype_gt_, true).Channels(); } } + else { + cout << ECVL_WARNING_MSG << "Missing splits in the dataset file." << endl; + } } - /** @brief Returns the image indexes of the current Split. - @return vector of image indexes of the Split in use. - */ - std::vector& GetSplit(); - - /** @brief Returns the image indexes of the requested Split. - @param[in] split ecvl::SplitType representing the Split to get ("training", "validation", or "test"). - @return vector of image indexes of the requested Split. - */ - std::vector& GetSplit(const SplitType& split); - - /** @brief Reset the batch counter of the current Split. */ + /** @brief Reset the batch counter of the current split. */ void ResetCurrentBatch(); - /** @brief Reset the batch counter of each Split. */ + /** @brief Reset the batch counter of each split. */ void ResetAllBatches(); - /** @brief Set the current Split. - @param[in] split ecvl::SplitType representing the Split to set ("training", "validation", or "test"). - */ - void SetSplit(const SplitType& split); - /** @brief Load a batch into _images_ and _labels_ `tensor`. @param[out] images `tensor` which stores the batch of images. @param[out] labels `tensor` which stores the batch of labels. @@ -222,7 +208,7 @@ Return a grid of Image from a EDDL Tensor. @param[in] cols Number of images displayed in each row of the grid. @param[in] normalize If true, shift the image to the range [0,1]. -@return Image taht contains the grid of images +@return Image that contains the grid of images */ Image MakeGrid(Tensor*& t, int cols = 8, bool normalize = false); diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index cbbbd886..f8039bd7 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -143,40 +143,12 @@ void ImageToTensor(const Image& img, Tensor*& t, const int& offset) memcpy(t->ptr + tot_dims * offset, tmp.data_, tot_dims * sizeof(float)); } -std::vector& DLDataset::GetSplit(const SplitType& split) -{ - if (split == SplitType::training) { - return this->split_.training_; - } - else if (split == SplitType::validation) { - return this->split_.validation_; - } - else { - return this->split_.test_; - } -} - -std::vector& DLDataset::GetSplit() -{ - return GetSplit(current_split_); -} - -void DLDataset::SetSplit(const SplitType& split) -{ - if (GetSplit(split).size() > 0) { - this->current_split_ = split; - } - else { - ECVL_ERROR_SPLIT_DOES_NOT_EXIST - } -} - void DLDataset::ResetCurrentBatch() { { // CRITICAL REGION STARTS std::unique_lock lck(mutex_current_batch_); - this->current_batch_[+current_split_] = 0; + this->current_batch_[current_split_] = 0; } // CRITICAL REGION ENDS } @@ -185,7 +157,7 @@ void DLDataset::ResetAllBatches() { // CRITICAL REGION STARTS std::unique_lock lck(mutex_current_batch_); - this->current_batch_.fill(0); + fill(current_batch_.begin(), current_batch_.end(), 0); } // CRITICAL REGION ENDS } @@ -224,8 +196,8 @@ void DLDataset::LoadBatch(Tensor*& images, Tensor*& labels) { // CRITICAL REGION STARTS std::unique_lock lck(mutex_current_batch_); - start = current_batch_[+current_split_] * bs; - ++current_batch_[+current_split_]; + start = current_batch_[current_split_] * bs; + ++current_batch_[current_split_]; } // CRITICAL REGION ENDS if (vsize(GetSplit()) < start + bs) { @@ -293,8 +265,8 @@ void DLDataset::LoadBatch(Tensor*& images) { // CRITICAL REGION STARTS std::unique_lock lck(mutex_current_batch_); - start = current_batch_[+current_split_] * bs; - ++current_batch_[+current_split_]; + start = current_batch_[current_split_] * bs; + ++current_batch_[current_split_]; } // CRITICAL REGION ENDS if (vsize(GetSplit()) < start + bs) { From a08dc49bd95f37f6e0eb70902ce7ce5d62fcbc5b Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Fri, 30 Apr 2021 12:28:10 +0200 Subject: [PATCH 05/38] Add GetLocations to retrieve the list of all Dataset samples locations --- modules/dataset/include/ecvl/dataset_parser.h | 8 ++++++++ modules/dataset/src/dataset_parser.cpp | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/modules/dataset/include/ecvl/dataset_parser.h b/modules/dataset/include/ecvl/dataset_parser.h index 3215d01b..d4f0775d 100644 --- a/modules/dataset/include/ecvl/dataset_parser.h +++ b/modules/dataset/include/ecvl/dataset_parser.h @@ -135,6 +135,14 @@ class Dataset */ void Dump(const filesystem::path& file_path); + /** @brief Retrieve the list of all samples locations in the dataset file. + + A single Sample can have multiple locations (e.g., if they are different acquisitions of the same image). + + @param[out] vector containing all the samples locations. + */ + std::vector> GetLocations(); + // RegEx which matchs URLs static const std::regex url_regex_; diff --git a/modules/dataset/src/dataset_parser.cpp b/modules/dataset/src/dataset_parser.cpp index 29044392..986fba19 100644 --- a/modules/dataset/src/dataset_parser.cpp +++ b/modules/dataset/src/dataset_parser.cpp @@ -330,6 +330,16 @@ void Dataset::SetSplit(const string& split) } } +vector> Dataset::GetLocations() +{ + const auto& size = vsize(samples_); + vector> locations(size); + for (int i = 0; i < size; ++i) { + locations[i] = samples_[i].location_; + } + return locations; +} + const string SplitTypeToString(const SplitType& split) { switch (split) { From 4826019f46a6d54d53c89c39f68d56ad34972a39 Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Fri, 30 Apr 2021 16:56:09 +0200 Subject: [PATCH 06/38] Change ResetCurrentBatch to ResetBatch and add the possibility to shuffle samples indices --- modules/eddl/include/ecvl/support_eddl.h | 39 +++++++++++++++++++--- modules/eddl/src/support_eddl.cpp | 42 ++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 7 deletions(-) diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index c2f726b0..2bc4f26e 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -82,6 +82,7 @@ class DLDataset : public Dataset ColorType ctype_gt_; /**< @brief ecvl::ColorType of the Dataset ground truth images. */ DatasetAugmentations augs_; /**< @brief ecvl::DatasetAugmentations to be applied to the Dataset images (and ground truth if exist) for each split. */ std::mutex mutex_current_batch_; /**< @brief std::mutex to add exclusive access to attribute current_batch_. */ + static std::default_random_engine re_; /** @param[in] filename Path to the Dataset file. @@ -131,11 +132,33 @@ class DLDataset : public Dataset } } - /** @brief Reset the batch counter of the current split. */ - void ResetCurrentBatch(); + /** @brief Reset the batch counter and optionally shuffle samples indices of the specified split. + + If no split is provided (i.e. it is provided a value less than 0), the current split is reset. + @param[in] split index of the split to reset. + @param[in] reshuffle boolean which indicates whether to shuffle the split samples indices or not. + */ + void ResetBatch(int split = -1, bool reshuffle = false); + + /** @brief Reset the batch counter and optionally shuffle samples indices of the specified split. + + @param[in] split name of the split to reset. + @param[in] reshuffle boolean which indicates whether to shuffle the split samples indices or not. + */ + void ResetBatch(std::string split, bool reshuffle = false); + + /** @brief Reset the batch counter and optionally shuffle samples indices of the specified split. + + @param[in] split SplitType of the split to reset. + @param[in] reshuffle boolean which indicates whether to shuffle the split samples indices or not. + */ + void ResetBatch(SplitType split, bool reshuffle = false); - /** @brief Reset the batch counter of each split. */ - void ResetAllBatches(); + /** @brief Reset the batch counter of each split and optionally shuffle samples indices (within each split). + + @param[in] reshuffle boolean which indicates whether to shuffle the samples indices or not. + */ + void ResetAllBatches(bool reshuffle = false); /** @brief Load a batch into _images_ and _labels_ `tensor`. @param[out] images `tensor` which stores the batch of images. @@ -147,6 +170,14 @@ class DLDataset : public Dataset @param[out] images `tensor` which stores the batch of images. */ void LoadBatch(Tensor*& images); + + /** @brief Set a fixed seed for the random generated values. Useful to reproduce experiments with same shuffling during training. + @param[in] seed Value of the seed for the random engine. + */ + static void SetSplitSeed(unsigned seed) + { + re_.seed(seed); + } }; /** @brief Convert an EDDL Tensor into an ECVL Image. diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index f8039bd7..c891fd91 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -25,6 +25,8 @@ using namespace ecvl::filesystem; namespace ecvl { +default_random_engine DLDataset::re_(random_device{}()); + void TensorToImage(Tensor*& t, Image& img) { switch (t->ndim) { @@ -143,21 +145,55 @@ void ImageToTensor(const Image& img, Tensor*& t, const int& offset) memcpy(t->ptr + tot_dims * offset, tmp.data_, tot_dims * sizeof(float)); } -void DLDataset::ResetCurrentBatch() +void DLDataset::ResetBatch(int split, bool reshuffle) { { // CRITICAL REGION STARTS std::unique_lock lck(mutex_current_batch_); - this->current_batch_[current_split_] = 0; + auto split_index = current_split_ ? split < 0 : split; + + try { + this->current_batch_.at(split_index) = 0; + } + catch (const std::out_of_range) { + ECVL_ERROR_SPLIT_DOES_NOT_EXIST + } + + if (reshuffle) { + shuffle(begin(GetSplit(split_index)), end(GetSplit(split_index)), re_); + } } // CRITICAL REGION ENDS } -void DLDataset::ResetAllBatches() +void DLDataset::ResetBatch(string split, bool reshuffle) +{ + // check if the split exists + int index = static_cast(distance(split_.begin(), find_if(split_.begin(), split_.end(), [&](const auto& pair) { return pair.first == split; }))); + if (index < vsize(split_)) { + ResetBatch(index, reshuffle); + } + else { + ECVL_ERROR_SPLIT_DOES_NOT_EXIST + } +} + +void DLDataset::ResetBatch(SplitType split, bool reshuffle) +{ + ResetBatch(SplitTypeToString(split), reshuffle); +} + +void DLDataset::ResetAllBatches(bool reshuffle) { { // CRITICAL REGION STARTS std::unique_lock lck(mutex_current_batch_); fill(current_batch_.begin(), current_batch_.end(), 0); + + if (reshuffle) { + for (auto& s : split_) { + shuffle(begin(s.second), end(s.second), re_); + } + } } // CRITICAL REGION ENDS } From 18fca6ff366996f7db95e3fdf4726b1922222f16 Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Fri, 30 Apr 2021 17:12:14 +0200 Subject: [PATCH 07/38] Add SetBatchSize --- modules/eddl/include/ecvl/support_eddl.h | 7 +++++++ modules/eddl/src/support_eddl.cpp | 11 +++++++++++ 2 files changed, 18 insertions(+) diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index 2bc4f26e..34e98ab5 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -178,6 +178,13 @@ class DLDataset : public Dataset { re_.seed(seed); } + + /** @brief Set a new batch size inside the dataset. + + Notice that this will not affect the EDDL network batch size, that it has to be changed too. + @param[in] bs Value to set for the batch size. + */ + void SetBatchSize(int bs); }; /** @brief Convert an EDDL Tensor into an ECVL Image. diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index c891fd91..655bdbb2 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -327,6 +327,17 @@ void DLDataset::LoadBatch(Tensor*& images) } } +void DLDataset::SetBatchSize(int bs) +{ + // check if the provided batch size is negative or greater than the current split size + if (bs > 0 || bs > vsize(split_[current_split_].second)) { + batch_size_ = bs; + } + else { + ECVL_ERROR_WRONG_PARAMS("bs in SetBatchSize") + } +} + Image MakeGrid(Tensor*& t, int cols, bool normalize) { const auto batch_size = t->shape[0]; From 5967faf9e76d3e9d8b0b51d57ebf9515f8a4ef1b Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Mon, 3 May 2021 18:23:09 +0200 Subject: [PATCH 08/38] Change split_ in vector --- modules/dataset/include/ecvl/dataset_parser.h | 65 ++++++++++++------- modules/dataset/src/dataset_generator.cpp | 6 +- modules/dataset/src/dataset_parser.cpp | 61 ++++++++++------- modules/eddl/include/ecvl/support_eddl.h | 22 +++---- modules/eddl/src/support_eddl.cpp | 38 +++++------ 5 files changed, 109 insertions(+), 83 deletions(-) diff --git a/modules/dataset/include/ecvl/dataset_parser.h b/modules/dataset/include/ecvl/dataset_parser.h index d4f0775d..4a338771 100644 --- a/modules/dataset/include/ecvl/dataset_parser.h +++ b/modules/dataset/include/ecvl/dataset_parser.h @@ -63,6 +63,32 @@ class Sample ecvl::Image LoadImage(ecvl::ColorType ctype = ecvl::ColorType::BGR, const bool& is_gt = false); }; +/** @brief Split of a dataset. +This class provides the name of the split and the indices of the samples that belong to this split. +It optionally provides the split type if the split name is one of training, validation or test. +@anchor Split +*/ +class Split +{ +public: + std::string split_name_; /**< @brief Name of the split. */ + std::optional split_type_; /**< @brief If the split is training, validation or test the corresponding SpitType is provided. */ + std::vector samples_indices_; /**< @brief Vector containing samples indices of the split. */ + + Split() {} + + /** + @param[in] split_name Name of the split. + @param[in] samples_indices Vector containing samples indices of the split. + */ + Split(const std::string& split_name, const std::vector& samples_indices) : split_name_{ split_name }, samples_indices_{ samples_indices } + { + if (split_name_ == "training") split_type_ = SplitType::training; + else if (split_name_ == "validation") split_type_ = SplitType::validation; + else if (split_name_ == "test") split_type_ = SplitType::test; + } +}; + /** @brief DeepHealth Dataset. This class implements the DeepHealth Dataset Format (https://github.com/deephealthproject/ecvl/wiki/DeepHealth-Toolkit-Dataset-Format). @@ -77,7 +103,7 @@ class Dataset std::vector classes_; /**< @brief Vector with all the classes available in the Dataset. */ std::vector features_; /**< @brief Vector with all the features available in the Dataset. */ std::vector samples_; /**< @brief Vector containing all the Dataset samples. See @ref Sample. */ - std::vector>> split_; /**< @brief Splits of the Dataset. */ + std::vector split_; /**< @brief Splits of the Dataset. */ int current_split_ = -1; /**< @brief Current split from which images are loaded. */ Dataset() {} @@ -94,37 +120,37 @@ class Dataset std::vector& GetSplit(); /** @brief Returns the image indexes of the requested split. - @param[in] split ecvl::SplitType representing the split to get ("training", "validation", or "test"). + @param[in] split_type ecvl::SplitType representing the split to get ("training", "validation", or "test"). @return vector of image indexes of the requested split. */ - std::vector& GetSplit(const SplitType& split); + std::vector& GetSplit(const SplitType& split_type); /** @brief Returns the image indexes of the requested split. - @param[in] split string representing the split to get. + @param[in] split_name string representing the split to get. @return vector of image indexes of the requested split. */ - std::vector& GetSplit(const std::string& split); + std::vector& GetSplit(const std::string& split_name); /** @brief Returns the image indexes of the requested split. - @param[in] split int representing the index of the split to get. + @param[in] split_index int representing the index of the split to get. @return vector of image indexes of the requested split. */ - std::vector& GetSplit(const int& split); + std::vector& GetSplit(const int& split_index); /** @brief Set the current split. - @param[in] split ecvl::SplitType representing the split to set ("training", "validation", or "test"). + @param[in] split_type ecvl::SplitType representing the split to set ("training", "validation", or "test"). */ - void SetSplit(const SplitType& split); + void SetSplit(const SplitType& split_type); /** @brief Set the current split. - @param[in] split string representing the split to set. + @param[in] split_name string representing the split to set. */ - void SetSplit(const std::string& split); + void SetSplit(const std::string& split_name); /** @brief Set the current split. - @param[in] split int representing the index of the split to set. + @param[in] split_index int representing the index of the split to set. */ - void SetSplit(const int& split); + void SetSplit(const int& split_index); /** @brief Dump the Dataset into a YAML file following the DeepHealth Dataset Format. @@ -136,9 +162,9 @@ class Dataset void Dump(const filesystem::path& file_path); /** @brief Retrieve the list of all samples locations in the dataset file. - + A single Sample can have multiple locations (e.g., if they are different acquisitions of the same image). - + @param[out] vector containing all the samples locations. */ std::vector> GetLocations(); @@ -152,15 +178,6 @@ class Dataset void FindLabel(Sample& sample, const YAML::Node& n); }; -/** @brief Convert @ref SplitType in string. - -Useful for backward compatibility. - -@param[in] split SplitType to convert - -@return string that represent the provided SplitType -*/ -const std::string SplitTypeToString(const SplitType& split); } // namespace ecvl #endif // ECVL_DATASET_PARSER_H_ \ No newline at end of file diff --git a/modules/dataset/src/dataset_generator.cpp b/modules/dataset/src/dataset_generator.cpp index 60bd5ea5..e0d9ecd9 100644 --- a/modules/dataset/src/dataset_generator.cpp +++ b/modules/dataset/src/dataset_generator.cpp @@ -146,9 +146,9 @@ void GenerateDataset::LoadImagesAndSplits() // load indexes of images for each split d_.split_.resize(splits_.size()); for (int i = 0; i < splits_.size(); ++i) { - d_.split_[i].first = splits_[i]; - d_.split_[i].second.resize(num_samples_[i]); - iota(d_.split_[i].second.begin(), d_.split_[i].second.end(), img_index); + d_.split_[i].split_name_ = splits_[i]; + d_.split_[i].samples_indices_.resize(num_samples_[i]); + iota(d_.split_[i].samples_indices_.begin(), d_.split_[i].samples_indices_.end(), img_index); img_index += num_samples_[i]; } } diff --git a/modules/dataset/src/dataset_parser.cpp b/modules/dataset/src/dataset_parser.cpp index 986fba19..3db5ab94 100644 --- a/modules/dataset/src/dataset_parser.cpp +++ b/modules/dataset/src/dataset_parser.cpp @@ -228,8 +228,8 @@ void Dataset::Dump(const path& file_path) if (split_.size() > 0) { os << "split:" << endl; for (auto& s : split_) { - os << tab + s.first + ":" << endl; - for (auto& i : s.second) { + os << tab + s.split_name_ + ":" << endl; + for (auto& i : s.samples_indices_) { os << tab + tab + "- " << i << endl; } } @@ -277,31 +277,38 @@ Dataset::Dataset(const filesystem::path& filename, bool verify) if (config["split"].IsDefined()) { for (YAML::const_iterator it = config["split"].begin(); it != config["split"].end(); ++it) { // insert into the vector split_ the split name and the vector of image indices - split_.push_back(make_pair(it->first.as(), it->second.as>())); + Split s(it->first.as(), it->second.as>()); + split_.push_back(s); } } } -std::vector& Dataset::GetSplit(const SplitType& split) +std::vector& Dataset::GetSplit(const SplitType& split_type) { - return GetSplit(SplitTypeToString(split)); + auto it = std::find_if(split_.begin(), split_.end(), [&](const auto& s) { return s.split_type_ == split_type; }); + if (it == this->split_.end()) { + ECVL_ERROR_SPLIT_DOES_NOT_EXIST + } + else { + return it->samples_indices_; + } } -std::vector& Dataset::GetSplit(const string& split) +std::vector& Dataset::GetSplit(const string& split_name) { - auto it = std::find_if(split_.begin(), split_.end(), [&](const std::pair>& element) { return element.first == split; }); + auto it = std::find_if(split_.begin(), split_.end(), [&](const auto& s) { return s.split_name_ == split_name; }); if (it == this->split_.end()) { ECVL_ERROR_SPLIT_DOES_NOT_EXIST } else { - return it->second; + return it->samples_indices_; } } -std::vector& Dataset::GetSplit(const int& split) +std::vector& Dataset::GetSplit(const int& split_index) { try { - return split_.at(split).second; + return split_.at(split_index).samples_indices_; } catch (const std::out_of_range) { ECVL_ERROR_SPLIT_DOES_NOT_EXIST @@ -313,15 +320,22 @@ std::vector& Dataset::GetSplit() return GetSplit(current_split_); } -void Dataset::SetSplit(const SplitType& split) +void Dataset::SetSplit(const SplitType& split_type) { - SetSplit(SplitTypeToString(split)); + // check if the split exists + int index = static_cast(distance(split_.begin(), find_if(split_.begin(), split_.end(), [&](const auto& s) { return s.split_type_ == split_type; }))); + if (index < vsize(split_)) { + this->current_split_ = index; + } + else { + ECVL_ERROR_SPLIT_DOES_NOT_EXIST + } } -void Dataset::SetSplit(const string& split) +void Dataset::SetSplit(const string& split_name) { // check if the split exists - int index = static_cast(distance(split_.begin(), find_if(split_.begin(), split_.end(), [&](const auto& pair) { return pair.first == split; }))); + int index = static_cast(distance(split_.begin(), find_if(split_.begin(), split_.end(), [&](const auto& s) { return s.split_name_ == split_name; }))); if (index < vsize(split_)) { this->current_split_ = index; } @@ -330,6 +344,16 @@ void Dataset::SetSplit(const string& split) } } +void Dataset::SetSplit(const int& split_index) +{ + if (0 <= split_index && split_index < vsize(split_)) { + this->current_split_ = split_index; + } + else { + ECVL_ERROR_SPLIT_DOES_NOT_EXIST + } +} + vector> Dataset::GetLocations() { const auto& size = vsize(samples_); @@ -340,13 +364,4 @@ vector> Dataset::GetLocations() return locations; } -const string SplitTypeToString(const SplitType& split) -{ - switch (split) { - case SplitType::training: return "training"; - case SplitType::validation: return "validation"; - case SplitType::test: return "test"; - default: ECVL_ERROR_SPLIT_DOES_NOT_EXIST; - } -} } \ No newline at end of file diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index 34e98ab5..e82a1047 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -132,33 +132,33 @@ class DLDataset : public Dataset } } - /** @brief Reset the batch counter and optionally shuffle samples indices of the specified split. - - If no split is provided (i.e. it is provided a value less than 0), the current split is reset. - @param[in] split index of the split to reset. + /** @brief Reset the batch counter and optionally shuffle samples indices of the specified split. + + If no split is provided (i.e. it is provided a value less than 0), the current split is reset. + @param[in] split_index index of the split to reset. @param[in] reshuffle boolean which indicates whether to shuffle the split samples indices or not. */ - void ResetBatch(int split = -1, bool reshuffle = false); + void ResetBatch(int split_index = -1, bool shuffle = false); /** @brief Reset the batch counter and optionally shuffle samples indices of the specified split. - @param[in] split name of the split to reset. + @param[in] split_name name of the split to reset. @param[in] reshuffle boolean which indicates whether to shuffle the split samples indices or not. */ - void ResetBatch(std::string split, bool reshuffle = false); + void ResetBatch(std::string split_name, bool shuffle = false); /** @brief Reset the batch counter and optionally shuffle samples indices of the specified split. - @param[in] split SplitType of the split to reset. + @param[in] split_type SplitType of the split to reset. @param[in] reshuffle boolean which indicates whether to shuffle the split samples indices or not. */ - void ResetBatch(SplitType split, bool reshuffle = false); + void ResetBatch(SplitType split_type, bool shuffle = false); /** @brief Reset the batch counter of each split and optionally shuffle samples indices (within each split). @param[in] reshuffle boolean which indicates whether to shuffle the samples indices or not. */ - void ResetAllBatches(bool reshuffle = false); + void ResetAllBatches(bool shuffle = false); /** @brief Load a batch into _images_ and _labels_ `tensor`. @param[out] images `tensor` which stores the batch of images. @@ -180,7 +180,7 @@ class DLDataset : public Dataset } /** @brief Set a new batch size inside the dataset. - + Notice that this will not affect the EDDL network batch size, that it has to be changed too. @param[in] bs Value to set for the batch size. */ diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index 655bdbb2..fc3cad45 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -145,53 +145,47 @@ void ImageToTensor(const Image& img, Tensor*& t, const int& offset) memcpy(t->ptr + tot_dims * offset, tmp.data_, tot_dims * sizeof(float)); } -void DLDataset::ResetBatch(int split, bool reshuffle) +void DLDataset::ResetBatch(int split_index, bool shuffle) { { // CRITICAL REGION STARTS std::unique_lock lck(mutex_current_batch_); - - auto split_index = current_split_ ? split < 0 : split; - + int index = current_split_ ? split_index < 0 : split_index; + // check if the split exists try { - this->current_batch_.at(split_index) = 0; + this->current_batch_.at(index) = 0; } catch (const std::out_of_range) { ECVL_ERROR_SPLIT_DOES_NOT_EXIST } - if (reshuffle) { - shuffle(begin(GetSplit(split_index)), end(GetSplit(split_index)), re_); + if (shuffle) { + std::shuffle(begin(GetSplit(index)), end(GetSplit(index)), re_); } } // CRITICAL REGION ENDS } -void DLDataset::ResetBatch(string split, bool reshuffle) +void DLDataset::ResetBatch(string split_name, bool shuffle) { - // check if the split exists - int index = static_cast(distance(split_.begin(), find_if(split_.begin(), split_.end(), [&](const auto& pair) { return pair.first == split; }))); - if (index < vsize(split_)) { - ResetBatch(index, reshuffle); - } - else { - ECVL_ERROR_SPLIT_DOES_NOT_EXIST - } + int index = static_cast(distance(split_.begin(), find_if(split_.begin(), split_.end(), [&](const auto& s) { return s.split_name_ == split_name; }))); + ResetBatch(index, shuffle); } -void DLDataset::ResetBatch(SplitType split, bool reshuffle) +void DLDataset::ResetBatch(SplitType split_type, bool shuffle) { - ResetBatch(SplitTypeToString(split), reshuffle); + int index = static_cast(distance(split_.begin(), find_if(split_.begin(), split_.end(), [&](const auto& s) { return s.split_type_ == split_type; }))); + ResetBatch(index, shuffle); } -void DLDataset::ResetAllBatches(bool reshuffle) +void DLDataset::ResetAllBatches(bool shuffle) { { // CRITICAL REGION STARTS std::unique_lock lck(mutex_current_batch_); fill(current_batch_.begin(), current_batch_.end(), 0); - if (reshuffle) { + if (shuffle) { for (auto& s : split_) { - shuffle(begin(s.second), end(s.second), re_); + std::shuffle(begin(s.samples_indices_), end(s.samples_indices_), re_); } } } // CRITICAL REGION ENDS @@ -330,7 +324,7 @@ void DLDataset::LoadBatch(Tensor*& images) void DLDataset::SetBatchSize(int bs) { // check if the provided batch size is negative or greater than the current split size - if (bs > 0 || bs > vsize(split_[current_split_].second)) { + if (bs > 0 && bs < vsize(split_[current_split_].samples_indices_)) { batch_size_ = bs; } else { From c9956695f18d1ce96f57a203d1a7a05ffa45ddbc Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Thu, 6 May 2021 10:26:09 +0200 Subject: [PATCH 09/38] Add missing include --- modules/dataset/src/dataset_parser.cpp | 1 + modules/eddl/src/support_eddl.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/modules/dataset/src/dataset_parser.cpp b/modules/dataset/src/dataset_parser.cpp index 986fba19..9826da4f 100644 --- a/modules/dataset/src/dataset_parser.cpp +++ b/modules/dataset/src/dataset_parser.cpp @@ -13,6 +13,7 @@ #include "ecvl/dataset_parser.h" +#include #include #include diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index 655bdbb2..ff0a5876 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -18,6 +18,7 @@ #include "ecvl/core/imgproc.h" #include "ecvl/core/standard_errors.h" +#include #include using namespace eddl; From 636c91e2e7b927cb5c8f7a90aa645e6aba2e0aee Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Thu, 6 May 2021 10:41:29 +0200 Subject: [PATCH 10/38] Fix optional usage --- modules/dataset/include/ecvl/dataset_parser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataset/include/ecvl/dataset_parser.h b/modules/dataset/include/ecvl/dataset_parser.h index 4a338771..a9d3db25 100644 --- a/modules/dataset/include/ecvl/dataset_parser.h +++ b/modules/dataset/include/ecvl/dataset_parser.h @@ -72,7 +72,7 @@ class Split { public: std::string split_name_; /**< @brief Name of the split. */ - std::optional split_type_; /**< @brief If the split is training, validation or test the corresponding SpitType is provided. */ + optional split_type_; /**< @brief If the split is training, validation or test the corresponding SpitType is provided. */ std::vector samples_indices_; /**< @brief Vector containing samples indices of the split. */ Split() {} From a98edf7b53d0a0ea3661a7cffeb9653a3e93ce75 Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Wed, 19 May 2021 18:04:26 +0200 Subject: [PATCH 11/38] Add built-in mechanism for parallel batches loading --- modules/dataset/include/ecvl/dataset_parser.h | 54 ++- modules/dataset/src/dataset_parser.cpp | 8 +- modules/eddl/include/ecvl/support_eddl.h | 411 +++++++++++++++--- modules/eddl/src/support_eddl.cpp | 233 ++++++++-- 4 files changed, 592 insertions(+), 114 deletions(-) diff --git a/modules/dataset/include/ecvl/dataset_parser.h b/modules/dataset/include/ecvl/dataset_parser.h index 4a338771..f6ff0706 100644 --- a/modules/dataset/include/ecvl/dataset_parser.h +++ b/modules/dataset/include/ecvl/dataset_parser.h @@ -36,6 +36,17 @@ namespace ecvl @anchor SplitType */ UNSIGNED_ENUM_CLASS(SplitType, training, validation, test) + +/** @brief Enum class representing allowed tasks for the ECVL Dataset. + +@anchor Task + */ +enum class Task +{ + classification, + segmentation, +}; + /** @brief Sample image in a dataset. This class provides the information to describe a dataset sample. @@ -45,11 +56,11 @@ This class provides the information to describe a dataset sample. class Sample { public: - std::vector location_; /**< @brief Absolute path of the sample. */ - optional> label_; /**< @brief Vector of sample labels. */ - optional label_path_; /**< @brief Absolute path of sample ground truth. */ - optional> values_; /**< @brief Map (`map`) which stores the features of a sample. */ - std::vector size_; /**< @brief Original x and y dimensions of the sample */ + std::vector location_; /**< @brief Absolute path of the sample. */ + optional> label_; /**< @brief Vector of sample labels. */ + optional label_path_; /**< @brief Absolute path of sample ground truth. */ + optional> values_; /**< @brief Map (`map`) which stores the features of a sample. */ + std::vector size_; /**< @brief Original x and y dimensions of the sample */ /** @brief Return an Image of the dataset. @@ -64,7 +75,7 @@ class Sample }; /** @brief Split of a dataset. -This class provides the name of the split and the indices of the samples that belong to this split. +This class provides the name of the split and the indices of the samples that belong to this split. It optionally provides the split type if the split name is one of training, validation or test. @anchor Split */ @@ -74,6 +85,10 @@ class Split std::string split_name_; /**< @brief Name of the split. */ std::optional split_type_; /**< @brief If the split is training, validation or test the corresponding SpitType is provided. */ std::vector samples_indices_; /**< @brief Vector containing samples indices of the split. */ + bool drop_last_ = false; /**< @brief Whether to drop elements that don't fit batch size or not. */ + int num_batches_; /**< @brief Number of batches of this split. */ + int last_batch_; /**< @brief Dimension of the last batch of this split. */ + bool no_label_ = false; /**< @brief Whether the split has samples with labels or not. */ Split() {} @@ -98,13 +113,14 @@ This class implements the DeepHealth Dataset Format (https://github.com/deepheal class Dataset { public: - std::string name_ = "DeepHealth dataset"; /**< @brief Name of the Dataset. */ - std::string description_ = "This is the DeepHealth example dataset!"; /**< @brief Description of the Dataset. */ - std::vector classes_; /**< @brief Vector with all the classes available in the Dataset. */ - std::vector features_; /**< @brief Vector with all the features available in the Dataset. */ - std::vector samples_; /**< @brief Vector containing all the Dataset samples. See @ref Sample. */ - std::vector split_; /**< @brief Splits of the Dataset. */ - int current_split_ = -1; /**< @brief Current split from which images are loaded. */ + std::string name_ = "DeepHealth dataset"; /**< @brief Name of the Dataset. */ + std::string description_ = "This is the DeepHealth example dataset!"; /**< @brief Description of the Dataset. */ + std::vector classes_; /**< @brief Vector with all the classes available in the Dataset. */ + std::vector features_; /**< @brief Vector with all the features available in the Dataset. */ + std::vector samples_; /**< @brief Vector containing all the Dataset samples. See @ref Sample. */ + std::vector split_; /**< @brief Splits of the Dataset. See @ref Split. */ + int current_split_ = -1; /**< @brief Current split from which images are loaded. */ + Task task_; /**< @brief Task of the dataset. */ Dataset() {} @@ -114,6 +130,9 @@ class Dataset */ Dataset(const filesystem::path& filename, bool verify = false); + /* Destructor */ + virtual ~Dataset() {} + /** @brief Returns the image indexes of the current split. @return vector of image indexes of the split in use. */ @@ -140,17 +159,17 @@ class Dataset /** @brief Set the current split. @param[in] split_type ecvl::SplitType representing the split to set ("training", "validation", or "test"). */ - void SetSplit(const SplitType& split_type); + virtual void SetSplit(const SplitType& split_type); /** @brief Set the current split. @param[in] split_name string representing the split to set. */ - void SetSplit(const std::string& split_name); + virtual void SetSplit(const std::string& split_name); /** @brief Set the current split. @param[in] split_index int representing the index of the split to set. */ - void SetSplit(const int& split_index); + virtual void SetSplit(const int& split_index); /** @brief Dump the Dataset into a YAML file following the DeepHealth Dataset Format. @@ -165,7 +184,7 @@ class Dataset A single Sample can have multiple locations (e.g., if they are different acquisitions of the same image). - @param[out] vector containing all the samples locations. + @return vector containing all the samples locations. */ std::vector> GetLocations(); @@ -177,7 +196,6 @@ class Dataset void DecodeImages(const YAML::Node& node, const filesystem::path& root_path, bool verify); void FindLabel(Sample& sample, const YAML::Node& n); }; - } // namespace ecvl #endif // ECVL_DATASET_PARSER_H_ \ No newline at end of file diff --git a/modules/dataset/src/dataset_parser.cpp b/modules/dataset/src/dataset_parser.cpp index 3db5ab94..329f2efa 100644 --- a/modules/dataset/src/dataset_parser.cpp +++ b/modules/dataset/src/dataset_parser.cpp @@ -274,13 +274,20 @@ Dataset::Dataset(const filesystem::path& filename, bool verify) } DecodeImages(config["images"], abs_filename.parent_path(), verify); + if (config["split"].IsDefined()) { for (YAML::const_iterator it = config["split"].begin(); it != config["split"].end(); ++it) { // insert into the vector split_ the split name and the vector of image indices Split s(it->first.as(), it->second.as>()); + + if (!samples_[s.samples_indices_[0]].label_ && !samples_[s.samples_indices_[0]].label_path_) { + s.no_label_ = true; + } split_.push_back(s); } } + + task_ = classes_.empty() ? Task::segmentation : Task::classification; } std::vector& Dataset::GetSplit(const SplitType& split_type) @@ -363,5 +370,4 @@ vector> Dataset::GetLocations() } return locations; } - } \ No newline at end of file diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index e82a1047..a1fefb0d 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -22,11 +22,63 @@ #include #include +#include namespace ecvl { #define ECVL_ERROR_AUG_DOES_NOT_EXIST throw std::runtime_error(ECVL_ERROR_MSG "Augmentation for this split does not exist"); +/** @brief Convert an ECVL Image into an EDDL Tensor. + +Image must have 3 dimensions "xy[czo]" (in any order). \n +Output Tensor will be created with shape \f$C\f$ x \f$H\f$ x \f$W\f$. \n + +@param[in] img Input ECVL Image. +@param[out] t Output EDDL Tensor. It is created inside the function. + +*/ +void ImageToTensor(const Image& img, Tensor*& t); + +/** @brief Insert an ECVL Image into an EDDL Tensor. + +This function is useful to insert into an EDDL Tensor more than one image, specifying how many images are already stored in the Tensor. +Image must have 3 dimensions "xy[czo]" (in any order). \n + +@param[in] img Input ECVL Image. +@param[out] t Output EDDL Tensor. It must be created with the right dimensions before calling this function. +@param[in] offset How many images are already stored in the Tensor. + +*/ +void ImageToTensor(const Image& img, Tensor*& t, const int& offset); + +/** @brief Convert an EDDL Tensor into an ECVL Image. + +Tensor dimensions must be \f$C\f$ x \f$H\f$ x \f$W\f$ or \f$N\f$ x \f$C\f$ x \f$H\f$ x \f$W\f$, where: \n +\f$N\f$ = batch size \n +\f$C\f$ = channels \n +\f$H\f$ = height \n +\f$W\f$ = width + +@param[in] t Input EDDL Tensor. +@param[out] img Output ECVL Image. It is a "xyo" with DataType::float32 and ColorType::none Image. + +*/ +void TensorToImage(const Tensor* t, Image& img); + +/** @brief Convert an EDDL Tensor into an ECVL View. + +Tensor dimensions must be \f$C\f$ x \f$H\f$ x \f$W\f$ or \f$N\f$ x \f$C\f$ x \f$H\f$ x \f$W\f$, where: \n +\f$N\f$ = batch size \n +\f$C\f$ = channels \n +\f$H\f$ = height \n +\f$W\f$ = width + +@param[in] t Input EDDL Tensor. +@param[out] v Output ECVL View. It is a "xyo" with ColorType::none View. + +*/ +void TensorToView(const Tensor* t, View& v); + /** @brief Dataset Augmentations. This class represent the augmentations which will be applied to each split. @@ -64,6 +116,185 @@ class DatasetAugmentations } }; +/** @brief Label class representing the Sample labels, which may have different representations depending on the task. + +@anchor Label +*/ +class Label +{ +public: + /** @brief Abstract function which copies the sample labels into the batch tensor. + + @param[in] tensor EDDL Tensor in which to copy the labels + @param[in] offset Position of the tensor from which to insert the sample labels + */ + virtual void ToTensorPlane(Tensor* tensor, int offset) = 0; + virtual ~Label() {}; +}; + +/** @brief Label for classification task. + +@anchor LabelClass +*/ +class LabelClass : public Label +{ +public: + vector label; /**< @brief Vector of the sample labels. */ + + /** @brief Convert the sample labels in a one-hot encoded tensor and copy it to the batch tensor. + + @param[in] tensor EDDL Tensor in which to copy the labels (dimensions: [batch_size, num_classes]) + @param[in] offset Position of the tensor from which to insert the sample labels + */ + void ToTensorPlane(Tensor* tensor, int offset) override + { + vector lab(tensor->shape[1], 0); + for (int j = 0; j < vsize(label); ++j) { + lab[label[j]] = 1; + } + //memcpy(tensor->ptr + lab.size() * offset, lab.data(), lab.size() * sizeof(float)); + std::copy(lab.data(), lab.data() + lab.size(), tensor->ptr + lab.size() * offset); + } +}; + +/** @brief Label for segmentation task. + +@anchor LabelImage +*/ +class LabelImage : public Label +{ +public: + Image gt; /**< @brief Ground truth image. */ + + /** @brief Convert the sample ground truth Image into a tensor and copy it to the batch tensor. + + @param[in] tensor EDDL Tensor in which to copy the ground truth (dimensions: [batch_size, num_channels, height, width]) + @param[in] offset Position of the tensor from which to insert the sample ground truth + */ + void ToTensorPlane(Tensor* tensor, int offset) override + { + ImageToTensor(gt, tensor, offset); + } +}; + +/** @brief Class that manages the producers-consumer queue of samples. +* The queue stores pairs of image and label, pushing and popping them in an exclusive way. +* The queue also has a maximum size (`max_size_` attribute) to avoid memory overflows. + +@anchor ProducersConsumerQueue +*/ +class ProducersConsumerQueue +{ + std::condition_variable cond_notempty_; /**< @brief Condition variable that wait if the queue is empty. */ + std::condition_variable cond_notfull_; /**< @brief Condition variable that wait if the queue is full. */ + std::mutex mutex_; /**< @brief Mutex to grant exclusive access to the queue. */ + std::queue> cpq_; /**< @brief Queue of samples, stored as pair of Image and Label pointer. */ + unsigned max_size_; /**< @brief Maximum size of the queue. */ + unsigned threshold_; /**< @brief Threshold from which restart to produce samples. If not specified, it's set to the half of maximum size. */ + +public: + ProducersConsumerQueue() {} + /** + @param[in] mxsz Maximum size of the queue. + */ + ProducersConsumerQueue(unsigned mxsz) : max_size_(mxsz), threshold_(max_size_ / 2) {} + /** + @param[in] mxsz Maximum size of the queue. + @param[in] thresh Threshold from which restart to produce samples. + */ + ProducersConsumerQueue(unsigned mxsz, unsigned thresh) : max_size_(mxsz), threshold_(thresh) {} + + /** @brief Push a sample in the queue. + + Take the lock of the queue and wait if the queue is full. Otherwise, push the pair Image, Label into the queue. + @param[in] image Image to push in the queue. + @param[in] label Label to push in the queue. + */ + void Push(const Image& image, Label* label) + { + std::unique_lock lock(mutex_); + cond_notfull_.wait(lock, [this]() { return !IsFull(); }); + cpq_.push(make_pair(image, label)); + cond_notempty_.notify_one(); + } + + /** @brief Pop a sample from the queue. + + Take the lock of the queue and wait if the queue is empty. Otherwise, pop an Image and its Label from the queue. + If the queue size is still bigger than the half of the maximum size, don't notify the Push to avoid an always-full queue. + + @param[in] image Image to pop from the queue. + @param[in] label Label to pop from the queue. + */ + void Pop(Image& image, Label*& label) + { + std::unique_lock lock(mutex_); + cond_notempty_.wait(lock, [this]() { return !IsEmpty(); }); + auto p = cpq_.front(); + cpq_.pop(); + image = p.first; + label = p.second; + if (Length() < threshold_) { + cond_notfull_.notify_one(); + } + } + + /** @brief Check if the queue is full. + + @return true if the queue is full, false otherwise. + */ + bool IsFull() const + { + return cpq_.size() >= max_size_; + } + + /** @brief Check if the queue is empty. + + @return true if the queue is empty, false otherwise. + */ + bool IsEmpty() const + { + return cpq_.empty(); + } + + /** @brief Calculate the current size of the queue. + + @return the current size of the queue. + */ + size_t Length() const + { + return cpq_.size(); + } + + /** @brief Set the maximum size of the queue and optionally the threshold from which restart to produce samples. + + @param[in] max_size maximum size of the queue. + @param[in] thresh threshold from which restart to produce samples. If not specified, it's set to the half of maximum size. + */ + void SetSize(int max_size, int thresh = -1) + { + max_size_ = max_size; + threshold_ = thresh != -1 ? thresh : max_size / 2; + } +}; + +/** @brief Class representing the thread counters. + +Each thread has its own indices to manage. The split samples have been assigned to several threads which manage them separately. + +@anchor ThreadCounters +*/ +class ThreadCounters +{ +public: + int counter_; /**< @brief Index of the sample currently used by the thread. */ + int min_, max_; /**< @brief Indices of samples managed by the thread in the interval [min_, max_). */ + + ThreadCounters(int min, int max) : counter_{ min }, min_{ min }, max_{ max } {} + ThreadCounters(int counter, int min, int max) : counter_{ counter }, min_{ min }, max_{ max } {} + void Reset() { counter_ = min_; } /**< @brief Reset the thread counter to its minimum value. */ +}; + /** @brief DeepHealth Deep Learning Dataset. This class extends the DeepHealth Dataset with Deep Learning specific members. @@ -72,39 +303,60 @@ This class extends the DeepHealth Dataset with Deep Learning specific members. */ class DLDataset : public Dataset { +protected: + int batch_size_; /**< @brief Size of each dataset mini batch. */ + std::vector resize_dims_; /**< @brief Dimensions (HxW) to which Dataset images must be resized. */ + std::vector current_batch_; /**< @brief Number of batches already loaded for each split. */ + ColorType ctype_; /**< @brief ecvl::ColorType of the Dataset images. */ + ColorType ctype_gt_; /**< @brief ecvl::ColorType of the Dataset ground truth images. */ + DatasetAugmentations augs_; /**< @brief ecvl::DatasetAugmentations to be applied to the Dataset images (and ground truth if exist) for each split. */ + int num_workers_; /**< @brief Number of parallel workers. */ + ProducersConsumerQueue queue_; /**< @brief Producers-consumer queue of the dataset. */ + std::pair< std::vector, std::vector> tensors_shape_; /**< @brief Shape of sample and label tensors. */ + std::vector> splits_tc_; /**< @brief Each dataset split has its own vector of threads, each of which has its counters: . */ + std::vector producers_; /**< @brief Vector of threads representing the samples producers. */ + bool active_ = false; /**< @brief Whether the threads have already been launched or not. */ + static std::default_random_engine re_; /**< @brief Engine used for random number generation. */ + Label* label_ = nullptr; /**< @brief Label pointer which will be specialized based on the dataset task. */ + + /** @brief Set which are the indices of the samples managed by each thread. + + @param[in] split_index index of the split to initialize. + */ + void InitTC(int split_index); + public: - int batch_size_; /**< @brief Size of each dataset mini batch. */ - int n_channels_; /**< @brief Number of channels of the images. */ - int n_channels_gt_ = -1; /**< @brief Number of channels of the ground truth images. */ - std::vector resize_dims_; /**< @brief Dimensions (HxW) to which Dataset images must be resized. */ - std::vector current_batch_; /**< @brief Number of batches already loaded for each split. */ - ColorType ctype_; /**< @brief ecvl::ColorType of the Dataset images. */ - ColorType ctype_gt_; /**< @brief ecvl::ColorType of the Dataset ground truth images. */ - DatasetAugmentations augs_; /**< @brief ecvl::DatasetAugmentations to be applied to the Dataset images (and ground truth if exist) for each split. */ - std::mutex mutex_current_batch_; /**< @brief std::mutex to add exclusive access to attribute current_batch_. */ - static std::default_random_engine re_; + int n_channels_; /**< @brief Number of channels of the images. */ + int n_channels_gt_ = -1; /**< @brief Number of channels of the ground truth images. */ /** @param[in] filename Path to the Dataset file. @param[in] batch_size Size of each dataset mini batch. - @param[in] augs Array with three DatasetAugmentations (training, validation and test) to be applied to the Dataset images (and ground truth if exists) for each split. - If no augmentation is required or the split doesn't exist, nullptr has to be passed. - @param[in] ctype ecvl::ColorType of the Dataset images. - @param[in] ctype_gt ecvl::ColorType of the Dataset ground truth images. + @param[in] augs Array with DatasetAugmentations to be applied to the Dataset images (and ground truth if exists) for each split. If no augmentation is required nullptr has to be passed. + @param[in] ctype ecvl::ColorType of the Dataset images. Default is RGB. + @param[in] ctype_gt ecvl::ColorType of the Dataset ground truth images. Default is GRAY. + @param[in] num_workers Number of parallel threads spawned. + @param[in] queue_ratio_size The producers-consumer queue will have a maximum size equal to \f$batch\_size \times queue\_ratio\_size \times num\_workers\f$. + @param[in] drop_last For each split, whether to drop the last samples that don't fit the batch size or not. The vector dimensions must match the number of splits. @param[in] verify If true, a list of all the images in the Dataset file which don't exist is printed with an ECVL_WARNING_MSG. */ DLDataset(const filesystem::path& filename, const int batch_size, DatasetAugmentations augs = DatasetAugmentations(), - ColorType ctype = ColorType::BGR, + ColorType ctype = ColorType::RGB, ColorType ctype_gt = ColorType::GRAY, + int num_workers = 1, + int queue_ratio_size = 1, + vector drop_last = {}, bool verify = false) : Dataset{ filename, verify }, batch_size_{ batch_size }, augs_(std::move(augs)), + num_workers_{ num_workers }, ctype_{ ctype }, - ctype_gt_{ ctype_gt } + ctype_gt_{ ctype_gt }, + queue_{ batch_size_ * queue_ratio_size * num_workers_ } { // resize current_batch_ to the number of splits and initialize it with 0 current_batch_.resize(split_.size(), 0); @@ -130,54 +382,90 @@ class DLDataset : public Dataset else { cout << ECVL_WARNING_MSG << "Missing splits in the dataset file." << endl; } + + // Set drop_last parameter for each split + if (!drop_last.empty() && vsize(drop_last) == vsize(split_)) { + for (int i = 0; i < vsize(drop_last); ++i) { + split_[i].drop_last_ = drop_last[i]; + } + } + + // Initialize num_batches, last_batch and the ThreadCounters for each split + auto s_index = 0; + splits_tc_ = std::vector>(vsize(split_)); + for (auto& s : split_) { + s.num_batches_ = s.drop_last_ ? vsize(s.samples_indices_) / batch_size_ : (vsize(s.samples_indices_) + batch_size_ - 1) / batch_size_; + s.last_batch_ = vsize(s.samples_indices_) % batch_size_; + + InitTC(s_index); + ++s_index; + } + + switch (task_) { + case Task::classification: + label_ = new LabelClass(); + tensors_shape_ = make_pair, vector>({ batch_size_, n_channels_, resize_dims_[0], resize_dims_[1] }, { batch_size_, vsize(classes_) }); + break; + case Task::segmentation: + label_ = new LabelImage(); + tensors_shape_ = make_pair, vector>({ batch_size_, n_channels_, resize_dims_[0], resize_dims_[1] }, + { batch_size_, n_channels_gt_, resize_dims_[0], resize_dims_[1] }); + break; + } + } + + /* Destructor */ + ~DLDataset() + { + delete label_; } /** @brief Reset the batch counter and optionally shuffle samples indices of the specified split. If no split is provided (i.e. it is provided a value less than 0), the current split is reset. @param[in] split_index index of the split to reset. - @param[in] reshuffle boolean which indicates whether to shuffle the split samples indices or not. + @param[in] shuffle boolean which indicates whether to shuffle the split samples indices or not. */ void ResetBatch(int split_index = -1, bool shuffle = false); /** @brief Reset the batch counter and optionally shuffle samples indices of the specified split. @param[in] split_name name of the split to reset. - @param[in] reshuffle boolean which indicates whether to shuffle the split samples indices or not. + @param[in] shuffle boolean which indicates whether to shuffle the split samples indices or not. */ void ResetBatch(std::string split_name, bool shuffle = false); /** @brief Reset the batch counter and optionally shuffle samples indices of the specified split. @param[in] split_type SplitType of the split to reset. - @param[in] reshuffle boolean which indicates whether to shuffle the split samples indices or not. + @param[in] shuffle boolean which indicates whether to shuffle the split samples indices or not. */ void ResetBatch(SplitType split_type, bool shuffle = false); /** @brief Reset the batch counter of each split and optionally shuffle samples indices (within each split). - @param[in] reshuffle boolean which indicates whether to shuffle the samples indices or not. + @param[in] shuffle boolean which indicates whether to shuffle the samples indices or not. */ void ResetAllBatches(bool shuffle = false); /** @brief Load a batch into _images_ and _labels_ `tensor`. + @param[out] images `tensor` which stores the batch of images. @param[out] labels `tensor` which stores the batch of labels. */ void LoadBatch(Tensor*& images, Tensor*& labels); /** @brief Load a batch into _images_ `tensor`. Useful for tests set when you don't have labels. + @param[out] images `tensor` which stores the batch of images. */ void LoadBatch(Tensor*& images); /** @brief Set a fixed seed for the random generated values. Useful to reproduce experiments with same shuffling during training. + @param[in] seed Value of the seed for the random engine. */ - static void SetSplitSeed(unsigned seed) - { - re_.seed(seed); - } + static void SetSplitSeed(unsigned seed) { re_.seed(seed); } /** @brief Set a new batch size inside the dataset. @@ -185,64 +473,65 @@ class DLDataset : public Dataset @param[in] bs Value to set for the batch size. */ void SetBatchSize(int bs); -}; -/** @brief Convert an EDDL Tensor into an ECVL Image. + /** @brief Load a sample and its label, and push them to the producers-consumer queue. -Tensor dimensions must be \f$C\f$ x \f$H\f$ x \f$W\f$ or \f$N\f$ x \f$C\f$ x \f$H\f$ x \f$W\f$, where: \n -\f$N\f$ = batch size \n -\f$C\f$ = channels \n -\f$H\f$ = height \n -\f$W\f$ = width + @param[in] elem Sample to load and push to the queue. -@param[in] t Input EDDL Tensor. -@param[out] img Output ECVL Image. It is a "xyo" with DataType::float32 and ColorType::none Image. + @anchor ProduceImageLabel + */ + void ProduceImageLabel(Sample& elem); -*/ -void TensorToImage(Tensor*& t, Image& img); + /** @brief Function called when the thread are spawned. -/** @brief Convert an EDDL Tensor into an ECVL View. + @ref ProduceImageLabel is called for each sample under the competence of the thread. -Tensor dimensions must be \f$C\f$ x \f$H\f$ x \f$W\f$ or \f$N\f$ x \f$C\f$ x \f$H\f$ x \f$W\f$, where: \n -\f$N\f$ = batch size \n -\f$C\f$ = channels \n -\f$H\f$ = height \n -\f$W\f$ = width + @param[in] thread_index index of the thread. + */ + void ThreadFunc(int thread_index); -@param[in] t Input EDDL Tensor. -@param[out] v Output ECVL View. It is a "xyo" with ColorType::none View. + /** @brief Pop batch_size samples from the queue and copy them into EDDL tensors. -*/ -void TensorToView(Tensor*& t, View& v); + @return pair of EDDL Tensor, first with the image, second with the label. + */ + pair, unique_ptr> GetBatch(); -/** @brief Convert an ECVL Image into an EDDL Tensor. + /** @brief Spawn num_workers thread. -Image must have 3 dimensions "xy[czo]" (in any order). \n -Output Tensor will be created with shape \f$C\f$ x \f$H\f$ x \f$W\f$. \n + @param[in] split_index Index of the split to use in the GetBatch function. If not specified, current split is used. + */ + void Start(int split_index = -1); -@param[in] img Input ECVL Image. -@param[out] t Output EDDL Tensor. It is created inside the function. + /** @brief Join all the threads. */ + void Stop(); -*/ -void ImageToTensor(const Image& img, Tensor*& t); + /** @brief Get the current size of the producers-consumer queue of the dataset. -/** @brief Insert an ECVL Image into an EDDL Tensor. + @return Size of the producers-consumer queue of the dataset. + */ + auto GetQueueSize() { return queue_.Length(); }; -This function is useful to insert into an EDDL Tensor more than one image, specifying how many images are already stored in the Tensor. -Image must have 3 dimensions "xy[czo]" (in any order). \n + /** @brief Set the current split and if the split doesn't have labels update the dataset tensors_shape_. + @param[in] split_type ecvl::SplitType representing the split to set ("training", "validation", or "test"). + */ + void SetSplit(const SplitType& split_type) override; -@param[in] img Input ECVL Image. -@param[out] t Output EDDL Tensor. It must be created with the right dimensions before calling this function. -@param[in] offset How many images are already stored in the Tensor. + /** @brief Set the current split and if the split doesn't have labels update the dataset tensors_shape_. + @param[in] split_name string representing the split to set. + */ + void SetSplit(const std::string& split_name) override; -*/ -void ImageToTensor(const Image& img, Tensor*& t, const int& offset); + /** @brief Set the current split and if the split doesn't have labels update the dataset tensors_shape_. + @param[in] split_index int representing the index of the split to set. + */ + void SetSplit(const int& split_index) override; +}; /** @brief Make a grid of images from a EDDL Tensor. Return a grid of Image from a EDDL Tensor. -@param[in] img Input EDDL Tensor of shape (B x C x H x W). +@param[in] t Input EDDL Tensor of shape (B x C x H x W). @param[in] cols Number of images displayed in each row of the grid. @param[in] normalize If true, shift the image to the range [0,1]. diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index fc3cad45..1e6c9e74 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -25,9 +25,12 @@ using namespace ecvl::filesystem; namespace ecvl { +#define ECVL_ERROR_START_ALREADY_ACTIVE throw std::runtime_error(ECVL_ERROR_MSG "Trying to start the producer threads when they are already running!"); +#define ECVL_ERROR_STOP_ALREADY_END throw std::runtime_error(ECVL_ERROR_MSG "Trying to stop the producer threads when they are already ended!"); +#define ECVL_ERROR_WORKERS_LESS_THAN_ONE throw std::runtime_error(ECVL_ERROR_MSG "Dataset workers must be at least one"); default_random_engine DLDataset::re_(random_device{}()); -void TensorToImage(Tensor*& t, Image& img) +void TensorToImage(const Tensor* t, Image& img) { switch (t->ndim) { case 3: @@ -44,7 +47,7 @@ void TensorToImage(Tensor*& t, Image& img) memcpy(img.data_, t->ptr, img.datasize_); } -void TensorToView(Tensor*& t, View& v) +void TensorToView(const Tensor* t, View& v) { switch (t->ndim) { case 3: @@ -147,21 +150,22 @@ void ImageToTensor(const Image& img, Tensor*& t, const int& offset) void DLDataset::ResetBatch(int split_index, bool shuffle) { - { // CRITICAL REGION STARTS - std::unique_lock lck(mutex_current_batch_); - int index = current_split_ ? split_index < 0 : split_index; - // check if the split exists - try { - this->current_batch_.at(index) = 0; - } - catch (const std::out_of_range) { - ECVL_ERROR_SPLIT_DOES_NOT_EXIST - } + int index = split_index < 0 ? current_split_ : split_index; + // check if the split exists + try { + this->current_batch_.at(index) = 0; + } + catch (const std::out_of_range) { + ECVL_ERROR_SPLIT_DOES_NOT_EXIST + } - if (shuffle) { - std::shuffle(begin(GetSplit(index)), end(GetSplit(index)), re_); - } - } // CRITICAL REGION ENDS + if (shuffle) { + std::shuffle(begin(GetSplit(index)), end(GetSplit(index)), re_); + } + + for (auto& tc : splits_tc_[index]) { + tc.Reset(); + } } void DLDataset::ResetBatch(string split_name, bool shuffle) @@ -178,17 +182,16 @@ void DLDataset::ResetBatch(SplitType split_type, bool shuffle) void DLDataset::ResetAllBatches(bool shuffle) { - { // CRITICAL REGION STARTS - std::unique_lock lck(mutex_current_batch_); + fill(current_batch_.begin(), current_batch_.end(), 0); - fill(current_batch_.begin(), current_batch_.end(), 0); - - if (shuffle) { - for (auto& s : split_) { - std::shuffle(begin(s.samples_indices_), end(s.samples_indices_), re_); + if (shuffle) { + for (int split_index = 0; split_index < vsize(split_); ++split_index) { + std::shuffle(begin(GetSplit(split_index)), end(GetSplit(split_index)), re_); + for (auto& tc : splits_tc_[split_index]) { + tc.Reset(); } } - } // CRITICAL REGION ENDS + } } void DLDataset::LoadBatch(Tensor*& images, Tensor*& labels) @@ -223,12 +226,9 @@ void DLDataset::LoadBatch(Tensor*& images, Tensor*& labels) } // Move to next samples - { // CRITICAL REGION STARTS - std::unique_lock lck(mutex_current_batch_); - start = current_batch_[current_split_] * bs; - ++current_batch_[current_split_]; - } // CRITICAL REGION ENDS + start = current_batch_[current_split_] * bs; + ++current_batch_[current_split_]; if (vsize(GetSplit()) < start + bs) { cerr << ECVL_ERROR_MSG "Batch size is not even with the number of samples. Hint: loop through `num_batches = num_samples / batch_size;`" << endl; @@ -292,12 +292,9 @@ void DLDataset::LoadBatch(Tensor*& images) } // Move to next samples - { // CRITICAL REGION STARTS - std::unique_lock lck(mutex_current_batch_); - start = current_batch_[current_split_] * bs; - ++current_batch_[current_split_]; - } // CRITICAL REGION ENDS + start = current_batch_[current_split_] * bs; + ++current_batch_[current_split_]; if (vsize(GetSplit()) < start + bs) { cerr << ECVL_ERROR_MSG "Batch size is not even with the number of samples. Hint: loop through `num_batches = num_samples / batch_size;`" << endl; @@ -372,4 +369,172 @@ Image MakeGrid(Tensor*& t, int cols, bool normalize) } return image_t; } + +void DLDataset::ProduceImageLabel(Sample& elem) +{ + Image img = elem.LoadImage(ctype_, false); + switch (task_) { + case Task::classification: + { + LabelClass* label = nullptr; + // Read the label + if (!split_[current_split_].no_label_) { + label = new LabelClass(); + label->label = elem.label_.value(); + } + // Apply chain of augmentations only to sample image + augs_.Apply(current_split_, img); + queue_.Push(img, label); + } + break; + case Task::segmentation: + { + LabelImage* label = nullptr; + // Read the ground truth + if (!split_[current_split_].no_label_) { + label = new LabelImage(); + Image gt = elem.LoadImage(ctype_gt_, true); + // Apply chain of augmentations to sample image and corresponding ground truth + augs_.Apply(current_split_, img, gt); + label->gt = gt; + } + else { + augs_.Apply(current_split_, img); + } + queue_.Push(img, label); + } + break; + } +} + +void DLDataset::InitTC(int split_index) +{ + auto& split_indexes = split_[split_index].samples_indices_; + auto& drop_last = split_[split_index].drop_last_; + auto samples_per_queue = vsize(split_indexes) / num_workers_; + auto exceeding_samples = vsize(split_indexes) % num_workers_ * !drop_last; + + // Set which are the indices of the samples managed by each thread + // The i-th thread manage samples from start to end + std::vector split_tc; + for (auto i = 0; i < num_workers_; ++i) { + auto start = samples_per_queue * i; + auto end = start + samples_per_queue; + if (i >= num_workers_ - 1) { + // The last thread takes charge of exceeding samples + end += exceeding_samples; + } + split_tc.push_back(ThreadCounters(start, end)); + } + + splits_tc_[split_index] = split_tc; +} + +void DLDataset::ThreadFunc(int thread_index) +{ + auto& tc_of_current_split = splits_tc_[current_split_]; + while (tc_of_current_split[thread_index].counter_ < tc_of_current_split[thread_index].max_) { + auto sample_index = split_[current_split_].samples_indices_[tc_of_current_split[thread_index].counter_]; + Sample& elem = samples_[sample_index]; + + ProduceImageLabel(elem); + + ++tc_of_current_split[thread_index].counter_; + } +} + +pair, unique_ptr> DLDataset::GetBatch() +{ + ++current_batch_[current_split_]; + auto& s = split_[current_split_]; + auto tensors_shape = tensors_shape_; + + // Reduce batch size for the last batch in the split + if (current_batch_[current_split_] == s.num_batches_) { + tensors_shape.first[0] = s.last_batch_; + if (!s.no_label_) { + tensors_shape.second[0] = s.last_batch_; + } + } + + unique_ptr x = make_unique(tensors_shape.first); + unique_ptr y = make_unique(tensors_shape.second); + + Image img; + for (int i = 0; i < x->shape[0]; ++i) { + queue_.Pop(img, label_); // Consumer get samples from the queue + + if (label_ != nullptr) { // Label nullptr means no label at all for this sample (example: possible for test split) + // Copy label into tensor + label_->ToTensorPlane(y.get(), i); + delete label_; + label_ = nullptr; + } + //Copy sample image into tensor + auto lhs = x.get(); + ImageToTensor(img, lhs, i); + } + + return make_pair(move(x), move(y)); +} + +void DLDataset::Start(int split_index) +{ + if (active_) { + ECVL_ERROR_START_ALREADY_ACTIVE + } + + active_ = true; + + if (split_index != -1 && split_index != current_split_) { + SetSplit(split_index); + } + + producers_.clear(); + + if (num_workers_ > 0) { + for (int i = 0; i < num_workers_; ++i) { + producers_.push_back(std::thread(&DLDataset::ThreadFunc, this, i)); + } + } + else { + ECVL_ERROR_WORKERS_LESS_THAN_ONE + } +} + +void DLDataset::Stop() +{ + if (!active_) { + ECVL_ERROR_STOP_ALREADY_END + } + + active_ = false; + for (int i = 0; i < num_workers_; ++i) { + producers_[i].join(); + } +} + +void DLDataset::SetSplit(const SplitType& split_type) +{ + Dataset::SetSplit(split_type); + if (split_[current_split_].no_label_) { + tensors_shape_.second = {}; + } +} + +void DLDataset::SetSplit(const std::string& split_name) +{ + Dataset::SetSplit(split_name); + if (split_[current_split_].no_label_) { + tensors_shape_.second = {}; + } +} + +void DLDataset::SetSplit(const int& split_index) +{ + Dataset::SetSplit(split_index); + if (split_[current_split_].no_label_) { + tensors_shape_.second = {}; + } +} } // namespace ecvl \ No newline at end of file From 5e4b5076fc5cdd1e0c98fc0b05e0f197334b7d0b Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Wed, 19 May 2021 18:48:19 +0200 Subject: [PATCH 12/38] Fix warnings and errors --- modules/eddl/include/ecvl/support_eddl.h | 5 +++-- modules/eddl/src/support_eddl.cpp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index a1fefb0d..a418800b 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -21,6 +21,7 @@ #include +#include #include #include @@ -305,7 +306,6 @@ class DLDataset : public Dataset { protected: int batch_size_; /**< @brief Size of each dataset mini batch. */ - std::vector resize_dims_; /**< @brief Dimensions (HxW) to which Dataset images must be resized. */ std::vector current_batch_; /**< @brief Number of batches already loaded for each split. */ ColorType ctype_; /**< @brief ecvl::ColorType of the Dataset images. */ ColorType ctype_gt_; /**< @brief ecvl::ColorType of the Dataset ground truth images. */ @@ -328,6 +328,7 @@ class DLDataset : public Dataset public: int n_channels_; /**< @brief Number of channels of the images. */ int n_channels_gt_ = -1; /**< @brief Number of channels of the ground truth images. */ + std::vector resize_dims_; /**< @brief Dimensions (HxW) to which Dataset images must be resized. */ /** @param[in] filename Path to the Dataset file. @@ -356,7 +357,7 @@ class DLDataset : public Dataset num_workers_{ num_workers }, ctype_{ ctype }, ctype_gt_{ ctype_gt }, - queue_{ batch_size_ * queue_ratio_size * num_workers_ } + queue_{ static_cast(batch_size_ * queue_ratio_size * num_workers_) } { // resize current_batch_ to the number of splits and initialize it with 0 current_batch_.resize(split_.size(), 0); diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index c0605a81..ed808d97 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -141,7 +141,7 @@ void ImageToTensor(const Image& img, Tensor*& t, const int& offset) tot_dims = accumulate(img.dims_.begin(), img.dims_.end(), 1, std::multiplies()); // Check if the current image exceeds the total size of the tensor - if (t->size < tot_dims * (offset + 1)) { + if (t->size < static_cast(tot_dims * (offset + 1))) { cerr << ECVL_ERROR_MSG "Size of the images exceeds those of the tensor" << endl; ECVL_ERROR_INCOMPATIBLE_DIMENSIONS } From 418024bac17ddceb10c8605dea216bff8099c1db Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Wed, 19 May 2021 18:51:56 +0200 Subject: [PATCH 13/38] Update Actions workflow files --- .github/workflows/linux.yml | 4 ++-- .github/workflows/macos.yml | 4 ++-- .github/workflows/windows.yml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 08e4d6df..0dade367 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -4,8 +4,8 @@ on: [push, pull_request] env: BUILD_TYPE: "Release" - OPENCV_VERSION: "3.4.13" - EDDL_VERSION: "v0.8.3a" + OPENCV_VERSION: "3.4.14" + EDDL_VERSION: "v0.9.2b" PROC: 2 jobs: diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index f912bf29..2a509013 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -4,8 +4,8 @@ on: [push, pull_request] env: BUILD_TYPE: "Release" - OPENCV_VERSION: "3.4.13" - EDDL_VERSION: "v0.8.3a" + OPENCV_VERSION: "3.4.14" + EDDL_VERSION: "v0.9.2b" PROC: 2 jobs: diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 11b124a5..aa3cfcc3 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -4,8 +4,8 @@ on: [push, pull_request] env: BUILD_TYPE: "Release" - OPENCV_VERSION: "3.4.13" - EDDL_VERSION: "v0.8.3a" + OPENCV_VERSION: "3.4.14" + EDDL_VERSION: "v0.9.2b" PROC: 2 jobs: From 759c58931ff7097021c25515efe85ea9659370ef Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Fri, 21 May 2021 15:38:03 +0200 Subject: [PATCH 14/38] Update GetSplit, SetSplit and SetBatchSize and add SetAugmentations and GetNumBatches --- modules/core/include/ecvl/CMakeLists.txt | 1 + modules/dataset/include/ecvl/dataset_parser.h | 52 +++++-------- modules/dataset/src/dataset_parser.cpp | 76 +++++++++---------- modules/eddl/include/ecvl/support_eddl.h | 29 ++++--- modules/eddl/src/support_eddl.cpp | 61 +++++++++------ 5 files changed, 114 insertions(+), 105 deletions(-) diff --git a/modules/core/include/ecvl/CMakeLists.txt b/modules/core/include/ecvl/CMakeLists.txt index f513e3c0..9ec39a96 100644 --- a/modules/core/include/ecvl/CMakeLists.txt +++ b/modules/core/include/ecvl/CMakeLists.txt @@ -13,6 +13,7 @@ target_sources(ECVL_CORE PRIVATE core.h + core/any.h core/arithmetic.h core/arithmetic_impl.inc.h core/cpu_hal.h diff --git a/modules/dataset/include/ecvl/dataset_parser.h b/modules/dataset/include/ecvl/dataset_parser.h index f37240d7..35028784 100644 --- a/modules/dataset/include/ecvl/dataset_parser.h +++ b/modules/dataset/include/ecvl/dataset_parser.h @@ -15,10 +15,12 @@ #define ECVL_DATASET_PARSER_H_ #include "ecvl/core.h" +#include "ecvl/core/any.h" #include "ecvl/core/filesystem.h" #include "ecvl/core/optional.h" #include +#include #include #include #include @@ -102,6 +104,16 @@ class Split else if (split_name_ == "validation") split_type_ = SplitType::validation; else if (split_name_ == "test") split_type_ = SplitType::test; } + + void SetNumBatches(int batch_size) + { + num_batches_ = drop_last_ ? vsize(samples_indices_) / batch_size : (vsize(samples_indices_) + batch_size - 1) / batch_size; + } + + void SetLastBatch(int batch_size) + { + last_batch_ = vsize(samples_indices_) % batch_size; + } }; /** @brief DeepHealth Dataset. @@ -112,6 +124,11 @@ This class implements the DeepHealth Dataset Format (https://github.com/deepheal */ class Dataset { + std::map features_map_; + void DecodeImages(const YAML::Node& node, const filesystem::path& root_path, bool verify); + void FindLabel(Sample& sample, const YAML::Node& n); +protected: + std::vector::iterator GetSplitIt(ecvl::any split); public: std::string name_ = "DeepHealth dataset"; /**< @brief Name of the Dataset. */ std::string description_ = "This is the DeepHealth example dataset!"; /**< @brief Description of the Dataset. */ @@ -139,37 +156,15 @@ class Dataset std::vector& GetSplit(); /** @brief Returns the image indexes of the requested split. - @param[in] split_type ecvl::SplitType representing the split to get ("training", "validation", or "test"). + @param[in] split index, name or ecvl::SplitType representing the split to get. @return vector of image indexes of the requested split. */ - std::vector& GetSplit(const SplitType& split_type); - - /** @brief Returns the image indexes of the requested split. - @param[in] split_name string representing the split to get. - @return vector of image indexes of the requested split. - */ - std::vector& GetSplit(const std::string& split_name); - - /** @brief Returns the image indexes of the requested split. - @param[in] split_index int representing the index of the split to get. - @return vector of image indexes of the requested split. - */ - std::vector& GetSplit(const int& split_index); + std::vector& GetSplit(const ecvl::any& split); /** @brief Set the current split. - @param[in] split_type ecvl::SplitType representing the split to set ("training", "validation", or "test"). + @param[in] split index, name or ecvl::SplitType representing the split to set. */ - virtual void SetSplit(const SplitType& split_type); - - /** @brief Set the current split. - @param[in] split_name string representing the split to set. - */ - virtual void SetSplit(const std::string& split_name); - - /** @brief Set the current split. - @param[in] split_index int representing the index of the split to set. - */ - virtual void SetSplit(const int& split_index); + virtual void SetSplit(const ecvl::any& split); /** @brief Dump the Dataset into a YAML file following the DeepHealth Dataset Format. @@ -190,11 +185,6 @@ class Dataset // RegEx which matchs URLs static const std::regex url_regex_; - -private: - std::map features_map_; - void DecodeImages(const YAML::Node& node, const filesystem::path& root_path, bool verify); - void FindLabel(Sample& sample, const YAML::Node& n); }; } // namespace ecvl diff --git a/modules/dataset/src/dataset_parser.cpp b/modules/dataset/src/dataset_parser.cpp index f1462237..d13d8058 100644 --- a/modules/dataset/src/dataset_parser.cpp +++ b/modules/dataset/src/dataset_parser.cpp @@ -291,35 +291,36 @@ Dataset::Dataset(const filesystem::path& filename, bool verify) task_ = classes_.empty() ? Task::segmentation : Task::classification; } -std::vector& Dataset::GetSplit(const SplitType& split_type) +vector::iterator Dataset::GetSplitIt(any split) { - auto it = std::find_if(split_.begin(), split_.end(), [&](const auto& s) { return s.split_type_ == split_type; }); - if (it == this->split_.end()) { - ECVL_ERROR_SPLIT_DOES_NOT_EXIST - } - else { - return it->samples_indices_; + if (split.type() == typeid(int)) { + try { + return split_.begin() + any_cast(split); + } + catch (const out_of_range) { + ECVL_ERROR_SPLIT_DOES_NOT_EXIST + } } -} + auto func = [&](const auto& s) { + if (split.type() == typeid(string)) { + auto tmp = s.split_name_; + return tmp == any_cast(split); + } + else if (split.type() == typeid(SplitType)) { + auto tmp = s.split_type_; + return tmp == any_cast(split); + } + else { + ECVL_ERROR_SPLIT_DOES_NOT_EXIST + } + }; -std::vector& Dataset::GetSplit(const string& split_name) -{ - auto it = std::find_if(split_.begin(), split_.end(), [&](const auto& s) { return s.split_name_ == split_name; }); + auto it = std::find_if(split_.begin(), split_.end(), [&](const auto& s) { return func(s); }); if (it == this->split_.end()) { ECVL_ERROR_SPLIT_DOES_NOT_EXIST } else { - return it->samples_indices_; - } -} - -std::vector& Dataset::GetSplit(const int& split_index) -{ - try { - return split_.at(split_index).samples_indices_; - } - catch (const std::out_of_range) { - ECVL_ERROR_SPLIT_DOES_NOT_EXIST + return it; } } @@ -328,34 +329,25 @@ std::vector& Dataset::GetSplit() return GetSplit(current_split_); } -void Dataset::SetSplit(const SplitType& split_type) +std::vector& Dataset::GetSplit(const any& split) { - // check if the split exists - int index = static_cast(distance(split_.begin(), find_if(split_.begin(), split_.end(), [&](const auto& s) { return s.split_type_ == split_type; }))); - if (index < vsize(split_)) { - this->current_split_ = index; - } - else { - ECVL_ERROR_SPLIT_DOES_NOT_EXIST - } + auto it = GetSplitIt(split); + return it->samples_indices_; } -void Dataset::SetSplit(const string& split_name) +void Dataset::SetSplit(const any& split) { - // check if the split exists - int index = static_cast(distance(split_.begin(), find_if(split_.begin(), split_.end(), [&](const auto& s) { return s.split_name_ == split_name; }))); - if (index < vsize(split_)) { - this->current_split_ = index; + int index; + if (split.type() == typeid(int)) { + index = any_cast(split); } else { - ECVL_ERROR_SPLIT_DOES_NOT_EXIST + index = static_cast(distance(split_.begin(), GetSplitIt(split))); } -} -void Dataset::SetSplit(const int& split_index) -{ - if (0 <= split_index && split_index < vsize(split_)) { - this->current_split_ = split_index; + // check if the split exists + if (0 <= index && index < vsize(split_)) { + this->current_split_ = index; } else { ECVL_ERROR_SPLIT_DOES_NOT_EXIST diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index a418800b..54ade804 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -395,8 +395,8 @@ class DLDataset : public Dataset auto s_index = 0; splits_tc_ = std::vector>(vsize(split_)); for (auto& s : split_) { - s.num_batches_ = s.drop_last_ ? vsize(s.samples_indices_) / batch_size_ : (vsize(s.samples_indices_) + batch_size_ - 1) / batch_size_; - s.last_batch_ = vsize(s.samples_indices_) % batch_size_; + s.SetNumBatches(batch_size_); + s.SetLastBatch(batch_size_); InitTC(s_index); ++s_index; @@ -513,19 +513,28 @@ class DLDataset : public Dataset auto GetQueueSize() { return queue_.Length(); }; /** @brief Set the current split and if the split doesn't have labels update the dataset tensors_shape_. - @param[in] split_type ecvl::SplitType representing the split to set ("training", "validation", or "test"). + + @param[in] split index, name or ecvl::SplitType representing the split to set. */ - void SetSplit(const SplitType& split_type) override; + void SetSplit(const ecvl::any& split) override; - /** @brief Set the current split and if the split doesn't have labels update the dataset tensors_shape_. - @param[in] split_name string representing the split to set. + /** @brief Set the dataset augmentations. + + @param[in] da @ref DatasetAugmentations to set. */ - void SetSplit(const std::string& split_name) override; + void SetAugmentations(const DatasetAugmentations& da); - /** @brief Set the current split and if the split doesn't have labels update the dataset tensors_shape_. - @param[in] split_index int representing the index of the split to set. + /** @brief Get the number of batches of the specified split. + + @param[in] split index, name or ecvl::SplitType representing the split from which to get the number of batches. + @return number of batches of the specified split. + */ + int GetNumBatches(const ecvl::any& split); + + /** @brief Get the number of batches of the current split. + @return number of batches of the current split. */ - void SetSplit(const int& split_index) override; + int GetNumBatches(); }; /** @brief Make a grid of images from a EDDL Tensor. diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index ed808d97..72296dfd 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -319,17 +319,6 @@ void DLDataset::LoadBatch(Tensor*& images) } } -void DLDataset::SetBatchSize(int bs) -{ - // check if the provided batch size is negative or greater than the current split size - if (bs > 0 && bs < vsize(split_[current_split_].samples_indices_)) { - batch_size_ = bs; - } - else { - ECVL_ERROR_WRONG_PARAMS("bs in SetBatchSize") - } -} - Image MakeGrid(Tensor*& t, int cols, bool normalize) { const auto batch_size = t->shape[0]; @@ -515,27 +504,55 @@ void DLDataset::Stop() } } -void DLDataset::SetSplit(const SplitType& split_type) +int DLDataset::GetNumBatches() { - Dataset::SetSplit(split_type); - if (split_[current_split_].no_label_) { - tensors_shape_.second = {}; - } + return GetNumBatches(current_split_); +} + +int DLDataset::GetNumBatches(const any& split) +{ + auto it = GetSplitIt(split); + return it->num_batches_; } -void DLDataset::SetSplit(const std::string& split_name) +void DLDataset::SetSplit(const any& split) { - Dataset::SetSplit(split_name); + Dataset::SetSplit(split); if (split_[current_split_].no_label_) { tensors_shape_.second = {}; } } -void DLDataset::SetSplit(const int& split_index) +void DLDataset::SetAugmentations(const DatasetAugmentations& da) { - Dataset::SetSplit(split_index); - if (split_[current_split_].no_label_) { - tensors_shape_.second = {}; + augs_ = da; + + // Initialize resize_dims_ after that augmentations on the first image are performed + Image tmp = samples_[0].LoadImage(ctype_); + augs_.Apply(current_split_, tmp); + auto y = tmp.channels_.find('y'); + auto x = tmp.channels_.find('x'); + assert(y != std::string::npos && x != std::string::npos); + resize_dims_[0] = tmp.dims_[y]; + resize_dims_[1] = tmp.dims_[x]; +} + +void DLDataset::SetBatchSize(int bs) +{ + // check if the provided batch size is negative or greater than the current split size + if (bs > 0 && bs < vsize(split_[current_split_].samples_indices_)) { + int new_queue_size = static_cast(queue_.Length() / batch_size_ * bs); + batch_size_ = bs; + tensors_shape_.first[0] = batch_size_; + tensors_shape_.second[0] = batch_size_; + queue_.SetSize(new_queue_size); + for (auto& s : split_) { + s.SetNumBatches(batch_size_); + s.SetLastBatch(batch_size_); + } + } + else { + ECVL_ERROR_WRONG_PARAMS("bs in SetBatchSize") } } } // namespace ecvl \ No newline at end of file From 3db68902055fd22637c37504cae7296bced5d849 Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Fri, 21 May 2021 15:41:08 +0200 Subject: [PATCH 15/38] Add missing any.h --- modules/core/include/ecvl/core/any.h | 81 ++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 modules/core/include/ecvl/core/any.h diff --git a/modules/core/include/ecvl/core/any.h b/modules/core/include/ecvl/core/any.h new file mode 100644 index 00000000..62853fa3 --- /dev/null +++ b/modules/core/include/ecvl/core/any.h @@ -0,0 +1,81 @@ +/* +* ECVL - European Computer Vision Library +* Version: 0.3.4 +* copyright (c) 2021, Università degli Studi di Modena e Reggio Emilia (UNIMORE), AImageLab +* Authors: +* Costantino Grana (costantino.grana@unimore.it) +* Federico Bolelli (federico.bolelli@unimore.it) +* Michele Cancilla (michele.cancilla@unimore.it) +* Laura Canalini (laura.canalini@unimore.it) +* Stefano Allegretti (stefano.allegretti@unimore.it) +* All rights reserved. +*/ + +// We haven't checked which any to include yet +#ifndef INCLUDE_STD_ANY_EXPERIMENTAL + +// Check for feature test macro for +# if defined(__cpp_lib_any) +# define INCLUDE_STD_ANY_EXPERIMENTAL 0 + +// Check for feature test macro for +# elif defined(__cpp_lib_experimental_any) +# define INCLUDE_STD_ANY_EXPERIMENTAL 1 + +// We can't check if headers exist... +// Let's assume experimental to be safe +# elif !defined(__has_include) +# define INCLUDE_STD_ANY_EXPERIMENTAL 1 + +// Check if the header "" exists +# elif __has_include() + +// If we're compiling on Visual Studio and are not compiling with C++17, we need to use experimental +# ifdef _MSC_VER + +// Check and include header that defines "_HAS_CXX17" +# if __has_include() +# include + +// Check for enabled C++17 support +# if defined(_HAS_CXX17) && _HAS_CXX17 +// We're using C++17, so let's use the normal version +# define INCLUDE_STD_ANY_EXPERIMENTAL 0 +# endif +# endif + +// If the macro isn't defined yet, that means any of the other VS specific checks failed, so we need to use experimental +# ifndef INCLUDE_STD_ANY_EXPERIMENTAL +# define INCLUDE_STD_ANY_EXPERIMENTAL 1 +# endif + +// Not on Visual Studio. Let's use the normal version +# else // #ifdef _MSC_VER +# define INCLUDE_STD_ANY_EXPERIMENTAL 0 +# endif + +// Check if the header "" exists +# elif __has_include() +# define INCLUDE_STD_ANY_EXPERIMENTAL 1 + +// Fail if neither header is available with a nice error message +# else +# error Could not find system header "" or "" +# endif + +// We previously determined that we need the experimental version +# if INCLUDE_STD_ANY_EXPERIMENTAL +# include +namespace ecvl +{ +using any = std::experimental::any; +} +# else +# include +namespace ecvl +{ +using any = std::any; +} +# endif + +#endif // #ifndef INCLUDE_STD_ANY_EXPERIMENTAL \ No newline at end of file From f1113d11e2204e8ff9567c5595128aca69d6eeeb Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Fri, 21 May 2021 16:47:29 +0200 Subject: [PATCH 16/38] Fix any.h --- modules/core/include/ecvl/core/any.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/modules/core/include/ecvl/core/any.h b/modules/core/include/ecvl/core/any.h index 62853fa3..6d00771a 100644 --- a/modules/core/include/ecvl/core/any.h +++ b/modules/core/include/ecvl/core/any.h @@ -69,12 +69,24 @@ namespace ecvl { using any = std::experimental::any; + +template +auto any_cast(const T& t) +{ + return std::experimental::any_cast(t); +} } # else # include namespace ecvl { using any = std::any; + +template +auto any_cast(const T& t) +{ + return std::any_cast(t); +} } # endif From 26e72a07f21a94c9f5c5eca5b1b8a45ce2196b85 Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Mon, 24 May 2021 14:32:23 +0200 Subject: [PATCH 17/38] Update GetSplit, SetSplit, ResetBatch and GetNumBatches --- modules/dataset/include/ecvl/dataset_parser.h | 12 +++--- modules/dataset/src/dataset_parser.cpp | 42 +++++++++---------- modules/eddl/include/ecvl/support_eddl.h | 28 +++---------- modules/eddl/src/support_eddl.cpp | 29 ++----------- 4 files changed, 34 insertions(+), 77 deletions(-) diff --git a/modules/dataset/include/ecvl/dataset_parser.h b/modules/dataset/include/ecvl/dataset_parser.h index 35028784..4432e7ca 100644 --- a/modules/dataset/include/ecvl/dataset_parser.h +++ b/modules/dataset/include/ecvl/dataset_parser.h @@ -73,7 +73,7 @@ class Sample @return Image containing the loaded sample. */ - ecvl::Image LoadImage(ecvl::ColorType ctype = ecvl::ColorType::BGR, const bool& is_gt = false); + ecvl::Image LoadImage(ecvl::ColorType ctype = ecvl::ColorType::RGB, const bool& is_gt = false); }; /** @brief Split of a dataset. @@ -129,6 +129,7 @@ class Dataset void FindLabel(Sample& sample, const YAML::Node& n); protected: std::vector::iterator GetSplitIt(ecvl::any split); + int GetSplitIndex(ecvl::any split); public: std::string name_ = "DeepHealth dataset"; /**< @brief Name of the Dataset. */ std::string description_ = "This is the DeepHealth example dataset!"; /**< @brief Description of the Dataset. */ @@ -150,16 +151,13 @@ class Dataset /* Destructor */ virtual ~Dataset() {} - /** @brief Returns the image indexes of the current split. - @return vector of image indexes of the split in use. - */ - std::vector& GetSplit(); - /** @brief Returns the image indexes of the requested split. + + If no split is provided or an illegal value is provided, the current split is returned. @param[in] split index, name or ecvl::SplitType representing the split to get. @return vector of image indexes of the requested split. */ - std::vector& GetSplit(const ecvl::any& split); + std::vector& GetSplit(const ecvl::any& split = -1); /** @brief Set the current split. @param[in] split index, name or ecvl::SplitType representing the split to set. diff --git a/modules/dataset/src/dataset_parser.cpp b/modules/dataset/src/dataset_parser.cpp index d13d8058..03a86367 100644 --- a/modules/dataset/src/dataset_parser.cpp +++ b/modules/dataset/src/dataset_parser.cpp @@ -291,11 +291,25 @@ Dataset::Dataset(const filesystem::path& filename, bool verify) task_ = classes_.empty() ? Task::segmentation : Task::classification; } +int Dataset::GetSplitIndex(any split) +{ + if (split.type() == typeid(int)) { + auto s = any_cast(split); + int index = s < 0 || s >= split_.size() ? current_split_ : s; + return index; + } + else { + return static_cast(distance(split_.begin(), GetSplitIt(split))); + } +} + vector::iterator Dataset::GetSplitIt(any split) { if (split.type() == typeid(int)) { try { - return split_.begin() + any_cast(split); + auto s = any_cast(split); + int index = s < 0 || s >= split_.size() ? current_split_ : s; + return split_.begin() + index; } catch (const out_of_range) { ECVL_ERROR_SPLIT_DOES_NOT_EXIST @@ -306,6 +320,10 @@ vector::iterator Dataset::GetSplitIt(any split) auto tmp = s.split_name_; return tmp == any_cast(split); } + else if (split.type() == typeid(const char*)) { + auto tmp = s.split_name_; + return tmp == any_cast(split); + } else if (split.type() == typeid(SplitType)) { auto tmp = s.split_type_; return tmp == any_cast(split); @@ -324,11 +342,6 @@ vector::iterator Dataset::GetSplitIt(any split) } } -std::vector& Dataset::GetSplit() -{ - return GetSplit(current_split_); -} - std::vector& Dataset::GetSplit(const any& split) { auto it = GetSplitIt(split); @@ -337,21 +350,8 @@ std::vector& Dataset::GetSplit(const any& split) void Dataset::SetSplit(const any& split) { - int index; - if (split.type() == typeid(int)) { - index = any_cast(split); - } - else { - index = static_cast(distance(split_.begin(), GetSplitIt(split))); - } - - // check if the split exists - if (0 <= index && index < vsize(split_)) { - this->current_split_ = index; - } - else { - ECVL_ERROR_SPLIT_DOES_NOT_EXIST - } + int index = GetSplitIndex(split); + this->current_split_ = index; } vector> Dataset::GetLocations() diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index 54ade804..e4c9ef2e 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -423,25 +423,11 @@ class DLDataset : public Dataset /** @brief Reset the batch counter and optionally shuffle samples indices of the specified split. - If no split is provided (i.e. it is provided a value less than 0), the current split is reset. - @param[in] split_index index of the split to reset. + If no split is provided or an illegal value is provided, the current split is reset. + @param[in] split_index index, name or SplitType of the split to reset. @param[in] shuffle boolean which indicates whether to shuffle the split samples indices or not. */ - void ResetBatch(int split_index = -1, bool shuffle = false); - - /** @brief Reset the batch counter and optionally shuffle samples indices of the specified split. - - @param[in] split_name name of the split to reset. - @param[in] shuffle boolean which indicates whether to shuffle the split samples indices or not. - */ - void ResetBatch(std::string split_name, bool shuffle = false); - - /** @brief Reset the batch counter and optionally shuffle samples indices of the specified split. - - @param[in] split_type SplitType of the split to reset. - @param[in] shuffle boolean which indicates whether to shuffle the split samples indices or not. - */ - void ResetBatch(SplitType split_type, bool shuffle = false); + void ResetBatch(const ecvl::any& split = -1, bool shuffle = false); /** @brief Reset the batch counter of each split and optionally shuffle samples indices (within each split). @@ -526,15 +512,11 @@ class DLDataset : public Dataset /** @brief Get the number of batches of the specified split. + If no split is provided or an illegal value is provided, the number of batches of the current split is returned. @param[in] split index, name or ecvl::SplitType representing the split from which to get the number of batches. @return number of batches of the specified split. */ - int GetNumBatches(const ecvl::any& split); - - /** @brief Get the number of batches of the current split. - @return number of batches of the current split. - */ - int GetNumBatches(); + int GetNumBatches(const ecvl::any& split = -1); }; /** @brief Make a grid of images from a EDDL Tensor. diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index 72296dfd..02c7fa3b 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -149,16 +149,10 @@ void ImageToTensor(const Image& img, Tensor*& t, const int& offset) memcpy(t->ptr + tot_dims * offset, tmp.data_, tot_dims * sizeof(float)); } -void DLDataset::ResetBatch(int split_index, bool shuffle) +void DLDataset::ResetBatch(const any& split, bool shuffle) { - int index = split_index < 0 ? current_split_ : split_index; - // check if the split exists - try { - this->current_batch_.at(index) = 0; - } - catch (const std::out_of_range) { - ECVL_ERROR_SPLIT_DOES_NOT_EXIST - } + int index = GetSplitIndex(split); + this->current_batch_.at(index) = 0; if (shuffle) { std::shuffle(begin(GetSplit(index)), end(GetSplit(index)), re_); @@ -169,18 +163,6 @@ void DLDataset::ResetBatch(int split_index, bool shuffle) } } -void DLDataset::ResetBatch(string split_name, bool shuffle) -{ - int index = static_cast(distance(split_.begin(), find_if(split_.begin(), split_.end(), [&](const auto& s) { return s.split_name_ == split_name; }))); - ResetBatch(index, shuffle); -} - -void DLDataset::ResetBatch(SplitType split_type, bool shuffle) -{ - int index = static_cast(distance(split_.begin(), find_if(split_.begin(), split_.end(), [&](const auto& s) { return s.split_type_ == split_type; }))); - ResetBatch(index, shuffle); -} - void DLDataset::ResetAllBatches(bool shuffle) { fill(current_batch_.begin(), current_batch_.end(), 0); @@ -504,11 +486,6 @@ void DLDataset::Stop() } } -int DLDataset::GetNumBatches() -{ - return GetNumBatches(current_split_); -} - int DLDataset::GetNumBatches(const any& split) { auto it = GetSplitIt(split); From 855409fea8b37872109bea4a6d348853095e4ace Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Wed, 26 May 2021 15:15:02 +0200 Subject: [PATCH 18/38] Add missing augmentation tests --- modules/eddl/test/test_eddl.cpp | 137 ++++++++++++++++++++++---------- 1 file changed, 96 insertions(+), 41 deletions(-) diff --git a/modules/eddl/test/test_eddl.cpp b/modules/eddl/test/test_eddl.cpp index 20b9f341..aba2c217 100644 --- a/modules/eddl/test/test_eddl.cpp +++ b/modules/eddl/test/test_eddl.cpp @@ -1,4 +1,4 @@ - /* +/* * ECVL - European Computer Vision Library * Version: 0.3.4 * copyright (c) 2021, Università degli Studi di Modena e Reggio Emilia (UNIMORE), AImageLab @@ -19,104 +19,159 @@ #include "ecvl/augmentations.h" using namespace ecvl; +using std::stringstream; +using std::unique_ptr; +using std::runtime_error; TEST(Augmentations, ConstructFromStreamAllParamsOk) { Image img({ 5, 5, 1 }, DataType::uint8, "xyc", ColorType::GRAY); - std::unique_ptr p; - std::stringstream ss("angle=[-5,5] center=(0,0) scale=0.5 interp=\"linear\""); + unique_ptr p; + stringstream ss("angle=[-5,5] center=(0,0) scale=0.5 interp=\"linear\""); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("dims=(100,100) interp=\"linear\""); + ss = stringstream("dims=(100,100) interp=\"linear\""); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("scale=(1.,2.) interp=\"linear\""); + ss = stringstream("scale=(1.,2.) interp=\"linear\""); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("p=0.3"); + ss = stringstream("p=0.3"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("p=0.3"); + ss = stringstream("p=0.3"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("sigma=[1.,2.]"); + ss = stringstream("sigma=[1.,2.]"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("std_dev=[1.,2.]"); + ss = stringstream("std_dev=[1.,2.]"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("lambda=[1.,2.]"); + ss = stringstream("lambda=[1.,2.]"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("gamma=[1.,2.]"); + ss = stringstream("gamma=[1.,2.]"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("p=[0,0.55] drop_size=[0.02,0.1] per_channel=0"); + ss = stringstream("p=[0,0.55] drop_size=[0.02,0.1] per_channel=0"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("p=0.4"); + ss = stringstream("p=0.4"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("beta=[30,60]"); + ss = stringstream("beta=[30,60]"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("num_steps=[5,10] distort_limit=[-0.2,0.2] interp=\"linear\" border_type=\"reflect_101\" border_value=0"); + ss = stringstream("num_steps=[5,10] distort_limit=[-0.2,0.2] interp=\"linear\" border_type=\"reflect_101\" border_value=0"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("alpha=[34,60] sigma=[4,6] interp=\"linear\" border_type=\"reflect_101\" border_value=0"); + ss = stringstream("alpha=[34,60] sigma=[4,6] interp=\"linear\" border_type=\"reflect_101\" border_value=0"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("distort_limit=[5,10] shift_limit=[4,6] interp=\"linear\" border_type=\"reflect_101\" border_value=0"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("p=[0,0.55] per_channel=0"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("p=[0,0.55] per_channel=0"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("p=[0,0.55] per_channel=0"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("mean=100 std=1"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("size=(100,100)"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("divisor=255 divisor_gt=255"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + EXPECT_NO_THROW(p = make_unique(stringstream(""))); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("new_min=0 new_max=1"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); } TEST(Augmentations, ConstructFromStreamWithoutOptionalParms) { Image img({ 5, 5, 1 }, DataType::uint8, "xyc", ColorType::GRAY); - std::unique_ptr p; - std::stringstream ss("angle=[-5,5]"); + unique_ptr p; + stringstream ss("angle=[-5,5]"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("dims=(100,100)"); + ss = stringstream("dims=(100,100)"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("scale=(1.,2.)"); + ss = stringstream("scale=(1.,2.)"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream(""); + ss = stringstream(""); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream(""); + ss = stringstream(""); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream(""); + ss = stringstream(""); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("beta=[30,60]"); + ss = stringstream("beta=[30,60]"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("num_steps=[5,10] distort_limit=[-0.2,0.2]"); + ss = stringstream("num_steps=[5,10] distort_limit=[-0.2,0.2]"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - ss = std::stringstream("alpha=[34,60] sigma=[4,6]"); + ss = stringstream("alpha=[34,60] sigma=[4,6]"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("distort_limit=[5,10] shift_limit=[4,6]"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("p=[0,0.55] per_channel=0"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("p=[0,0.55] per_channel=0"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("p=[0,0.55] per_channel=0"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("mean=100 std=1"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("size=(100,100)"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("divisor=255"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); + EXPECT_NO_THROW(p = make_unique(stringstream(""))); + EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream("new_min=0 new_max=1"); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); } TEST(Augmentations, ConstructFromStreamWithWrongParms) { Image img({ 5, 5, 1 }, DataType::uint8, "xyc", ColorType::GRAY); - std::unique_ptr p; - std::stringstream ss("angle=(-5,5)"); - EXPECT_THROW(p = make_unique(ss), std::runtime_error); - ss = std::stringstream("dims=100"); - EXPECT_THROW(p = make_unique(ss), std::runtime_error); - ss = std::stringstream(""); - EXPECT_THROW(p = make_unique(ss), std::runtime_error); - ss = std::stringstream("p=\"test\""); - EXPECT_THROW(p = make_unique(ss), std::runtime_error); - ss = std::stringstream(""); - EXPECT_THROW(p = make_unique(ss), std::runtime_error); - ss = std::stringstream("num_steps=[5,10] distort_limit=(-0.2,0.2)"); - EXPECT_THROW(p = make_unique(ss), std::runtime_error); - ss = std::stringstream("alpha=34"); - EXPECT_THROW(p = make_unique(ss), std::runtime_error); + unique_ptr p; + stringstream ss("angle=(-5,5)"); + EXPECT_THROW(p = make_unique(ss), runtime_error); + ss = stringstream("dims=100"); + EXPECT_THROW(p = make_unique(ss), runtime_error); + ss = stringstream(""); + EXPECT_THROW(p = make_unique(ss), runtime_error); + ss = stringstream("p=\"test\""); + EXPECT_THROW(p = make_unique(ss), runtime_error); + ss = stringstream(""); + EXPECT_THROW(p = make_unique(ss), runtime_error); + ss = stringstream("num_steps=[5,10] distort_limit=(-0.2,0.2)"); + EXPECT_THROW(p = make_unique(ss), runtime_error); + ss = stringstream("alpha=34"); + EXPECT_THROW(p = make_unique(ss), runtime_error); } \ No newline at end of file From 58a0bca84e2e022ba9ca41919886a821d88e2feb Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Wed, 26 May 2021 16:24:16 +0200 Subject: [PATCH 19/38] Add example_pipeline and fix bugs --- examples/CMakeLists.txt | 3 + examples/example_pipeline.cpp | 121 ++++++++++++++++++ modules/dataset/include/ecvl/dataset_parser.h | 7 +- modules/eddl/include/ecvl/support_eddl.h | 18 ++- modules/eddl/src/support_eddl.cpp | 4 + modules/eddl/test/test_eddl.cpp | 6 +- 6 files changed, 152 insertions(+), 7 deletions(-) create mode 100644 examples/example_pipeline.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 51b5a876..14a49dec 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -38,8 +38,11 @@ if(ECVL_DATASET) endif() if(ECVL_BUILD_EDDL AND eddl_FOUND) add_executable(example_ecvl_eddl "examples/example_ecvl_eddl.cpp") + add_executable(example_pipeline "examples/example_pipeline.cpp") set_target_properties(example_ecvl_eddl PROPERTIES FOLDER "Examples") target_link_libraries(example_ecvl_eddl ${ECVL_MODULES}) + set_target_properties(example_pipeline PROPERTIES FOLDER "Examples") + target_link_libraries(example_pipeline ${ECVL_MODULES}) endif() if(ECVL_BUILD_GUI AND wxWidgets_FOUND) add_executable(example_ecvl_gui "examples/example_ecvl_gui.cpp") diff --git a/examples/example_pipeline.cpp b/examples/example_pipeline.cpp new file mode 100644 index 00000000..3ad8fe47 --- /dev/null +++ b/examples/example_pipeline.cpp @@ -0,0 +1,121 @@ +/* +* ECVL - European Computer Vision Library +* Version: 0.3.4 +* copyright (c) 2021, Università degli Studi di Modena e Reggio Emilia (UNIMORE), AImageLab +* Authors: +* Costantino Grana (costantino.grana@unimore.it) +* Federico Bolelli (federico.bolelli@unimore.it) +* Michele Cancilla (michele.cancilla@unimore.it) +* Laura Canalini (laura.canalini@unimore.it) +* Stefano Allegretti (stefano.allegretti@unimore.it) +* All rights reserved. +*/ + +#include +#include +#include +#include + +#include "ecvl/augmentations.h" +#include "ecvl/core.h" +#include "ecvl/support_eddl.h" +#include "ecvl/core/filesystem.h" + +using namespace ecvl; +using namespace ecvl::filesystem; +using namespace eddl; +using namespace std; + +int main() +{ + // Create the augmentations to be applied to the dataset images during training and test. + auto training_augs = make_shared( + AugRotate({ -5, 5 }), + AugAdditiveLaplaceNoise({ 0, 0.2 * 255 }), + AugCoarseDropout({ 0, 0.55 }, { 0.02,0.1 }, 0), + AugAdditivePoissonNoise({ 0, 40 }), + AugToFloat32(255) + ); + + auto test_augs = make_shared(AugToFloat32(255)); + + // Replace the random seed with a fixed one to have reproducible experiments + AugmentationParam::SetSeed(0); + + DatasetAugmentations dataset_augmentations{ { training_augs, test_augs } }; + + int epochs = 5; + int batch_size = 200; + int num_workers = 4; + int queue_ratio = 5; + cout << "Creating a DLDataset" << endl; + + // Initialize the DLDataset + DLDataset d("../examples/data/mnist/mnist.yml", batch_size, dataset_augmentations, ColorType::GRAY, ColorType::none, num_workers, queue_ratio, { true, false }); + //DLDataset d("D:/Data/isic_skin_lesion/isic_skin_lesion/isic_classification.yml", batch_size, dataset_augmentations, ColorType::RGB, ColorType::none, num_workers, queue_ratio); + + ofstream of; + cv::TickMeter tm; + cv::TickMeter tm_epoch; + auto num_batches_training = d.GetNumBatches(SplitType::training); + auto num_batches_test = d.GetNumBatches(SplitType::test); + + pair, unique_ptr> samples_and_labels; + + for (int i = 0; i < epochs; ++i) { + tm_epoch.reset(); + tm_epoch.start(); + + cout << "Starting training" << endl; + d.SetSplit(SplitType::training); + + // Reset current split with shuffling + d.ResetBatch(d.current_split_, true); + + // Spawn num_workers threads + d.Start(); + for (int j = 0; j < num_batches_training; ++j) { + tm.reset(); + tm.start(); + cout << "Epoch " << i << "/" << epochs - 1 << " (batch " << j << "/" << num_batches_training - 1 << ") - "; + cout << "|fifo| " << d.GetQueueSize() << " - "; + + samples_and_labels = d.GetBatch(); + + // Sleep in order to simulate EDDL train_batch + cout << "sleeping..."; + this_thread::sleep_for(chrono::milliseconds(500)); + + tm.stop(); + cout << "Elapsed time: " << tm.getTimeMilli() << endl; + } + d.Stop(); + + cout << "Starting test" << endl; + d.SetSplit(SplitType::test); + + // Reset current split without shuffling + d.ResetBatch(d.current_split_, false); + + d.Start(); + for (int j = 0; j < num_batches_test; ++j) { + tm.reset(); + tm.start(); + cout << "Test: Epoch " << i << "/" << epochs - 1 << " (batch " << j << "/" << num_batches_test - 1 << ") - "; + cout << "|fifo| " << d.GetQueueSize() << " - "; + + samples_and_labels = d.GetBatch(); + + // Sleep in order to simulate EDDL evaluate_batch + cout << "sleeping... - "; + this_thread::sleep_for(chrono::milliseconds(500)); + tm.stop(); + cout << "Elapsed time: " << tm.getTimeMilli() << endl; + } + d.Stop(); + tm_epoch.stop(); + cout << "Epoch elapsed time: " << tm_epoch.getTimeSec() << endl; + } + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/modules/dataset/include/ecvl/dataset_parser.h b/modules/dataset/include/ecvl/dataset_parser.h index 4432e7ca..b1c9c361 100644 --- a/modules/dataset/include/ecvl/dataset_parser.h +++ b/modules/dataset/include/ecvl/dataset_parser.h @@ -112,7 +112,10 @@ class Split void SetLastBatch(int batch_size) { - last_batch_ = vsize(samples_indices_) % batch_size; + // last batch is the remainder of the number of samples of the split divided by the batch size. + // if drop last is true or the remainder is 0, last batch is equal to the batch size. + auto value = vsize(samples_indices_) % batch_size; + last_batch_ = drop_last_ ? batch_size : (value == 0 ? batch_size : value); } }; @@ -152,7 +155,7 @@ class Dataset virtual ~Dataset() {} /** @brief Returns the image indexes of the requested split. - + If no split is provided or an illegal value is provided, the current split is returned. @param[in] split index, name or ecvl::SplitType representing the split to get. @return vector of image indexes of the requested split. diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index e4c9ef2e..66802726 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -115,6 +115,11 @@ class DatasetAugmentations { return Apply(+st, img, gt); // Magic + operator } + + bool IsEmpty() const + { + return augs_.empty(); + } }; /** @brief Label class representing the Sample labels, which may have different representations depending on the task. @@ -369,7 +374,14 @@ class DLDataset : public Dataset if (!split_.empty()) { current_split_ = 0; // Initialize resize_dims_ after that augmentations on the first image are performed - augs_.Apply(current_split_, tmp); + if (augs_.IsEmpty()) { + cout << ECVL_WARNING_MSG << "Augmentations are empty!" << endl; + } + else { + while (!augs_.Apply(current_split_, tmp)) { + ++current_split_; + } + } auto y = tmp.channels_.find('y'); auto x = tmp.channels_.find('x'); assert(y != std::string::npos && x != std::string::npos); @@ -406,12 +418,12 @@ class DLDataset : public Dataset case Task::classification: label_ = new LabelClass(); tensors_shape_ = make_pair, vector>({ batch_size_, n_channels_, resize_dims_[0], resize_dims_[1] }, { batch_size_, vsize(classes_) }); - break; + break; case Task::segmentation: label_ = new LabelImage(); tensors_shape_ = make_pair, vector>({ batch_size_, n_channels_, resize_dims_[0], resize_dims_[1] }, { batch_size_, n_channels_gt_, resize_dims_[0], resize_dims_[1] }); - break; + break; } } diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index 02c7fa3b..0f43a2c1 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -417,6 +417,10 @@ void DLDataset::ThreadFunc(int thread_index) pair, unique_ptr> DLDataset::GetBatch() { + if (!active_) { + cout << ECVL_WARNING_MSG << "You're trying to get a batch without starting the threads - you'll wait forever!" << endl; + } + ++current_batch_[current_split_]; auto& s = split_[current_split_]; auto tensors_shape = tensors_shape_; diff --git a/modules/eddl/test/test_eddl.cpp b/modules/eddl/test/test_eddl.cpp index aba2c217..e9bde237 100644 --- a/modules/eddl/test/test_eddl.cpp +++ b/modules/eddl/test/test_eddl.cpp @@ -90,7 +90,8 @@ TEST(Augmentations, ConstructFromStreamAllParamsOk) ss = stringstream("divisor=255 divisor_gt=255"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - EXPECT_NO_THROW(p = make_unique(stringstream(""))); + ss = stringstream(""); + EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); ss = stringstream("new_min=0 new_max=1"); EXPECT_NO_THROW(p = make_unique(ss)); @@ -149,7 +150,8 @@ TEST(Augmentations, ConstructFromStreamWithoutOptionalParms) ss = stringstream("divisor=255"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); - EXPECT_NO_THROW(p = make_unique(stringstream(""))); + ss = stringstream(""); + EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); ss = stringstream("new_min=0 new_max=1"); EXPECT_NO_THROW(p = make_unique(ss)); From e9f73610d2659755b697962ba16d95d89c970f61 Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Thu, 27 May 2021 11:59:15 +0200 Subject: [PATCH 20/38] Update example_pipeline.cpp --- examples/example_pipeline.cpp | 37 +++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/examples/example_pipeline.cpp b/examples/example_pipeline.cpp index 3ad8fe47..d642f574 100644 --- a/examples/example_pipeline.cpp +++ b/examples/example_pipeline.cpp @@ -44,10 +44,10 @@ int main() DatasetAugmentations dataset_augmentations{ { training_augs, test_augs } }; - int epochs = 5; - int batch_size = 200; - int num_workers = 4; - int queue_ratio = 5; + constexpr int epochs = 5; + constexpr int batch_size = 200; + constexpr int num_workers = 4; + constexpr int queue_ratio = 5; cout << "Creating a DLDataset" << endl; // Initialize the DLDataset @@ -60,12 +60,14 @@ int main() auto num_batches_training = d.GetNumBatches(SplitType::training); auto num_batches_test = d.GetNumBatches(SplitType::test); - pair, unique_ptr> samples_and_labels; - for (int i = 0; i < epochs; ++i) { tm_epoch.reset(); tm_epoch.start(); - + /* Resize to batch_size if we have done a resize previously + if (d.split_[d.current_split_].last_batch_ != batch_size){ + net->resize(batch_size); + } + */ cout << "Starting training" << endl; d.SetSplit(SplitType::training); @@ -80,11 +82,15 @@ int main() cout << "Epoch " << i << "/" << epochs - 1 << " (batch " << j << "/" << num_batches_training - 1 << ") - "; cout << "|fifo| " << d.GetQueueSize() << " - "; - samples_and_labels = d.GetBatch(); + // pair, unique_ptr> samples_and_labels; + // samples_and_labels = d.GetBatch(); + // or... + auto [x, y] = d.GetBatch(); // Sleep in order to simulate EDDL train_batch cout << "sleeping..."; this_thread::sleep_for(chrono::milliseconds(500)); + // eddl::train_batch(net, { x.get() }, { y.get() }); tm.stop(); cout << "Elapsed time: " << tm.getTimeMilli() << endl; @@ -104,11 +110,22 @@ int main() cout << "Test: Epoch " << i << "/" << epochs - 1 << " (batch " << j << "/" << num_batches_test - 1 << ") - "; cout << "|fifo| " << d.GetQueueSize() << " - "; - samples_and_labels = d.GetBatch(); - + // pair, unique_ptr> samples_and_labels; + // samples_and_labels = d.GetBatch(); + // or... + auto [x, y] = d.GetBatch(); + + /* Resize net for last batch + if (auto x_batch = x->shape[0]; j == num_batches_test - 1 && x_batch != batch_size) { + // last mini-batch could have different size + net->resize(x_batch); + } + */ // Sleep in order to simulate EDDL evaluate_batch cout << "sleeping... - "; this_thread::sleep_for(chrono::milliseconds(500)); + // eddl::eval_batch(net, { x.get() }, { y.get() }); + tm.stop(); cout << "Elapsed time: " << tm.getTimeMilli() << endl; } From bcf98478804a8e1f4088f15ba91e41856738d286 Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Thu, 27 May 2021 12:14:03 +0200 Subject: [PATCH 21/38] Remove gcc-6 compatibility --- .github/workflows/linux.yml | 4 ++-- README.md | 36 ++++++++++++++++++------------------ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 0dade367..5b1435b7 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -14,8 +14,8 @@ jobs: strategy: matrix: cfg: - - { os: ubuntu-18.04, c-version: gcc-6, cxx-version: g++-6, generator: "Unix Makefiles" } - - { os: ubuntu-18.04, c-version: gcc-10, cxx-version: g++-10, generator: "Unix Makefiles" } + - { os: ubuntu-18.04, c-version: gcc-7, cxx-version: g++-7, generator: "Unix Makefiles" } + - { os: ubuntu-18.04, c-version: gcc-11, cxx-version: g++-11, generator: "Unix Makefiles" } - { os: ubuntu-18.04, c-version: clang-5.0, cxx-version: clang++-5.0, generator: "Unix Makefiles" } - { os: ubuntu-18.04, c-version: clang-10, cxx-version: clang++-10, generator: "Unix Makefiles" } steps: diff --git a/README.md b/README.md index 494ef84a..181b6457 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ The ECVL documentation is available [here](https://deephealthproject.github.io/e ## Requirements - CMake 3.13 or later -- C++ Compiler with C++17 support (e.g. GCC 6 or later, Clang 5.0 or later, Visual Studio 2017 or later) +- C++ Compiler with C++17 support (e.g. GCC 7 or later, Clang 5.0 or later, Visual Studio 2017 or later) - [OpenCV](https://opencv.org) 3.0 or later (modules required: `core`, `imgproc`, `imgcodecs`, `photo`, [`calib3d` since OpenCV 4.0 only. Note that `calib3d` depends on `features2d` and `flann`]) ### Optional @@ -193,16 +193,16 @@ Contributions of any kind are welcome! Windows Server 2016 VS 2017 15.9.28307 - 3.4.13 - 0.8.3 + 3.4.14 + 0.9.2b GitHub Actions Workflow status badge Windows Server 2019 - VS 2019 16.8.30804 - 3.4.13 - 0.8.3 + VS 2019 16.9.31229 + 3.4.14 + 0.9.2b GitHub Actions @@ -228,31 +228,31 @@ Contributions of any kind are welcome! Ubuntu 18.04.5 - GCC 6.5.0 - 3.4.13 - 0.8.3 + GCC 7.5.0 + 3.4.14 + 0.9.2b GitHub Actions Workflow status badge Ubuntu 18.04.5 - GCC 10.1.0 - 3.4.13 - 0.8.3 + GCC 11.1.0 + 3.4.14 + 0.9.2b GitHub Actions Ubuntu 18.04.5 Clang 5.0.1 - 3.4.13 - 0.8.3 + 3.4.14 + 0.9.2b GitHub Actions Ubuntu 18.04.5 Clang 10.0.0 - 3.4.13 - 0.8.3 + 3.4.14 + 0.9.2b GitHub Actions @@ -273,8 +273,8 @@ Contributions of any kind are welcome! macOS 10.15 Apple Clang 12.0.0 - 3.4.13 - 0.8.3 + 3.4.14 + 0.9.2b GitHub Actions Workflow status badge From c8736656f93b5678b8e0a47d04205c8a0b402aa3 Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Fri, 28 May 2021 12:40:02 +0200 Subject: [PATCH 22/38] Add constness to some functions --- examples/example_ecvl_eddl.cpp | 7 +++++-- modules/dataset/include/ecvl/dataset_parser.h | 4 ++-- modules/dataset/src/dataset_parser.cpp | 10 +++++----- modules/eddl/include/ecvl/support_eddl.h | 4 ++-- modules/eddl/src/support_eddl.cpp | 2 +- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/examples/example_ecvl_eddl.cpp b/examples/example_ecvl_eddl.cpp index 33e9f4ae..f434b4b8 100644 --- a/examples/example_ecvl_eddl.cpp +++ b/examples/example_ecvl_eddl.cpp @@ -70,6 +70,7 @@ int main() cout << "Executing TensorToView" << endl; TensorToView(t, view); + // Create an augmentation sequence from stream stringstream ss( "SequentialAugmentationContainer\n" " AugRotate angle=[-5,5] center=(0,0) interp=\"linear\"\n" @@ -84,8 +85,9 @@ int main() auto newdeal_augs = AugmentationFactory::create(ss); newdeal_augs->Apply(tmp); + /*--------------------------------------------------------------------------------------------*/ + // Create the augmentations to be applied to the dataset images during training and test. - // nullptr is given as augmentation for validation because this split doesn't exist in the mnist dataset. auto training_augs = make_shared( AugRotate({ -5, 5 }), AugAdditiveLaplaceNoise({ 0, 0.2 * 255 }), @@ -102,7 +104,8 @@ int main() AugNormalize({ 0.449 }, { 0.226 }) // mean of imagenet stats ); - // DatasetAugmentations dataset_augmentations{ {training_augs, nullptr, test_augs } }; // OLD version: nullptr are no more required + // OLD version: now the number of augmentations must match the number of splits in the yml file + // DatasetAugmentations dataset_augmentations{ {training_augs, nullptr, test_augs } }; DatasetAugmentations dataset_augmentations{ {training_augs, test_augs } }; int batch_size = 64; diff --git a/modules/dataset/include/ecvl/dataset_parser.h b/modules/dataset/include/ecvl/dataset_parser.h index b1c9c361..8107b5e1 100644 --- a/modules/dataset/include/ecvl/dataset_parser.h +++ b/modules/dataset/include/ecvl/dataset_parser.h @@ -132,7 +132,7 @@ class Dataset void FindLabel(Sample& sample, const YAML::Node& n); protected: std::vector::iterator GetSplitIt(ecvl::any split); - int GetSplitIndex(ecvl::any split); + const int GetSplitIndex(ecvl::any split); public: std::string name_ = "DeepHealth dataset"; /**< @brief Name of the Dataset. */ std::string description_ = "This is the DeepHealth example dataset!"; /**< @brief Description of the Dataset. */ @@ -182,7 +182,7 @@ class Dataset @return vector containing all the samples locations. */ - std::vector> GetLocations(); + std::vector> GetLocations() const; // RegEx which matchs URLs static const std::regex url_regex_; diff --git a/modules/dataset/src/dataset_parser.cpp b/modules/dataset/src/dataset_parser.cpp index 03a86367..5cd6c730 100644 --- a/modules/dataset/src/dataset_parser.cpp +++ b/modules/dataset/src/dataset_parser.cpp @@ -291,7 +291,7 @@ Dataset::Dataset(const filesystem::path& filename, bool verify) task_ = classes_.empty() ? Task::segmentation : Task::classification; } -int Dataset::GetSplitIndex(any split) +const int Dataset::GetSplitIndex(any split) { if (split.type() == typeid(int)) { auto s = any_cast(split); @@ -299,7 +299,7 @@ int Dataset::GetSplitIndex(any split) return index; } else { - return static_cast(distance(split_.begin(), GetSplitIt(split))); + return static_cast(distance(split_.begin(), GetSplitIt(split))); } } @@ -308,7 +308,7 @@ vector::iterator Dataset::GetSplitIt(any split) if (split.type() == typeid(int)) { try { auto s = any_cast(split); - int index = s < 0 || s >= split_.size() ? current_split_ : s; + const int index = s < 0 || s >= split_.size() ? current_split_ : s; return split_.begin() + index; } catch (const out_of_range) { @@ -342,7 +342,7 @@ vector::iterator Dataset::GetSplitIt(any split) } } -std::vector& Dataset::GetSplit(const any& split) +vector& Dataset::GetSplit(const any& split) { auto it = GetSplitIt(split); return it->samples_indices_; @@ -354,7 +354,7 @@ void Dataset::SetSplit(const any& split) this->current_split_ = index; } -vector> Dataset::GetLocations() +vector> Dataset::GetLocations() const { const auto& size = vsize(samples_); vector> locations(size); diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index 66802726..3608974f 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -508,7 +508,7 @@ class DLDataset : public Dataset @return Size of the producers-consumer queue of the dataset. */ - auto GetQueueSize() { return queue_.Length(); }; + auto GetQueueSize() const { return queue_.Length(); }; /** @brief Set the current split and if the split doesn't have labels update the dataset tensors_shape_. @@ -528,7 +528,7 @@ class DLDataset : public Dataset @param[in] split index, name or ecvl::SplitType representing the split from which to get the number of batches. @return number of batches of the specified split. */ - int GetNumBatches(const ecvl::any& split = -1); + const int GetNumBatches(const ecvl::any& split = -1); }; /** @brief Make a grid of images from a EDDL Tensor. diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index 0f43a2c1..d4c2ef57 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -490,7 +490,7 @@ void DLDataset::Stop() } } -int DLDataset::GetNumBatches(const any& split) +const int DLDataset::GetNumBatches(const any& split) { auto it = GetSplitIt(split); return it->num_batches_; From 8bdabca12939ef57205cfb3592e5c055819153cb Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Fri, 28 May 2021 15:59:42 +0200 Subject: [PATCH 23/38] Add CenterCrop which infers crop dimensions --- examples/example_ecvl_eddl.cpp | 1 + modules/eddl/include/ecvl/augmentations.h | 23 ++++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/examples/example_ecvl_eddl.cpp b/examples/example_ecvl_eddl.cpp index f434b4b8..e2cda679 100644 --- a/examples/example_ecvl_eddl.cpp +++ b/examples/example_ecvl_eddl.cpp @@ -33,6 +33,7 @@ int main() // Create an augmentation sequence to be applied to the image auto augs = make_shared( + AugCenterCrop(), // Make image squared AugRotate({ -5, 5 }), AugMirror(.5), AugFlip(.5), diff --git a/modules/eddl/include/ecvl/augmentations.h b/modules/eddl/include/ecvl/augmentations.h index 7b9f0960..f23dceb2 100644 --- a/modules/eddl/include/ecvl/augmentations.h +++ b/modules/eddl/include/ecvl/augmentations.h @@ -1154,7 +1154,7 @@ class AugPepper : public Augmentation virtual void RealApply(ecvl::Image& img, const ecvl::Image& gt = Image()) override { const auto p = params_["p"].value_; - const auto seed = params_["seed"].value_; + const auto seed = params_["seed"].value_; const bool per_channel = params_["per_channel"].value_ <= per_channel_ ? true : false; Pepper(img, img, p, per_channel, static_cast(seed)); } @@ -1199,7 +1199,7 @@ class AugSaltAndPepper : public Augmentation virtual void RealApply(ecvl::Image& img, const ecvl::Image& gt = Image()) override { const auto p = params_["p"].value_; - const auto seed = params_["seed"].value_; + const auto seed = params_["seed"].value_; const bool per_channel = params_["per_channel"].value_ <= per_channel_ ? true : false; SaltAndPepper(img, img, p, per_channel, static_cast(seed)); } @@ -1302,20 +1302,33 @@ class AugNormalize : public Augmentation class AugCenterCrop : public Augmentation { std::vector size_; + bool infer_; virtual void RealApply(ecvl::Image& img, const ecvl::Image& gt = Image()) override { - CenterCrop(img, img, size_); + std::vector new_size = size_; + if (infer_) { + // TODO: 3D implementation + new_size = std::vector(2, std::min(img.Width(), img.Height())); + } + CenterCrop(img, img, new_size); if (!gt.IsEmpty()) { - CenterCrop(gt, const_cast(gt), size_); + CenterCrop(gt, const_cast(gt), new_size); } } public: + /** @brief AugCenterCrop constructor. Crop size is inferred from the minimum image dimension. + \f$ + crop\_size = min(Image_{cols}, Image_{rows}) + \f$ + */ + AugCenterCrop() : infer_{ true } {} + /** @brief AugCenterCrop constructor @param[in] size std::vector that specifies the new size of each dimension [w,h]. */ - AugCenterCrop(const std::vector& size) : size_{ size } {} + AugCenterCrop(const std::vector& size) : size_{ size }, infer_{ false } {} AugCenterCrop(std::istream& is) { From aadb549b4ac6f460823242f1c7b79432e6c3d80f Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Fri, 28 May 2021 17:35:29 +0200 Subject: [PATCH 24/38] Edit CenterCrop tests --- modules/eddl/include/ecvl/augmentations.h | 10 +++++++--- modules/eddl/test/test_eddl.cpp | 3 +++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/modules/eddl/include/ecvl/augmentations.h b/modules/eddl/include/ecvl/augmentations.h index f23dceb2..68c7649d 100644 --- a/modules/eddl/include/ecvl/augmentations.h +++ b/modules/eddl/include/ecvl/augmentations.h @@ -1335,9 +1335,13 @@ class AugCenterCrop : public Augmentation auto m = param::read(is, "AugCenterCrop"); param p; - m.Get("size", param::type::vector, true, p); - for (const auto& x : p.vals_) { - size_.emplace_back(static_cast(x)); + if (m.Get("size", param::type::vector, false, p)) { + for (const auto& x : p.vals_) { + size_.emplace_back(static_cast(x)); + } + infer_ = false; + } else { + infer_ = true; } } }; diff --git a/modules/eddl/test/test_eddl.cpp b/modules/eddl/test/test_eddl.cpp index e9bde237..480d2e88 100644 --- a/modules/eddl/test/test_eddl.cpp +++ b/modules/eddl/test/test_eddl.cpp @@ -84,6 +84,9 @@ TEST(Augmentations, ConstructFromStreamAllParamsOk) ss = stringstream("mean=100 std=1"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); + ss = stringstream(""); + EXPECT_NO_THROW(p = make_unique(ss)); + EXPECT_NO_THROW(p->Apply(img)); ss = stringstream("size=(100,100)"); EXPECT_NO_THROW(p = make_unique(ss)); EXPECT_NO_THROW(p->Apply(img)); From f9ca99d981ba7af83eef0ea880dcfad7b16f090b Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Fri, 28 May 2021 18:40:18 +0200 Subject: [PATCH 25/38] Add virtual specifier to ProduceImageLabel --- modules/eddl/include/ecvl/support_eddl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index 3608974f..31f6ebff 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -479,7 +479,7 @@ class DLDataset : public Dataset @anchor ProduceImageLabel */ - void ProduceImageLabel(Sample& elem); + virtual void ProduceImageLabel(Sample& elem); /** @brief Function called when the thread are spawned. From 99e786aa7cc1e09beeec34d175ebc9d4033a0606 Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Mon, 31 May 2021 16:56:33 +0200 Subject: [PATCH 26/38] Add Clone method for augmentations --- modules/eddl/include/ecvl/augmentations.h | 66 +++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/modules/eddl/include/ecvl/augmentations.h b/modules/eddl/include/ecvl/augmentations.h index 68c7649d..ad64e9a9 100644 --- a/modules/eddl/include/ecvl/augmentations.h +++ b/modules/eddl/include/ecvl/augmentations.h @@ -219,11 +219,13 @@ class Augmentation } RealApply(img, gt); } + virtual std::shared_ptr Clone() const = 0; virtual ~Augmentation() = default; private: virtual void RealApply(ecvl::Image& img, const ecvl::Image& gt = Image()) = 0; }; +#define DEFINE_AUGMENTATION_CLONE(class_name) std::shared_ptr Clone() const override { return std::make_shared(*this); } struct AugmentationFactory { @@ -261,11 +263,20 @@ class SequentialAugmentationContainer : public Augmentation } std::vector> augs_; /**< @brief vector containing the Augmentation to be applied */ public: + DEFINE_AUGMENTATION_CLONE(SequentialAugmentationContainer) + template SequentialAugmentationContainer(Ts&&... t) : augs_({ std::make_shared(std::forward(t))... }) {} SequentialAugmentationContainer(std::vector> augs) : augs_(augs) {} + SequentialAugmentationContainer(const SequentialAugmentationContainer& other) : Augmentation(other) + { + for (const auto& a : other.augs_) { + augs_.emplace_back(a->Clone()); + } + } + SequentialAugmentationContainer(std::istream& is) { while (true) { @@ -306,6 +317,8 @@ class OneOfAugmentationContainer : public Augmentation std::vector> augs_; /**< @brief vector containing the Augmentation to be applied */ double p_; public: + DEFINE_AUGMENTATION_CLONE(OneOfAugmentationContainer) + template OneOfAugmentationContainer(double p, Ts&&... t) : p_(p), augs_({ std::make_shared(std::forward(t))... }) { @@ -317,6 +330,13 @@ class OneOfAugmentationContainer : public Augmentation params_["p"] = AugmentationParam(0, 1); } + OneOfAugmentationContainer(const OneOfAugmentationContainer& other) : Augmentation(other) + { + for (const auto& a : other.augs_) { + augs_.emplace_back(a->Clone()); + } + } + OneOfAugmentationContainer(std::istream& is) { param p; @@ -369,6 +389,8 @@ class AugRotate : public Augmentation } } public: + DEFINE_AUGMENTATION_CLONE(AugRotate) + /** @brief AugRotate constructor @param[in] angle Parameter which determines the range of degrees [min,max] to randomly select from. @@ -434,6 +456,8 @@ class AugResizeDim : public Augmentation } } public: + DEFINE_AUGMENTATION_CLONE(AugResizeDim) + /** @brief AugResizeDim constructor @param[in] dims std::vector that specifies the new size of each dimension. @@ -484,6 +508,8 @@ class AugResizeScale : public Augmentation } } public: + DEFINE_AUGMENTATION_CLONE(AugResizeScale) + /** @brief AugResizeScale constructor @param[in] scale std::vector that specifies the scale to apply to each dimension. @@ -534,6 +560,8 @@ class AugFlip : public Augmentation } } public: + DEFINE_AUGMENTATION_CLONE(AugFlip) + /** @brief AugFlip constructor @param[in] p Probability of each image to get flipped. @@ -573,6 +601,8 @@ class AugMirror : public Augmentation } } public: + DEFINE_AUGMENTATION_CLONE(AugMirror) + /** @brief AugMirror constructor @param[in] p Probability of each image to get mirrored. @@ -605,6 +635,8 @@ class AugGaussianBlur : public Augmentation GaussianBlur(img, img, sigma); } public: + DEFINE_AUGMENTATION_CLONE(AugGaussianBlur) + /** @brief AugGaussianBlur constructor @param[in] sigma Parameter which determines the range of sigma [min,max] to randomly select from. @@ -636,6 +668,8 @@ class AugAdditiveLaplaceNoise : public Augmentation AdditiveLaplaceNoise(img, img, std_dev); } public: + DEFINE_AUGMENTATION_CLONE(AugAdditiveLaplaceNoise) + /** @brief AugAdditiveLaplaceNoise constructor @param[in] std_dev Parameter which determines the range of values [min,max] to randomly select the standard deviation of the noise generating distribution. @@ -668,6 +702,8 @@ class AugAdditivePoissonNoise : public Augmentation AdditivePoissonNoise(img, img, lambda); } public: + DEFINE_AUGMENTATION_CLONE(AugAdditivePoissonNoise) + /** @brief AugAdditivePoissonNoise constructor @param[in] lambda Parameter which determines the range of values [min,max] to randomly select the lambda of the noise generating distribution. @@ -700,6 +736,8 @@ class AugGammaContrast : public Augmentation GammaContrast(img, img, gamma); } public: + DEFINE_AUGMENTATION_CLONE(AugGammaContrast) + /** @brief AugGammaContrast constructor @param[in] gamma Parameter which determines the range of values [min,max] to randomly select the exponent for the contrast adjustment. @@ -736,6 +774,8 @@ class AugCoarseDropout : public Augmentation CoarseDropout(img, img, p, drop_size, per_channel); } public: + DEFINE_AUGMENTATION_CLONE(AugCoarseDropout) + /** @brief AugCoarseDropout constructor @param[in] p Parameter which determines the range of values [min,max] to randomly select the probability of any rectangle being set to zero. @@ -785,6 +825,8 @@ class AugTranspose : public Augmentation } } public: + DEFINE_AUGMENTATION_CLONE(AugTranspose) + /** @brief AugTranspose constructor @param[in] p Probability of each image to get transposed. @@ -817,6 +859,8 @@ class AugBrightness : public Augmentation Add(img, beta, img); } public: + DEFINE_AUGMENTATION_CLONE(AugBrightness) + /** @brief AugBrightness constructor @param[in] beta Parameter which determines the range of values [min,max] to randomly select the value for the brightness adjustment. @@ -861,6 +905,8 @@ class AugGridDistortion : public Augmentation } } public: + DEFINE_AUGMENTATION_CLONE(AugGridDistortion) + /** @brief AugGridDistortion constructor @param[in] num_steps Parameter which determines the range of values [min,max] to randomly select the number of grid cells on each side. @@ -947,6 +993,8 @@ class AugElasticTransform : public Augmentation } } public: + DEFINE_AUGMENTATION_CLONE(AugElasticTransform) + /** @brief AugElasticTransform constructor @param[in] alpha Parameter which determines the range of values [min,max] to randomly select the scaling factor that controls the intensity of the deformation. @@ -1035,6 +1083,8 @@ class AugOpticalDistortion : public Augmentation } } public: + DEFINE_AUGMENTATION_CLONE(AugOpticalDistortion) + /** @brief AugOpticalDistortion constructor @param[in] distort_limit Parameter which determines the range of values [min,max] to randomly select the distortion steps. @@ -1114,6 +1164,8 @@ class AugSalt : public Augmentation Salt(img, img, p, per_channel, static_cast(seed)); } public: + DEFINE_AUGMENTATION_CLONE(AugSalt) + /** @brief AugSalt constructor @param[in] p Parameter which determines the range of values [min,max] to randomly select the probability of any pixel being set to white. @@ -1159,6 +1211,8 @@ class AugPepper : public Augmentation Pepper(img, img, p, per_channel, static_cast(seed)); } public: + DEFINE_AUGMENTATION_CLONE(AugPepper) + /** @brief AugPepper constructor @param[in] p Parameter which determines the range of values [min,max] to randomly select the probability of any pixel being set to black. @@ -1204,6 +1258,8 @@ class AugSaltAndPepper : public Augmentation SaltAndPepper(img, img, p, per_channel, static_cast(seed)); } public: + DEFINE_AUGMENTATION_CLONE(AugSaltAndPepper) + /** @brief AugSaltAndPepper constructor @param[in] p Parameter which determines the range of values [min,max] to randomly select the probability of any pixel being set to white or black. @@ -1255,6 +1311,8 @@ class AugNormalize : public Augmentation } } public: + DEFINE_AUGMENTATION_CLONE(AugNormalize) + /** @brief AugNormalize constructor @param[in] mean Mean to substract from all pixel. @@ -1317,6 +1375,8 @@ class AugCenterCrop : public Augmentation } } public: + DEFINE_AUGMENTATION_CLONE(AugCenterCrop) + /** @brief AugCenterCrop constructor. Crop size is inferred from the minimum image dimension. \f$ crop\_size = min(Image_{cols}, Image_{rows}) @@ -1367,6 +1427,8 @@ class AugToFloat32 : public Augmentation } } public: + DEFINE_AUGMENTATION_CLONE(AugToFloat32) + /** @brief AugToFloat32 constructor @param[in] divisor Value used to divide the img Image. @@ -1402,6 +1464,8 @@ class AugDivBy255 : public Augmentation } } public: + DEFINE_AUGMENTATION_CLONE(AugDivBy255) + /** @brief AugDivBy255 constructor */ AugDivBy255() {} AugDivBy255(std::istream& is) {} @@ -1420,6 +1484,8 @@ class AugScaleTo : public Augmentation ScaleTo(img, img, new_min_, new_max_); } public: + DEFINE_AUGMENTATION_CLONE(AugScaleTo) + /** @brief AugScaleTo constructor @param[in] new_min double which indicates the new minimum value. From f23a52266fa78ace277a83309c6654eca5039042 Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Mon, 31 May 2021 16:57:50 +0200 Subject: [PATCH 27/38] Each thread now uses an internal copy of the DatasetAugmentations --- modules/eddl/include/ecvl/support_eddl.h | 11 +++++++++-- modules/eddl/src/support_eddl.cpp | 11 ++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index 31f6ebff..4a252c89 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -94,7 +94,14 @@ class DatasetAugmentations public: DatasetAugmentations(const std::vector>& augs = { nullptr, nullptr, nullptr }) : augs_(augs) {} -// Getters: YAGNI + // This makes a deep copy of the Augmentations + DatasetAugmentations(const DatasetAugmentations& other) { + for (const auto& a : other.augs_) { + augs_.emplace_back(a->Clone()); + } + } + + // Getters: YAGNI bool Apply(const int split, Image& img, const Image& gt = Image()) { @@ -479,7 +486,7 @@ class DLDataset : public Dataset @anchor ProduceImageLabel */ - virtual void ProduceImageLabel(Sample& elem); + virtual void ProduceImageLabel(DatasetAugmentations& augs, Sample& elem); /** @brief Function called when the thread are spawned. diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index d4c2ef57..efa01a7d 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -342,7 +342,7 @@ Image MakeGrid(Tensor*& t, int cols, bool normalize) return image_t; } -void DLDataset::ProduceImageLabel(Sample& elem) +void DLDataset::ProduceImageLabel(DatasetAugmentations& augs, Sample& elem) { Image img = elem.LoadImage(ctype_, false); switch (task_) { @@ -355,7 +355,7 @@ void DLDataset::ProduceImageLabel(Sample& elem) label->label = elem.label_.value(); } // Apply chain of augmentations only to sample image - augs_.Apply(current_split_, img); + augs.Apply(current_split_, img); queue_.Push(img, label); } break; @@ -367,11 +367,11 @@ void DLDataset::ProduceImageLabel(Sample& elem) label = new LabelImage(); Image gt = elem.LoadImage(ctype_gt_, true); // Apply chain of augmentations to sample image and corresponding ground truth - augs_.Apply(current_split_, img, gt); + augs.Apply(current_split_, img, gt); label->gt = gt; } else { - augs_.Apply(current_split_, img); + augs.Apply(current_split_, img); } queue_.Push(img, label); } @@ -405,11 +405,12 @@ void DLDataset::InitTC(int split_index) void DLDataset::ThreadFunc(int thread_index) { auto& tc_of_current_split = splits_tc_[current_split_]; + DatasetAugmentations augs = augs_; while (tc_of_current_split[thread_index].counter_ < tc_of_current_split[thread_index].max_) { auto sample_index = split_[current_split_].samples_indices_[tc_of_current_split[thread_index].counter_]; Sample& elem = samples_[sample_index]; - ProduceImageLabel(elem); + ProduceImageLabel(augs, elem); ++tc_of_current_split[thread_index].counter_; } From 2eac3606f6bc020b6175fd9d2418949ad8174d12 Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Mon, 31 May 2021 17:24:17 +0200 Subject: [PATCH 28/38] Suppress dcmtk/CMakeLists.txt warnings --- 3rdparty/dcmtk/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/3rdparty/dcmtk/CMakeLists.txt b/3rdparty/dcmtk/CMakeLists.txt index 782d207f..7154da1f 100644 --- a/3rdparty/dcmtk/CMakeLists.txt +++ b/3rdparty/dcmtk/CMakeLists.txt @@ -6,6 +6,9 @@ FetchContent_Declare( if(ECVL_WITH_DICOM) if(ECVL_BUILD_DEPS) + if(POLICY CMP0115) + set(CMAKE_POLICY_DEFAULT_CMP0115 OLD) + endif() FetchContent_GetProperties(dcmtk) if(NOT dcmtk_POPULATED) FetchContent_Populate(dcmtk) From 19bfb6aeed1957167b885e20494e318995b9f2a2 Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Tue, 1 Jun 2021 12:41:19 +0200 Subject: [PATCH 29/38] Change queue to store tuple of --- examples/example_pipeline.cpp | 10 ++++----- modules/eddl/include/ecvl/support_eddl.h | 27 +++++++++++++----------- modules/eddl/src/support_eddl.cpp | 14 ++++++------ 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/examples/example_pipeline.cpp b/examples/example_pipeline.cpp index d642f574..7cdfebe7 100644 --- a/examples/example_pipeline.cpp +++ b/examples/example_pipeline.cpp @@ -51,7 +51,7 @@ int main() cout << "Creating a DLDataset" << endl; // Initialize the DLDataset - DLDataset d("../examples/data/mnist/mnist.yml", batch_size, dataset_augmentations, ColorType::GRAY, ColorType::none, num_workers, queue_ratio, { true, false }); + DLDataset d("../examples/data/mnist/mnist_reduced.yml", batch_size, dataset_augmentations, ColorType::GRAY, ColorType::none, num_workers, queue_ratio, { true, false }); //DLDataset d("D:/Data/isic_skin_lesion/isic_skin_lesion/isic_classification.yml", batch_size, dataset_augmentations, ColorType::RGB, ColorType::none, num_workers, queue_ratio); ofstream of; @@ -82,10 +82,10 @@ int main() cout << "Epoch " << i << "/" << epochs - 1 << " (batch " << j << "/" << num_batches_training - 1 << ") - "; cout << "|fifo| " << d.GetQueueSize() << " - "; - // pair, unique_ptr> samples_and_labels; + // tuple, unique_ptr, unique_ptr> samples_and_labels; // samples_and_labels = d.GetBatch(); // or... - auto [x, y] = d.GetBatch(); + auto [samples, x, y] = d.GetBatch(); // Sleep in order to simulate EDDL train_batch cout << "sleeping..."; @@ -110,10 +110,10 @@ int main() cout << "Test: Epoch " << i << "/" << epochs - 1 << " (batch " << j << "/" << num_batches_test - 1 << ") - "; cout << "|fifo| " << d.GetQueueSize() << " - "; - // pair, unique_ptr> samples_and_labels; + // tuple, unique_ptr, unique_ptr> samples_and_labels; // samples_and_labels = d.GetBatch(); // or... - auto [x, y] = d.GetBatch(); + auto [_, x, y] = d.GetBatch(); /* Resize net for last batch if (auto x_batch = x->shape[0]; j == num_batches_test - 1 && x_batch != batch_size) { diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index 4a252c89..2ec095fd 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -24,6 +24,7 @@ #include #include #include +#include namespace ecvl { @@ -95,7 +96,8 @@ class DatasetAugmentations DatasetAugmentations(const std::vector>& augs = { nullptr, nullptr, nullptr }) : augs_(augs) {} // This makes a deep copy of the Augmentations - DatasetAugmentations(const DatasetAugmentations& other) { + DatasetAugmentations(const DatasetAugmentations& other) + { for (const auto& a : other.augs_) { augs_.emplace_back(a->Clone()); } @@ -201,7 +203,7 @@ class ProducersConsumerQueue std::condition_variable cond_notempty_; /**< @brief Condition variable that wait if the queue is empty. */ std::condition_variable cond_notfull_; /**< @brief Condition variable that wait if the queue is full. */ std::mutex mutex_; /**< @brief Mutex to grant exclusive access to the queue. */ - std::queue> cpq_; /**< @brief Queue of samples, stored as pair of Image and Label pointer. */ + std::queue> cpq_; /**< @brief Queue of samples, stored as tuple of Sample, Image and Label pointer. */ unsigned max_size_; /**< @brief Maximum size of the queue. */ unsigned threshold_; /**< @brief Threshold from which restart to produce samples. If not specified, it's set to the half of maximum size. */ @@ -219,34 +221,35 @@ class ProducersConsumerQueue /** @brief Push a sample in the queue. - Take the lock of the queue and wait if the queue is full. Otherwise, push the pair Image, Label into the queue. + Take the lock of the queue and wait if the queue is full. Otherwise, push the tuple Sample, Image, Label into the queue. + + @param[in] sample Sample to push in queue. @param[in] image Image to push in the queue. @param[in] label Label to push in the queue. */ - void Push(const Image& image, Label* label) + void Push(const Sample& sample, const Image& image, Label* const label) { std::unique_lock lock(mutex_); cond_notfull_.wait(lock, [this]() { return !IsFull(); }); - cpq_.push(make_pair(image, label)); + cpq_.push(make_tuple(sample, image, label)); cond_notempty_.notify_one(); } /** @brief Pop a sample from the queue. - Take the lock of the queue and wait if the queue is empty. Otherwise, pop an Image and its Label from the queue. + Take the lock of the queue and wait if the queue is empty. Otherwise, pop a Sample, Image and its Label from the queue. If the queue size is still bigger than the half of the maximum size, don't notify the Push to avoid an always-full queue. + @param[in] sample Sample to pop in queue. @param[in] image Image to pop from the queue. @param[in] label Label to pop from the queue. */ - void Pop(Image& image, Label*& label) + void Pop(Sample& sample, Image& image, Label*& label) { std::unique_lock lock(mutex_); cond_notempty_.wait(lock, [this]() { return !IsEmpty(); }); - auto p = cpq_.front(); + std::tie(sample, image, label) = cpq_.front(); cpq_.pop(); - image = p.first; - label = p.second; if (Length() < threshold_) { cond_notfull_.notify_one(); } @@ -498,9 +501,9 @@ class DLDataset : public Dataset /** @brief Pop batch_size samples from the queue and copy them into EDDL tensors. - @return pair of EDDL Tensor, first with the image, second with the label. + @return tuples of Samples and EDDL Tensors, the first with the image and the second with the label. */ - pair, unique_ptr> GetBatch(); + std::tuple, unique_ptr, unique_ptr> GetBatch(); /** @brief Spawn num_workers thread. diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index efa01a7d..e9bbc629 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -356,7 +356,7 @@ void DLDataset::ProduceImageLabel(DatasetAugmentations& augs, Sample& elem) } // Apply chain of augmentations only to sample image augs.Apply(current_split_, img); - queue_.Push(img, label); + queue_.Push(elem, img, label); } break; case Task::segmentation: @@ -373,7 +373,7 @@ void DLDataset::ProduceImageLabel(DatasetAugmentations& augs, Sample& elem) else { augs.Apply(current_split_, img); } - queue_.Push(img, label); + queue_.Push(elem, img, label); } break; } @@ -416,7 +416,7 @@ void DLDataset::ThreadFunc(int thread_index) } } -pair, unique_ptr> DLDataset::GetBatch() +tuple, unique_ptr, unique_ptr> DLDataset::GetBatch() { if (!active_) { cout << ECVL_WARNING_MSG << "You're trying to get a batch without starting the threads - you'll wait forever!" << endl; @@ -437,9 +437,11 @@ pair, unique_ptr> DLDataset::GetBatch() unique_ptr x = make_unique(tensors_shape.first); unique_ptr y = make_unique(tensors_shape.second); + const int batch_len = x->shape[0]; Image img; - for (int i = 0; i < x->shape[0]; ++i) { - queue_.Pop(img, label_); // Consumer get samples from the queue + vector samples(batch_len); + for (int i = 0; i < batch_len; ++i) { + queue_.Pop(samples[i], img, label_); // Consumer get samples from the queue if (label_ != nullptr) { // Label nullptr means no label at all for this sample (example: possible for test split) // Copy label into tensor @@ -452,7 +454,7 @@ pair, unique_ptr> DLDataset::GetBatch() ImageToTensor(img, lhs, i); } - return make_pair(move(x), move(y)); + return make_tuple(move(samples), move(x), move(y)); } void DLDataset::Start(int split_index) From 93b8decd3e935966a37e87b033528474b2652206 Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Tue, 1 Jun 2021 12:45:08 +0200 Subject: [PATCH 30/38] LoadExistingDataset test now uses a reduced version of MNIST dataset --- modules/dataset/test/test_dataset_parser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/dataset/test/test_dataset_parser.cpp b/modules/dataset/test/test_dataset_parser.cpp index 5fab2e2e..f40af7f6 100644 --- a/modules/dataset/test/test_dataset_parser.cpp +++ b/modules/dataset/test/test_dataset_parser.cpp @@ -26,11 +26,11 @@ using namespace ecvl; #ifdef ECVL_WITH_EXAMPLES TEST(DatasetParser, LoadExistingDataset) { - Dataset d(CMAKE_CURRENT_SOURCE_DIR "/examples/data/mnist/mnist.yml"); + Dataset d(CMAKE_CURRENT_SOURCE_DIR "/examples/data/mnist/mnist_reduced.yml"); EXPECT_EQ(d.name_, "MNIST"); EXPECT_EQ(d.classes_.size(), 10); EXPECT_THAT(d.classes_, testing::ElementsAre("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); - EXPECT_EQ(d.samples_.size(), 70000); + EXPECT_EQ(d.samples_.size(), 1000); } #endif From b1ed10d7326ca87e675fd802910cd1b1fb22e4c5 Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Tue, 1 Jun 2021 16:00:45 +0200 Subject: [PATCH 31/38] Fix multi thread DicomRead --- modules/core/include/ecvl/core/support_dcmtk.h | 6 ++++++ modules/core/src/support_dcmtk.cpp | 17 ++++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/modules/core/include/ecvl/core/support_dcmtk.h b/modules/core/include/ecvl/core/support_dcmtk.h index 118fe2a6..f1b6b0e4 100644 --- a/modules/core/include/ecvl/core/support_dcmtk.h +++ b/modules/core/include/ecvl/core/support_dcmtk.h @@ -57,6 +57,12 @@ The function DicomWrite saves the input image into a specified file, with the DI */ extern bool DicomWrite(const ecvl::filesystem::path& filename, const Image& src); +struct InitDCMTK +{ + InitDCMTK(); + ~InitDCMTK(); +}; + /** @example example_nifti_dicom.cpp Nifti and Dicom support example. */ diff --git a/modules/core/src/support_dcmtk.cpp b/modules/core/src/support_dcmtk.cpp index 6e4fed7d..3e718de8 100644 --- a/modules/core/src/support_dcmtk.cpp +++ b/modules/core/src/support_dcmtk.cpp @@ -31,6 +31,16 @@ using namespace std; namespace ecvl { + +InitDCMTK::InitDCMTK() +{ + DJDecoderRegistration::registerCodecs(); +} +InitDCMTK::~InitDCMTK() +{ + DJDecoderRegistration::cleanup(); +} + bool OverlayMetaData::Query(const std::string& name, std::string& value) const { if (name == "overlay") { @@ -50,10 +60,9 @@ bool OverlayMetaData::Query(const std::string& name, std::string& value) const bool DicomRead(const std::string& filename, Image& dst) { + static InitDCMTK init_dcmtk; // Created only first time DicomRead is called bool return_value = true; - DJDecoderRegistration::registerCodecs(); - DicomImage* image = new DicomImage(filename.c_str()); if (image == NULL) { return_value = false; @@ -93,7 +102,7 @@ bool DicomRead(const std::string& filename, Image& dst) } else { for (int i = 0; i < planes; i++) { - memcpy(dst.data_ + x * y * DataTypeSize(dst_datatype) * i, reinterpret_cast(dipixel_data)[i], x * y * DataTypeSize(dst_datatype)); + memcpy(dst.data_ + x * y * DataTypeSize(dst_datatype) * i, reinterpret_cast(dipixel_data)[i], x * y * DataTypeSize(dst_datatype)); } } @@ -158,8 +167,6 @@ bool DicomRead(const std::string& filename, Image& dst) dst = Image(); } - DJDecoderRegistration::cleanup(); - return return_value; } From 4a2a99308b7dcdc3838782d3c5678589dbb8b853 Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Tue, 1 Jun 2021 16:02:39 +0200 Subject: [PATCH 32/38] Add queue Clear, fix tensor shape error when split has no label --- modules/dataset/include/ecvl/dataset_parser.h | 2 +- modules/eddl/include/ecvl/support_eddl.h | 13 +++++----- modules/eddl/src/support_eddl.cpp | 24 ++++++++++--------- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/modules/dataset/include/ecvl/dataset_parser.h b/modules/dataset/include/ecvl/dataset_parser.h index 8107b5e1..32c8b623 100644 --- a/modules/dataset/include/ecvl/dataset_parser.h +++ b/modules/dataset/include/ecvl/dataset_parser.h @@ -165,7 +165,7 @@ class Dataset /** @brief Set the current split. @param[in] split index, name or ecvl::SplitType representing the split to set. */ - virtual void SetSplit(const ecvl::any& split); + void SetSplit(const ecvl::any& split); /** @brief Dump the Dataset into a YAML file following the DeepHealth Dataset Format. diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index 31f6ebff..68dd3f44 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -282,6 +282,12 @@ class ProducersConsumerQueue max_size_ = max_size; threshold_ = thresh != -1 ? thresh : max_size / 2; } + + void Clear() + { + std::unique_lock lock(mutex_); + cpq_ = {}; + } }; /** @brief Class representing the thread counters. @@ -321,6 +327,7 @@ class DLDataset : public Dataset std::vector> splits_tc_; /**< @brief Each dataset split has its own vector of threads, each of which has its counters: . */ std::vector producers_; /**< @brief Vector of threads representing the samples producers. */ bool active_ = false; /**< @brief Whether the threads have already been launched or not. */ + std::mutex active_mutex_; /**< @brief Mutex for active_ variable. */ static std::default_random_engine re_; /**< @brief Engine used for random number generation. */ Label* label_ = nullptr; /**< @brief Label pointer which will be specialized based on the dataset task. */ @@ -510,12 +517,6 @@ class DLDataset : public Dataset */ auto GetQueueSize() const { return queue_.Length(); }; - /** @brief Set the current split and if the split doesn't have labels update the dataset tensors_shape_. - - @param[in] split index, name or ecvl::SplitType representing the split to set. - */ - void SetSplit(const ecvl::any& split) override; - /** @brief Set the dataset augmentations. @param[in] da @ref DatasetAugmentations to set. diff --git a/modules/eddl/src/support_eddl.cpp b/modules/eddl/src/support_eddl.cpp index d4c2ef57..391f6691 100644 --- a/modules/eddl/src/support_eddl.cpp +++ b/modules/eddl/src/support_eddl.cpp @@ -412,6 +412,11 @@ void DLDataset::ThreadFunc(int thread_index) ProduceImageLabel(elem); ++tc_of_current_split[thread_index].counter_; + + std::unique_lock lock(active_mutex_); + if (!active_) { + return; + } } } @@ -433,6 +438,9 @@ pair, unique_ptr> DLDataset::GetBatch() } } + // If current split has no labels (e.g., test split could have no labels) set y as empty tensor + tensors_shape.second = (s.no_label_) ? vector{} : tensors_shape.second; + unique_ptr x = make_unique(tensors_shape.first); unique_ptr y = make_unique(tensors_shape.second); @@ -440,15 +448,16 @@ pair, unique_ptr> DLDataset::GetBatch() for (int i = 0; i < x->shape[0]; ++i) { queue_.Pop(img, label_); // Consumer get samples from the queue + // Copy sample image into tensor + auto lhs = x.get(); + ImageToTensor(img, lhs, i); + if (label_ != nullptr) { // Label nullptr means no label at all for this sample (example: possible for test split) // Copy label into tensor label_->ToTensorPlane(y.get(), i); delete label_; label_ = nullptr; } - //Copy sample image into tensor - auto lhs = x.get(); - ImageToTensor(img, lhs, i); } return make_pair(move(x), move(y)); @@ -467,6 +476,7 @@ void DLDataset::Start(int split_index) } producers_.clear(); + queue_.Clear(); if (num_workers_ > 0) { for (int i = 0; i < num_workers_; ++i) { @@ -496,14 +506,6 @@ const int DLDataset::GetNumBatches(const any& split) return it->num_batches_; } -void DLDataset::SetSplit(const any& split) -{ - Dataset::SetSplit(split); - if (split_[current_split_].no_label_) { - tensors_shape_.second = {}; - } -} - void DLDataset::SetAugmentations(const DatasetAugmentations& da) { augs_ = da; From c3259aeb4a0c542fd365eb8957fbdef02bfa61dd Mon Sep 17 00:00:00 2001 From: Laura Canalini Date: Tue, 1 Jun 2021 18:51:30 +0200 Subject: [PATCH 33/38] Fix bug in DatasetAugmentations copy constructor --- modules/eddl/include/ecvl/support_eddl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index de95cdb0..90b2c563 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -99,7 +99,7 @@ class DatasetAugmentations DatasetAugmentations(const DatasetAugmentations& other) { for (const auto& a : other.augs_) { - augs_.emplace_back(a->Clone()); + a ? augs_.emplace_back(a->Clone()) : augs_.emplace_back(nullptr); } } @@ -365,7 +365,7 @@ class DLDataset : public Dataset */ DLDataset(const filesystem::path& filename, const int batch_size, - DatasetAugmentations augs = DatasetAugmentations(), + const DatasetAugmentations& augs = DatasetAugmentations(), ColorType ctype = ColorType::RGB, ColorType ctype_gt = ColorType::GRAY, int num_workers = 1, @@ -375,7 +375,7 @@ class DLDataset : public Dataset Dataset{ filename, verify }, batch_size_{ batch_size }, - augs_(std::move(augs)), + augs_(augs), num_workers_{ num_workers }, ctype_{ ctype }, ctype_gt_{ ctype_gt }, From f1263dbc32731b4ac4dfdaa10c219366d80c67bc Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Thu, 3 Jun 2021 11:52:22 +0200 Subject: [PATCH 34/38] Jenkins now builds in Release --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index b980df82..a6772a7d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,7 +15,7 @@ pipeline { steps { timeout(60) { echo 'Building..' - cmakeBuild buildDir: 'build', cmakeArgs: '-DECVL_TESTS=ON -DECVL_BUILD_EDDL=ON -DECVL_DATASET=ON -DECVL_WITH_DICOM=ON -DECVL_WITH_OPENSLIDE=ON -DECVL_GPU=OFF', installation: 'InSearchPath', sourceDir: '.', cleanBuild: true, steps: [ + cmakeBuild buildDir: 'build', buildType: 'Release', cmakeArgs: '-DECVL_TESTS=ON -DECVL_BUILD_EDDL=ON -DECVL_DATASET=ON -DECVL_WITH_DICOM=ON -DECVL_WITH_OPENSLIDE=ON -DECVL_GPU=OFF', installation: 'InSearchPath', sourceDir: '.', cleanBuild: true, steps: [ [args: '--parallel 4', withCmake: true] ] } @@ -46,7 +46,7 @@ pipeline { timeout(60) { echo 'Building..' bat 'powershell ../../ecvl_dependencies/ecvl_dependencies.ps1' - cmakeBuild buildDir: 'build', cmakeArgs: '-DECVL_TESTS=ON -DECVL_BUILD_EDDL=ON -DECVL_DATASET=ON -DECVL_WITH_DICOM=ON -DECVL_WITH_OPENSLIDE=ON -DOPENSLIDE_LIBRARIES=C:/Library/openslide-win32-20171122/lib/libopenslide.lib', installation: 'InSearchPath', sourceDir: '.', cleanBuild: true, steps: [ + cmakeBuild buildDir: 'build', buildType: 'Release', cmakeArgs: '-DECVL_TESTS=ON -DECVL_BUILD_EDDL=ON -DECVL_DATASET=ON -DECVL_WITH_DICOM=ON -DECVL_WITH_OPENSLIDE=ON -DOPENSLIDE_LIBRARIES=C:/Library/openslide-win32-20171122/lib/libopenslide.lib', installation: 'InSearchPath', sourceDir: '.', cleanBuild: true, steps: [ [args: '--parallel 4', withCmake: true] ] } From dc60300f305414fea2cab2e6b747ca9879cc45f7 Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Thu, 3 Jun 2021 13:07:11 +0200 Subject: [PATCH 35/38] Jenkins now runs ctest in Release --- Jenkinsfile | 4 ++-- modules/eddl/include/ecvl/support_eddl.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index a6772a7d..d3585eba 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -25,7 +25,7 @@ pipeline { steps { timeout(15) { echo 'Testing..' - ctest arguments: '-C Debug -VV', installation: 'InSearchPath', workingDir: 'build' + ctest arguments: '-C Release -VV', installation: 'InSearchPath', workingDir: 'build' } } } @@ -56,7 +56,7 @@ pipeline { steps { timeout(15) { echo 'Testing..' - bat 'cd build && ctest -C Debug -VV' + bat 'cd build && ctest -C Release -VV' } } } diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index 90b2c563..9a901c70 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -99,7 +99,7 @@ class DatasetAugmentations DatasetAugmentations(const DatasetAugmentations& other) { for (const auto& a : other.augs_) { - a ? augs_.emplace_back(a->Clone()) : augs_.emplace_back(nullptr); + augs_.emplace_back(a ? a->Clone() : nullptr); } } From d6a162def35b040d651a4f38eb9d9721378b18c8 Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Thu, 3 Jun 2021 15:19:40 +0200 Subject: [PATCH 36/38] Add missing Release parameter --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index d3585eba..681bbaa2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -47,7 +47,7 @@ pipeline { echo 'Building..' bat 'powershell ../../ecvl_dependencies/ecvl_dependencies.ps1' cmakeBuild buildDir: 'build', buildType: 'Release', cmakeArgs: '-DECVL_TESTS=ON -DECVL_BUILD_EDDL=ON -DECVL_DATASET=ON -DECVL_WITH_DICOM=ON -DECVL_WITH_OPENSLIDE=ON -DOPENSLIDE_LIBRARIES=C:/Library/openslide-win32-20171122/lib/libopenslide.lib', installation: 'InSearchPath', sourceDir: '.', cleanBuild: true, steps: [ - [args: '--parallel 4', withCmake: true] + [args: '--config Release --parallel 4', withCmake: true] ] } } From 45b949c3632bfcb7208e127d4e01a85c48e67ebc Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Fri, 4 Jun 2021 10:37:22 +0200 Subject: [PATCH 37/38] Run codecov in Release --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 681bbaa2..fd17b2c8 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -64,7 +64,7 @@ pipeline { steps { timeout(15) { echo 'Calculating coverage..' - bat '"C:/Program Files/OpenCppCoverage/OpenCppCoverage.exe" --source %cd% --export_type=cobertura --excluded_sources=3rdparty -- "build/bin/Debug/ECVL_TESTS.exe"' + bat '"C:/Program Files/OpenCppCoverage/OpenCppCoverage.exe" --source %cd% --export_type=cobertura --excluded_sources=3rdparty -- "build/bin/Release/ECVL_TESTS.exe"' cobertura coberturaReportFile: 'ECVL_TESTSCoverage.xml' bat 'codecov -f ECVL_TESTSCoverage.xml -t 7635bd2e-51cf-461e-bb1b-fc7ba9fb26d1' } From 83d324a9a1280c790d08b932144317563466b059 Mon Sep 17 00:00:00 2001 From: Michele Cancilla Date: Wed, 9 Jun 2021 10:29:48 +0200 Subject: [PATCH 38/38] Remove default value of DatasetAugmentations constructor --- modules/eddl/include/ecvl/support_eddl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/eddl/include/ecvl/support_eddl.h b/modules/eddl/include/ecvl/support_eddl.h index 9a901c70..df70f36c 100644 --- a/modules/eddl/include/ecvl/support_eddl.h +++ b/modules/eddl/include/ecvl/support_eddl.h @@ -93,7 +93,7 @@ class DatasetAugmentations { std::vector> augs_; public: - DatasetAugmentations(const std::vector>& augs = { nullptr, nullptr, nullptr }) : augs_(augs) {} + DatasetAugmentations(const std::vector>& augs) : augs_(augs) {} // This makes a deep copy of the Augmentations DatasetAugmentations(const DatasetAugmentations& other) @@ -365,7 +365,7 @@ class DLDataset : public Dataset */ DLDataset(const filesystem::path& filename, const int batch_size, - const DatasetAugmentations& augs = DatasetAugmentations(), + const DatasetAugmentations& augs, ColorType ctype = ColorType::RGB, ColorType ctype_gt = ColorType::GRAY, int num_workers = 1,